diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-12-16 12:04:36 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-12-16 13:01:56 -0500 |
commit | e2912009fb7b715728311b0d8fe327a1432b3f79 (patch) | |
tree | 9918e2f9690d85b7d7f5550d09f0ae6cc3fa4e0f /kernel | |
parent | 06b83b5fbea273672822b6ee93e16781046553ec (diff) |
sched: Ensure set_task_cpu() is never called on blocked tasks
In order to clean up the set_task_cpu() rq dependencies we need
to ensure it is never called on blocked tasks because such usage
does not pair with consistent rq->lock usage.
This puts the migration burden on ttwu().
Furthermore we need to close a race against changing
->cpus_allowed, since select_task_rq() runs with only preemption
disabled.
For sched_fork() this is safe because the child isn't in the
tasklist yet; for wakeup we fix this by synchronizing
set_cpus_allowed_ptr() against TASK_WAKING, which leaves
sched_exec() as the remaining problem.
This also closes a hole in commit 6ad4c1888 ("sched: Fix balance vs
hotplug race"), where ->select_task_rq() doesn't validate the
result against the sched_domain/root_domain.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170517.807938893@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 85 |
1 files changed, 66 insertions, 19 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 1672823aabf..33d7965f63f 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -2018,22 +2018,15 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
2018 | */ | 2018 | */ |
2019 | void kthread_bind(struct task_struct *p, unsigned int cpu) | 2019 | void kthread_bind(struct task_struct *p, unsigned int cpu) |
2020 | { | 2020 | { |
2021 | struct rq *rq = cpu_rq(cpu); | ||
2022 | unsigned long flags; | ||
2023 | |||
2024 | /* Must have done schedule() in kthread() before we set_task_cpu */ | 2021 | /* Must have done schedule() in kthread() before we set_task_cpu */ |
2025 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | 2022 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { |
2026 | WARN_ON(1); | 2023 | WARN_ON(1); |
2027 | return; | 2024 | return; |
2028 | } | 2025 | } |
2029 | 2026 | ||
2030 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2031 | update_rq_clock(rq); | ||
2032 | set_task_cpu(p, cpu); | ||
2033 | p->cpus_allowed = cpumask_of_cpu(cpu); | 2027 | p->cpus_allowed = cpumask_of_cpu(cpu); |
2034 | p->rt.nr_cpus_allowed = 1; | 2028 | p->rt.nr_cpus_allowed = 1; |
2035 | p->flags |= PF_THREAD_BOUND; | 2029 | p->flags |= PF_THREAD_BOUND; |
2036 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
2037 | } | 2030 | } |
2038 | EXPORT_SYMBOL(kthread_bind); | 2031 | EXPORT_SYMBOL(kthread_bind); |
2039 | 2032 | ||
@@ -2074,6 +2067,14 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2074 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2067 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
2075 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2068 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); |
2076 | 2069 | ||
2070 | #ifdef CONFIG_SCHED_DEBUG | ||
2071 | /* | ||
2072 | * We should never call set_task_cpu() on a blocked task, | ||
2073 | * ttwu() will sort out the placement. | ||
2074 | */ | ||
2075 | WARN_ON(p->state != TASK_RUNNING && p->state != TASK_WAKING); | ||
2076 | #endif | ||
2077 | |||
2077 | trace_sched_migrate_task(p, new_cpu); | 2078 | trace_sched_migrate_task(p, new_cpu); |
2078 | 2079 | ||
2079 | if (old_cpu != new_cpu) { | 2080 | if (old_cpu != new_cpu) { |
@@ -2107,13 +2108,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2107 | 2108 | ||
2108 | /* | 2109 | /* |
2109 | * If the task is not on a runqueue (and not running), then | 2110 | * If the task is not on a runqueue (and not running), then |
2110 | * it is sufficient to simply update the task's cpu field. | 2111 | * the next wake-up will properly place the task. |
2111 | */ | 2112 | */ |
2112 | if (!p->se.on_rq && !task_running(rq, p)) { | 2113 | if (!p->se.on_rq && !task_running(rq, p)) |
2113 | update_rq_clock(rq); | ||
2114 | set_task_cpu(p, dest_cpu); | ||
2115 | return 0; | 2114 | return 0; |
2116 | } | ||
2117 | 2115 | ||
2118 | init_completion(&req->done); | 2116 | init_completion(&req->done); |
2119 | req->task = p; | 2117 | req->task = p; |
@@ -2319,10 +2317,42 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2319 | } | 2317 | } |
2320 | 2318 | ||
2321 | #ifdef CONFIG_SMP | 2319 | #ifdef CONFIG_SMP |
2320 | /* | ||
2321 | * Called from: | ||
2322 | * | ||
2323 | * - fork, @p is stable because it isn't on the tasklist yet | ||
2324 | * | ||
2325 | * - exec, @p is unstable XXX | ||
2326 | * | ||
2327 | * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so | ||
2328 | * we should be good. | ||
2329 | */ | ||
2322 | static inline | 2330 | static inline |
2323 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | 2331 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
2324 | { | 2332 | { |
2325 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | 2333 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); |
2334 | |||
2335 | /* | ||
2336 | * In order not to call set_task_cpu() on a blocking task we need | ||
2337 | * to rely on ttwu() to place the task on a valid ->cpus_allowed | ||
2338 | * cpu. | ||
2339 | * | ||
2340 | * Since this is common to all placement strategies, this lives here. | ||
2341 | * | ||
2342 | * [ this allows ->select_task() to simply return task_cpu(p) and | ||
2343 | * not worry about this generic constraint ] | ||
2344 | */ | ||
2345 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | ||
2346 | !cpu_active(cpu))) { | ||
2347 | |||
2348 | cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | ||
2349 | /* | ||
2350 | * XXX: race against hot-plug modifying cpu_active_mask | ||
2351 | */ | ||
2352 | BUG_ON(cpu >= nr_cpu_ids); | ||
2353 | } | ||
2354 | |||
2355 | return cpu; | ||
2326 | } | 2356 | } |
2327 | #endif | 2357 | #endif |
2328 | 2358 | ||
@@ -7098,7 +7128,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7098 | struct rq *rq; | 7128 | struct rq *rq; |
7099 | int ret = 0; | 7129 | int ret = 0; |
7100 | 7130 | ||
7131 | /* | ||
7132 | * Since we rely on wake-ups to migrate sleeping tasks, don't change | ||
7133 | * the ->cpus_allowed mask from under waking tasks, which would be | ||
7134 | * possible when we change rq->lock in ttwu(), so synchronize against | ||
7135 | * TASK_WAKING to avoid that. | ||
7136 | */ | ||
7137 | again: | ||
7138 | while (p->state == TASK_WAKING) | ||
7139 | cpu_relax(); | ||
7140 | |||
7101 | rq = task_rq_lock(p, &flags); | 7141 | rq = task_rq_lock(p, &flags); |
7142 | |||
7143 | if (p->state == TASK_WAKING) { | ||
7144 | task_rq_unlock(rq, &flags); | ||
7145 | goto again; | ||
7146 | } | ||
7147 | |||
7102 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 7148 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7103 | ret = -EINVAL; | 7149 | ret = -EINVAL; |
7104 | goto out; | 7150 | goto out; |
@@ -7154,7 +7200,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | |||
7154 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 7200 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
7155 | { | 7201 | { |
7156 | struct rq *rq_dest, *rq_src; | 7202 | struct rq *rq_dest, *rq_src; |
7157 | int ret = 0, on_rq; | 7203 | int ret = 0; |
7158 | 7204 | ||
7159 | if (unlikely(!cpu_active(dest_cpu))) | 7205 | if (unlikely(!cpu_active(dest_cpu))) |
7160 | return ret; | 7206 | return ret; |
@@ -7170,12 +7216,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7170 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7216 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7171 | goto fail; | 7217 | goto fail; |
7172 | 7218 | ||
7173 | on_rq = p->se.on_rq; | 7219 | /* |
7174 | if (on_rq) | 7220 | * If we're not on a rq, the next wake-up will ensure we're |
7221 | * placed properly. | ||
7222 | */ | ||
7223 | if (p->se.on_rq) { | ||
7175 | deactivate_task(rq_src, p, 0); | 7224 | deactivate_task(rq_src, p, 0); |
7176 | 7225 | set_task_cpu(p, dest_cpu); | |
7177 | set_task_cpu(p, dest_cpu); | ||
7178 | if (on_rq) { | ||
7179 | activate_task(rq_dest, p, 0); | 7226 | activate_task(rq_dest, p, 0); |
7180 | check_preempt_curr(rq_dest, p, 0); | 7227 | check_preempt_curr(rq_dest, p, 0); |
7181 | } | 7228 | } |