author     Oleg Nesterov <oleg@redhat.com>    2010-03-15 05:10:23 -0400
committer  Ingo Molnar <mingo@elte.hu>        2010-04-02 14:12:03 -0400
commit     6a1bdc1b577ebcb65f6603c57f8347309bc4ab13
tree       516130eedf782dd14505bd111e06bcfad9923b07
parent     30da688ef6b76e01969b00608202fff1eed2accc
sched: _cpu_down(): Don't play with current->cpus_allowed
_cpu_down() changes the current task's affinity and then restores it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead cpu, and we can race with userspace, which can
change the task's cpu affinity during the unplug.
_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091023.GA9148@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 include/linux/sched.h |  1
 kernel/cpu.c          | 18
 kernel/sched.c        |  2
 3 files changed, 8 insertions(+), 13 deletions(-)
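The affinity dance being removed is easiest to see condensed in one place. The sketch below is not the verbatim removed code: the helper name is made up, and the notifier calls and most error paths around __stop_machine() are elided; the kernel/cpu.c hunks further down show the exact lines.

```c
/*
 * Condensed sketch of the old _cpu_down() behaviour that this patch
 * removes (hypothetical helper name; notifier calls and error paths elided).
 */
static int old_cpu_down_affinity_dance(unsigned int cpu,
					struct take_cpu_down_param *tcd_param)
{
	cpumask_var_t old_allowed;
	int err;

	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
		return -ENOMEM;

	/* Ensure that we are not runnable on the dying cpu. */
	cpumask_copy(old_allowed, &current->cpus_allowed);
	set_cpus_allowed_ptr(current, cpu_active_mask);

	err = __stop_machine(take_cpu_down, tcd_param, cpumask_of(cpu));

	/*
	 * Restoring the old mask is the broken part: old_allowed may name
	 * only the now-dead cpu, and userspace may have changed the task's
	 * affinity (sched_setaffinity) while the unplug was in flight, in
	 * which case that change is silently overwritten here.
	 */
	set_cpus_allowed_ptr(current, old_allowed);

	free_cpumask_var(old_allowed);
	return err;
}
```

With the patch applied, none of this runs in _cpu_down(); the caller is migrated inside take_cpu_down(), on the dying cpu itself, once __cpu_disable() has taken that cpu out of cpu_online_mask.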
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43c945152732..8bea40725c76 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2692b3..8d340faac380 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 			    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 			    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 165b532dd8c2..11119deffa48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, uninitialized_var(dest_cpu);
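For readability, this is roughly what take_cpu_down() looks like with the patch applied. It is a sketch reconstructed from the hunks above, not the full function: the unchanged lines between the hunks are paraphrased from their surrounding context.

```c
/* Sketch of take_cpu_down() after the patch, reconstructed from the hunks above. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	unsigned int cpu = (unsigned long)param->hcpu;
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
				param->hcpu);

	/*
	 * __cpu_disable() has already removed this cpu from cpu_online_mask,
	 * so migrating the caller of _cpu_down() here cannot land it back on
	 * the dying cpu, and no save/restore of ->cpus_allowed is needed.
	 */
	if (task_cpu(param->caller) == cpu)
		move_task_off_dead_cpu(cpu, param->caller);

	/* Force idle task to run as soon as we yield: it should
	   immediately notice cpu is offline and die quickly. */
	sched_idle_next();

	return 0;
}
```

To make this possible, move_task_off_dead_cpu() loses its static qualifier in kernel/sched.c and gains a declaration in <linux/sched.h> so kernel/cpu.c can call it.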
