Diffstat (limited to 'kernel/smpboot.c')

 kernel/smpboot.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 166 insertions(+), 7 deletions(-)
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index eb89e1807408..c697f73d82d6 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -110,7 +111,7 @@ static int smpboot_thread_fn(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		preempt_disable();
 		if (kthread_should_stop()) {
-			set_current_state(TASK_RUNNING);
+			__set_current_state(TASK_RUNNING);
 			preempt_enable();
 			if (ht->cleanup)
 				ht->cleanup(td->cpu, cpu_online(td->cpu));
@@ -136,26 +137,27 @@ static int smpboot_thread_fn(void *data)
 		/* Check for state change setup */
 		switch (td->status) {
 		case HP_THREAD_NONE:
+			__set_current_state(TASK_RUNNING);
 			preempt_enable();
 			if (ht->setup)
 				ht->setup(td->cpu);
 			td->status = HP_THREAD_ACTIVE;
-			preempt_disable();
-			break;
+			continue;
+
 		case HP_THREAD_PARKED:
+			__set_current_state(TASK_RUNNING);
 			preempt_enable();
 			if (ht->unpark)
 				ht->unpark(td->cpu);
 			td->status = HP_THREAD_ACTIVE;
-			preempt_disable();
-			break;
+			continue;
 		}
 
 		if (!ht->thread_should_run(td->cpu)) {
-			preempt_enable();
+			preempt_enable_no_resched();
 			schedule();
 		} else {
-			set_current_state(TASK_RUNNING);
+			__set_current_state(TASK_RUNNING);
 			preempt_enable();
 			ht->thread_fn(td->cpu);
 		}
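For orientation, the main loop of smpboot_thread_fn() reads roughly as follows once the hunks above are applied. This is a condensed sketch, not the full function: the kthread stop/park handling near the top of the loop is only summarized in a comment, and the HP_THREAD_PARKED case (handled like HP_THREAD_NONE but via ht->unpark()) is elided.

	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		preempt_disable();
		/* ... kthread_should_stop()/kthread_should_park() handling ... */

		switch (td->status) {
		case HP_THREAD_NONE:
			/*
			 * Self-transition to TASK_RUNNING: the unlocked
			 * __set_current_state() suffices because a racing
			 * wakeup would only set the same state.
			 */
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			if (ht->setup)
				ht->setup(td->cpu);
			td->status = HP_THREAD_ACTIVE;
			/* Re-arm the sleep state and preemption at the top. */
			continue;
		}

		if (!ht->thread_should_run(td->cpu)) {
			/* Still TASK_INTERRUPTIBLE; schedule() follows at once. */
			preempt_enable_no_resched();
			schedule();
		} else {
			__set_current_state(TASK_RUNNING);
			preempt_enable();
			ht->thread_fn(td->cpu);
		}
	}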
@@ -279,6 +281,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 	unsigned int cpu;
 	int ret = 0;
 
+	get_online_cpus();
 	mutex_lock(&smpboot_threads_lock);
 	for_each_online_cpu(cpu) {
 		ret = __smpboot_create_thread(plug_thread, cpu);
@@ -291,6 +294,7 @@ int smpboot_register_percpu_thread(struct smp_hotplug_thread *plug_thread)
 	list_add(&plug_thread->list, &hotplug_threads);
 out:
 	mutex_unlock(&smpboot_threads_lock);
+	put_online_cpus();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(smpboot_register_percpu_thread);
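The get_online_cpus()/put_online_cpus() pair added above pins CPU hotplug while the registration loop creates one thread per currently online CPU. For context, a client registers a struct smp_hotplug_thread along the following lines; the field names match existing in-tree users such as the ksoftirqd registration in kernel/softirq.c, while the my_* identifiers are purely illustrative.

static DEFINE_PER_CPU(struct task_struct *, my_thread_store);

static int my_thread_should_run(unsigned int cpu)
{
	/* Illustration only: report whether per-CPU work is pending. */
	return 0;
}

static void my_thread_fn(unsigned int cpu)
{
	/* Do the per-CPU work; called with preemption enabled. */
}

static struct smp_hotplug_thread my_threads = {
	.store			= &my_thread_store,
	.thread_should_run	= my_thread_should_run,
	.thread_fn		= my_thread_fn,
	.thread_comm		= "my_thread/%u",
};

static int __init my_threads_init(void)
{
	/* Spawns "my_thread/N" on each online CPU and follows hotplug. */
	return smpboot_register_percpu_thread(&my_threads);
}
early_initcall(my_threads_init);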
@@ -311,3 +315,158 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
 	put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
+
+static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
+
+/*
+ * Called to poll specified CPU's state, for example, when waiting for
+ * a CPU to come online.
+ */
+int cpu_report_state(int cpu)
+{
+	return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+}
+
+/*
+ * If CPU has died properly, set its state to CPU_UP_PREPARE and
+ * return success.  Otherwise, return -EBUSY if the CPU died after
+ * cpu_wait_death() timed out.  And yet otherwise again, return -EAGAIN
+ * if cpu_wait_death() timed out and the CPU still hasn't gotten around
+ * to dying.  In the latter two cases, the CPU might not be set up
+ * properly, but it is up to the arch-specific code to decide.
+ * Finally, -EIO indicates an unanticipated problem.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+int cpu_check_up_prepare(int cpu)
+{
+	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+		return 0;
+	}
+
+	switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {
+
+	case CPU_POST_DEAD:
+
+		/* The CPU died properly, so just start it up again. */
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+		return 0;
+
+	case CPU_DEAD_FROZEN:
+
+		/*
+		 * Timeout during CPU death, so let caller know.
+		 * The outgoing CPU completed its processing, but after
+		 * cpu_wait_death() timed out and reported the error. The
+		 * caller is free to proceed, in which case the state
+		 * will be reset properly by cpu_set_state_online().
+		 * Proceeding despite this -EBUSY return makes sense
+		 * for systems where the outgoing CPUs take themselves
+		 * offline, with no post-death manipulation required from
+		 * a surviving CPU.
+		 */
+		return -EBUSY;
+
+	case CPU_BROKEN:
+
+		/*
+		 * The most likely reason we got here is that there was
+		 * a timeout during CPU death, and the outgoing CPU never
+		 * did complete its processing.  This could happen on
+		 * a virtualized system if the outgoing VCPU gets preempted
+		 * for more than five seconds, and the user attempts to
+		 * immediately online that same CPU.  Trying again later
+		 * might return -EBUSY above, hence -EAGAIN.
+		 */
+		return -EAGAIN;
+
+	default:
+
+		/* Should not happen.  Famous last words. */
+		return -EIO;
+	}
+}
+
+/*
+ * Mark the specified CPU online.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+void cpu_set_state_online(int cpu)
+{
+	(void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Wait for the specified CPU to exit the idle loop and die.
+ */
+bool cpu_wait_death(unsigned int cpu, int seconds)
+{
+	int jf_left = seconds * HZ;
+	int oldstate;
+	bool ret = true;
+	int sleep_jf = 1;
+
+	might_sleep();
+
+	/* The outgoing CPU will normally get done quite quickly. */
+	if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
+		goto update_state;
+	udelay(5);
+
+	/* But if the outgoing CPU dawdles, wait increasingly long times. */
+	while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
+		schedule_timeout_uninterruptible(sleep_jf);
+		jf_left -= sleep_jf;
+		if (jf_left <= 0)
+			break;
+		sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
+	}
+update_state:
+	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+	if (oldstate == CPU_DEAD) {
+		/* Outgoing CPU died normally, update state. */
+		smp_mb(); /* atomic_read() before update. */
+		atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
+	} else {
+		/* Outgoing CPU still hasn't died, set state accordingly. */
+		if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+				   oldstate, CPU_BROKEN) != oldstate)
+			goto update_state;
+		ret = false;
+	}
+	return ret;
+}
+
+/*
+ * Called by the outgoing CPU to report its successful death.  Return
+ * false if this report follows the surviving CPU's timing out.
+ *
+ * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
+ * timed out.  This approach allows architectures to omit calls to
+ * cpu_check_up_prepare() and cpu_set_state_online() without defeating
+ * the next cpu_wait_death()'s polling loop.
+ */
+bool cpu_report_death(void)
+{
+	int oldstate;
+	int newstate;
+	int cpu = smp_processor_id();
+
+	do {
+		oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+		if (oldstate != CPU_BROKEN)
+			newstate = CPU_DEAD;
+		else
+			newstate = CPU_DEAD_FROZEN;
+	} while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+				oldstate, newstate) != oldstate);
+	return newstate == CPU_DEAD;
+}
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
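The helpers added in this final hunk are meant to be paired from architecture code on both sides of a hot-unplug. The sketch below is hypothetical: the arch_* names are invented for illustration, only the cpu_* calls come from this file, and the 5-second timeout simply echoes the preemption scenario mentioned in the CPU_BROKEN comment above.

/* Surviving CPU: wait for the victim to report its death. */
static void arch_cpu_die(unsigned int cpu)
{
	if (!cpu_wait_death(cpu, 5))
		pr_err("CPU %u did not report death in time\n", cpu);
}

/* Surviving CPU: before (re)booting a CPU, check how it last went down. */
static int arch_cpu_up_prepare(unsigned int cpu)
{
	int ret = cpu_check_up_prepare(cpu);

	if (ret)	/* -EBUSY, -EAGAIN or -EIO, as documented above */
		return ret;
	/* ... arch-specific kick of the target CPU ... */
	return 0;
}

/* Incoming CPU: mark itself online once it is ready for the idle loop. */
static void arch_secondary_start(void)
{
	cpu_set_state_online(smp_processor_id());
}

/* Outgoing CPU: last act before the arch-specific dead loop. */
static void arch_play_dead(void)
{
	(void)cpu_report_death();
	/* ... halt / low-power loop, never returns ... */
}

Because CPU_DEAD_FROZEN is kept distinct from CPU_BROKEN, an architecture that uses only the cpu_wait_death()/cpu_report_death() pair and omits the two check/online calls still keeps the next cpu_wait_death() polling loop working, as the cpu_report_death() comment notes.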