| author | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
| commit | 3fc1f1e27a5b807791d72e5d992aa33b668a6626 (patch) | |
| tree | 396c2f49909c506c3ad53fd6a9bdddf6c24f7860 | |
| parent | 1142d810298e694754498dbb4983fcb6cb7fd884 (diff) | |
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop. As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and are removed.
With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler: asking cpu_stop to execute the
stop_cpu() state machine on all online cpus with cpu hotplug disabled
is enough.
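For reference, the cpu_stop interface this builds on (introduced by the parent commit) reduces to a callback type and a call that runs it on a set of cpus; a minimal sketch of the relevant declarations, not the full header:

```c
/* Sketch of the cpu_stop API from the parent commit; see
 * include/linux/stop_machine.h at this revision for the
 * authoritative declarations. */
typedef int (*cpu_stop_fn_t)(void *arg);

/*
 * Execute fn(arg) on each cpu in cpumask from that cpu's preallocated,
 * highest-priority stopper task.  Returns 0 if all invocations of fn
 * returned 0, otherwise a non-zero return value from one of them.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
```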
stop_machine itself no longer needs to manage any global resources,
so all per-instance information is rolled into struct
stop_machine_data, and the mutex and the static data variables are
removed.
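Concretely, the per-invocation state consolidates to the following (reproduced from the kernel/stop_machine.c hunk below):

```c
struct stop_machine_data {
	int (*fn)(void *);
	void *data;
	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
	unsigned int num_threads;
	const struct cpumask *active_cpus;

	enum stopmachine_state state;
	atomic_t thread_ack;
};
```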
The previous implementation created and destroyed RT workqueues as
necessary, which made stop_machine() calls highly expensive on very
large machines. According to Dimitri Sivanich, avoiding the dynamic
creation/destruction makes booting more than twice as fast on very
large machines. cpu_stop resources are preallocated for all online
cpus and should have the same effect.
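For callers, the change looks like this; frobnicate() is a hypothetical callback used only for illustration:

```c
#include <linux/stop_machine.h>

/* Hypothetical callback: runs on the active cpu(s) while every other
 * online cpu spins with interrupts disabled. */
static int frobnicate(void *arg)
{
	return 0;
}

static int frobnicate_everything(void)
{
	/*
	 * Before this patch, callers that could not tolerate a failing
	 * stop_machine() had to bracket the call:
	 *
	 *	err = stop_machine_create();
	 *	if (err)
	 *		return err;
	 *	err = stop_machine(frobnicate, NULL, NULL);
	 *	stop_machine_destroy();
	 *	return err;
	 */
	return stop_machine(frobnicate, NULL, NULL);
}
```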
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
| mode | path | changed |
|---|---|---|
| -rw-r--r-- | arch/s390/kernel/time.c | 1 |
| -rw-r--r-- | drivers/xen/manage.c | 14 |
| -rw-r--r-- | include/linux/stop_machine.h | 20 |
| -rw-r--r-- | kernel/cpu.c | 8 |
| -rw-r--r-- | kernel/module.c | 14 |
| -rw-r--r-- | kernel/stop_machine.c | 158 |

6 files changed, 42 insertions, 173 deletions
```diff
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fba6dec156bf..03d96569f187 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -390,7 +390,6 @@ static void __init time_init_wq(void)
 	if (time_sync_wq)
 		return;
 	time_sync_wq = create_singlethread_workqueue("timesync");
-	stop_machine_create();
 }
 
 /*
```
```diff
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 2ac4440e7b08..8943b8ccee1a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -80,12 +80,6 @@ static void do_suspend(void)
 
 	shutting_down = SHUTDOWN_SUSPEND;
 
-	err = stop_machine_create();
-	if (err) {
-		printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-		goto out;
-	}
-
 #ifdef CONFIG_PREEMPT
 	/* If the kernel is preemptible, we need to freeze all the processes
 	   to prevent them from being in the middle of a pagetable update
@@ -93,7 +87,7 @@ static void do_suspend(void)
 	err = freeze_processes();
 	if (err) {
 		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-		goto out_destroy_sm;
+		goto out;
 	}
 #endif
 
@@ -136,12 +130,8 @@ out_resume:
 out_thaw:
 #ifdef CONFIG_PREEMPT
 	thaw_processes();
-
-out_destroy_sm:
-#endif
-	stop_machine_destroy();
-
 out:
+#endif
 	shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
```
```diff
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index efcbd6c37947..0e552e72a4c4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
```
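After this hunk the SMP side of the header is down to the two entry points; a sketch of what survives:

```c
/* Sketch of the surviving SMP declarations in
 * include/linux/stop_machine.h after this patch. */
int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
```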
```diff
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 914aedcde849..545777574779 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
 {
 	int err;
 
-	err = stop_machine_create();
-	if (err)
-		return err;
 	cpu_maps_update_begin();
 
 	if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return err;
 }
 EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error;
 
-	error = stop_machine_create();
-	if (error)
-		return error;
 	cpu_maps_update_begin();
 	first_cpu = cpumask_first(cpu_online_mask);
 	/*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return error;
 }
 
```
```diff
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..0838246d8c94 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		return -EFAULT;
 	name[MODULE_NAME_LEN-1] = '\0';
 
-	/* Create stop_machine threads since free_module relies on
-	 * a non-failing stop_machine call. */
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
-
-	if (mutex_lock_interruptible(&module_mutex) != 0) {
-		ret = -EINTR;
-		goto out_stop;
-	}
+	if (mutex_lock_interruptible(&module_mutex) != 0)
+		return -EINTR;
 
 	mod = find_module(name);
 	if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 
 out:
 	mutex_unlock(&module_mutex);
-out_stop:
-	stop_machine_destroy();
 	return ret;
 }
 
```
```diff
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 7e3f9182aef3..884c7a1afeed 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -388,174 +388,92 @@ enum stopmachine_state {
 	/* Exit */
 	STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
 	int (*fn)(void *);
 	void *data;
-	int fnret;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int num_threads;
+	const struct cpumask *active_cpus;
+
+	enum stopmachine_state state;
+	atomic_t thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+		      enum stopmachine_state newstate)
 {
 	/* Reset ack counter. */
-	atomic_set(&thread_ack, num_threads);
+	atomic_set(&smdata->thread_ack, smdata->num_threads);
 	smp_wmb();
-	state = newstate;
+	smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-	if (atomic_dec_and_test(&thread_ack))
-		set_state(state + 1);
+	if (atomic_dec_and_test(&smdata->thread_ack))
+		set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	struct stop_machine_data *smdata = &idle;
-	int cpu = smp_processor_id();
-	int err;
+	int cpu = smp_processor_id(), err = 0;
+	bool is_active;
+
+	if (!smdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-	if (!active_cpus) {
-		if (cpu == cpumask_first(cpu_online_mask))
-			smdata = &active;
-	} else {
-		if (cpumask_test_cpu(cpu, active_cpus))
-			smdata = &active;
-	}
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read stopmachine_state. */
 		cpu_relax();
-		if (state != curstate) {
-			curstate = state;
+		if (smdata->state != curstate) {
+			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
 				local_irq_disable();
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
-				/* On multiple CPUs only a single error code
-				 * is needed to tell that something failed. */
-				err = smdata->fn(smdata->data);
-				if (err)
-					smdata->fnret = err;
+				if (is_active)
+					err = smdata->fn(smdata->data);
 				break;
 			default:
 				break;
 			}
-			ack_state();
+			ack_state(smdata);
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
 	local_irq_enable();
+	return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-	return 0;
-}
-
-int stop_machine_create(void)
-{
-	mutex_lock(&setup_lock);
-	if (refcount)
-		goto done;
-	stop_machine_wq = create_rt_workqueue("kstop");
-	if (!stop_machine_wq)
-		goto err_out;
-	stop_machine_work = alloc_percpu(struct work_struct);
-	if (!stop_machine_work)
-		goto err_out;
-done:
-	refcount++;
-	mutex_unlock(&setup_lock);
-	return 0;
-
-err_out:
-	if (stop_machine_wq)
-		destroy_workqueue(stop_machine_wq);
-	mutex_unlock(&setup_lock);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-	mutex_lock(&setup_lock);
-	refcount--;
-	if (refcount)
-		goto done;
-	destroy_workqueue(stop_machine_wq);
-	free_percpu(stop_machine_work);
-done:
-	mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct work_struct *sm_work;
-	int i, ret;
-
-	/* Set up initial state. */
-	mutex_lock(&lock);
-	num_threads = num_online_cpus();
-	active_cpus = cpus;
-	active.fn = fn;
-	active.data = data;
-	active.fnret = 0;
-	idle.fn = chill;
-	idle.data = NULL;
-
-	set_state(STOPMACHINE_PREPARE);
-
-	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
-	 * doesn't hit this CPU until we're ready. */
-	get_cpu();
-	for_each_online_cpu(i) {
-		sm_work = per_cpu_ptr(stop_machine_work, i);
-		INIT_WORK(sm_work, stop_cpu);
-		queue_work_on(i, stop_machine_wq, sm_work);
-	}
-	/* This will release the thread on our CPU. */
-	put_cpu();
-	flush_workqueue(stop_machine_wq);
-	ret = active.fnret;
-	mutex_unlock(&lock);
-	return ret;
+	struct stop_machine_data smdata = { .fn = fn, .data = data,
+					    .num_threads = num_online_cpus(),
+					    .active_cpus = cpus };
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&smdata, STOPMACHINE_PREPARE);
+	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
```
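For context, ack_state()'s `set_state(smdata, smdata->state + 1)` steps through the states in declaration order, which this patch leaves unchanged (reproduced from kernel/stop_machine.c at this revision):

```c
enum stopmachine_state {
	/* Dummy starting state for thread. */
	STOPMACHINE_NONE,
	/* Awaiting everyone to be scheduled. */
	STOPMACHINE_PREPARE,
	/* Disable interrupts. */
	STOPMACHINE_DISABLE_IRQ,
	/* Run the function */
	STOPMACHINE_RUN,
	/* Exit */
	STOPMACHINE_EXIT,
};
```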