diff options
author | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
commit | 3fc1f1e27a5b807791d72e5d992aa33b668a6626 (patch) | |
tree | 396c2f49909c506c3ad53fd6a9bdddf6c24f7860 /kernel | |
parent | 1142d810298e694754498dbb4983fcb6cb7fd884 (diff) |
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop. As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and removed.
With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler. Asking the cpu_stop to execute
the stop_cpu() state machine on all online cpus with cpu hotplug
disabled is enough.
stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.
The previous implementation created and destroyed RT workqueues as
necessary which made stop_machine() calls highly expensive on very
large machines. According to Dimitri Sivanich, preventing the dynamic
creation/destruction makes booting more than twice as fast on very
large machines. cpu_stop resources are preallocated for all online
cpus and should have the same effect.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 8 | ||||
-rw-r--r-- | kernel/module.c | 14 | ||||
-rw-r--r-- | kernel/stop_machine.c | 158 |
3 files changed, 40 insertions, 140 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 914aedcde849..545777574779 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu) | |||
266 | { | 266 | { |
267 | int err; | 267 | int err; |
268 | 268 | ||
269 | err = stop_machine_create(); | ||
270 | if (err) | ||
271 | return err; | ||
272 | cpu_maps_update_begin(); | 269 | cpu_maps_update_begin(); |
273 | 270 | ||
274 | if (cpu_hotplug_disabled) { | 271 | if (cpu_hotplug_disabled) { |
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu) | |||
280 | 277 | ||
281 | out: | 278 | out: |
282 | cpu_maps_update_done(); | 279 | cpu_maps_update_done(); |
283 | stop_machine_destroy(); | ||
284 | return err; | 280 | return err; |
285 | } | 281 | } |
286 | EXPORT_SYMBOL(cpu_down); | 282 | EXPORT_SYMBOL(cpu_down); |
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void) | |||
361 | { | 357 | { |
362 | int cpu, first_cpu, error; | 358 | int cpu, first_cpu, error; |
363 | 359 | ||
364 | error = stop_machine_create(); | ||
365 | if (error) | ||
366 | return error; | ||
367 | cpu_maps_update_begin(); | 360 | cpu_maps_update_begin(); |
368 | first_cpu = cpumask_first(cpu_online_mask); | 361 | first_cpu = cpumask_first(cpu_online_mask); |
369 | /* | 362 | /* |
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void) | |||
394 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); | 387 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
395 | } | 388 | } |
396 | cpu_maps_update_done(); | 389 | cpu_maps_update_done(); |
397 | stop_machine_destroy(); | ||
398 | return error; | 390 | return error; |
399 | } | 391 | } |
400 | 392 | ||
diff --git a/kernel/module.c b/kernel/module.c index 1016b75b026a..0838246d8c94 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
723 | return -EFAULT; | 723 | return -EFAULT; |
724 | name[MODULE_NAME_LEN-1] = '\0'; | 724 | name[MODULE_NAME_LEN-1] = '\0'; |
725 | 725 | ||
726 | /* Create stop_machine threads since free_module relies on | 726 | if (mutex_lock_interruptible(&module_mutex) != 0) |
727 | * a non-failing stop_machine call. */ | 727 | return -EINTR; |
728 | ret = stop_machine_create(); | ||
729 | if (ret) | ||
730 | return ret; | ||
731 | |||
732 | if (mutex_lock_interruptible(&module_mutex) != 0) { | ||
733 | ret = -EINTR; | ||
734 | goto out_stop; | ||
735 | } | ||
736 | 728 | ||
737 | mod = find_module(name); | 729 | mod = find_module(name); |
738 | if (!mod) { | 730 | if (!mod) { |
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
792 | 784 | ||
793 | out: | 785 | out: |
794 | mutex_unlock(&module_mutex); | 786 | mutex_unlock(&module_mutex); |
795 | out_stop: | ||
796 | stop_machine_destroy(); | ||
797 | return ret; | 787 | return ret; |
798 | } | 788 | } |
799 | 789 | ||
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 7e3f9182aef3..884c7a1afeed 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -388,174 +388,92 @@ enum stopmachine_state { | |||
388 | /* Exit */ | 388 | /* Exit */ |
389 | STOPMACHINE_EXIT, | 389 | STOPMACHINE_EXIT, |
390 | }; | 390 | }; |
391 | static enum stopmachine_state state; | ||
392 | 391 | ||
393 | struct stop_machine_data { | 392 | struct stop_machine_data { |
394 | int (*fn)(void *); | 393 | int (*fn)(void *); |
395 | void *data; | 394 | void *data; |
396 | int fnret; | 395 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
396 | unsigned int num_threads; | ||
397 | const struct cpumask *active_cpus; | ||
398 | |||
399 | enum stopmachine_state state; | ||
400 | atomic_t thread_ack; | ||
397 | }; | 401 | }; |
398 | 402 | ||
399 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 403 | static void set_state(struct stop_machine_data *smdata, |
400 | static unsigned int num_threads; | 404 | enum stopmachine_state newstate) |
401 | static atomic_t thread_ack; | ||
402 | static DEFINE_MUTEX(lock); | ||
403 | /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ | ||
404 | static DEFINE_MUTEX(setup_lock); | ||
405 | /* Users of stop_machine. */ | ||
406 | static int refcount; | ||
407 | static struct workqueue_struct *stop_machine_wq; | ||
408 | static struct stop_machine_data active, idle; | ||
409 | static const struct cpumask *active_cpus; | ||
410 | static void __percpu *stop_machine_work; | ||
411 | |||
412 | static void set_state(enum stopmachine_state newstate) | ||
413 | { | 405 | { |
414 | /* Reset ack counter. */ | 406 | /* Reset ack counter. */ |
415 | atomic_set(&thread_ack, num_threads); | 407 | atomic_set(&smdata->thread_ack, smdata->num_threads); |
416 | smp_wmb(); | 408 | smp_wmb(); |
417 | state = newstate; | 409 | smdata->state = newstate; |
418 | } | 410 | } |
419 | 411 | ||
420 | /* Last one to ack a state moves to the next state. */ | 412 | /* Last one to ack a state moves to the next state. */ |
421 | static void ack_state(void) | 413 | static void ack_state(struct stop_machine_data *smdata) |
422 | { | 414 | { |
423 | if (atomic_dec_and_test(&thread_ack)) | 415 | if (atomic_dec_and_test(&smdata->thread_ack)) |
424 | set_state(state + 1); | 416 | set_state(smdata, smdata->state + 1); |
425 | } | 417 | } |
426 | 418 | ||
427 | /* This is the actual function which stops the CPU. It runs | 419 | /* This is the cpu_stop function which stops the CPU. */ |
428 | * in the context of a dedicated stopmachine workqueue. */ | 420 | static int stop_machine_cpu_stop(void *data) |
429 | static void stop_cpu(struct work_struct *unused) | ||
430 | { | 421 | { |
422 | struct stop_machine_data *smdata = data; | ||
431 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 423 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
432 | struct stop_machine_data *smdata = &idle; | 424 | int cpu = smp_processor_id(), err = 0; |
433 | int cpu = smp_processor_id(); | 425 | bool is_active; |
434 | int err; | 426 | |
427 | if (!smdata->active_cpus) | ||
428 | is_active = cpu == cpumask_first(cpu_online_mask); | ||
429 | else | ||
430 | is_active = cpumask_test_cpu(cpu, smdata->active_cpus); | ||
435 | 431 | ||
436 | if (!active_cpus) { | ||
437 | if (cpu == cpumask_first(cpu_online_mask)) | ||
438 | smdata = &active; | ||
439 | } else { | ||
440 | if (cpumask_test_cpu(cpu, active_cpus)) | ||
441 | smdata = &active; | ||
442 | } | ||
443 | /* Simple state machine */ | 432 | /* Simple state machine */ |
444 | do { | 433 | do { |
445 | /* Chill out and ensure we re-read stopmachine_state. */ | 434 | /* Chill out and ensure we re-read stopmachine_state. */ |
446 | cpu_relax(); | 435 | cpu_relax(); |
447 | if (state != curstate) { | 436 | if (smdata->state != curstate) { |
448 | curstate = state; | 437 | curstate = smdata->state; |
449 | switch (curstate) { | 438 | switch (curstate) { |
450 | case STOPMACHINE_DISABLE_IRQ: | 439 | case STOPMACHINE_DISABLE_IRQ: |
451 | local_irq_disable(); | 440 | local_irq_disable(); |
452 | hard_irq_disable(); | 441 | hard_irq_disable(); |
453 | break; | 442 | break; |
454 | case STOPMACHINE_RUN: | 443 | case STOPMACHINE_RUN: |
455 | /* On multiple CPUs only a single error code | 444 | if (is_active) |
456 | * is needed to tell that something failed. */ | 445 | err = smdata->fn(smdata->data); |
457 | err = smdata->fn(smdata->data); | ||
458 | if (err) | ||
459 | smdata->fnret = err; | ||
460 | break; | 446 | break; |
461 | default: | 447 | default: |
462 | break; | 448 | break; |
463 | } | 449 | } |
464 | ack_state(); | 450 | ack_state(smdata); |
465 | } | 451 | } |
466 | } while (curstate != STOPMACHINE_EXIT); | 452 | } while (curstate != STOPMACHINE_EXIT); |
467 | 453 | ||
468 | local_irq_enable(); | 454 | local_irq_enable(); |
455 | return err; | ||
469 | } | 456 | } |
470 | 457 | ||
471 | /* Callback for CPUs which aren't supposed to do anything. */ | ||
472 | static int chill(void *unused) | ||
473 | { | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | int stop_machine_create(void) | ||
478 | { | ||
479 | mutex_lock(&setup_lock); | ||
480 | if (refcount) | ||
481 | goto done; | ||
482 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
483 | if (!stop_machine_wq) | ||
484 | goto err_out; | ||
485 | stop_machine_work = alloc_percpu(struct work_struct); | ||
486 | if (!stop_machine_work) | ||
487 | goto err_out; | ||
488 | done: | ||
489 | refcount++; | ||
490 | mutex_unlock(&setup_lock); | ||
491 | return 0; | ||
492 | |||
493 | err_out: | ||
494 | if (stop_machine_wq) | ||
495 | destroy_workqueue(stop_machine_wq); | ||
496 | mutex_unlock(&setup_lock); | ||
497 | return -ENOMEM; | ||
498 | } | ||
499 | EXPORT_SYMBOL_GPL(stop_machine_create); | ||
500 | |||
501 | void stop_machine_destroy(void) | ||
502 | { | ||
503 | mutex_lock(&setup_lock); | ||
504 | refcount--; | ||
505 | if (refcount) | ||
506 | goto done; | ||
507 | destroy_workqueue(stop_machine_wq); | ||
508 | free_percpu(stop_machine_work); | ||
509 | done: | ||
510 | mutex_unlock(&setup_lock); | ||
511 | } | ||
512 | EXPORT_SYMBOL_GPL(stop_machine_destroy); | ||
513 | |||
514 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 458 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
515 | { | 459 | { |
516 | struct work_struct *sm_work; | 460 | struct stop_machine_data smdata = { .fn = fn, .data = data, |
517 | int i, ret; | 461 | .num_threads = num_online_cpus(), |
518 | 462 | .active_cpus = cpus }; | |
519 | /* Set up initial state. */ | 463 | |
520 | mutex_lock(&lock); | 464 | /* Set the initial state and stop all online cpus. */ |
521 | num_threads = num_online_cpus(); | 465 | set_state(&smdata, STOPMACHINE_PREPARE); |
522 | active_cpus = cpus; | 466 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); |
523 | active.fn = fn; | ||
524 | active.data = data; | ||
525 | active.fnret = 0; | ||
526 | idle.fn = chill; | ||
527 | idle.data = NULL; | ||
528 | |||
529 | set_state(STOPMACHINE_PREPARE); | ||
530 | |||
531 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one | ||
532 | * doesn't hit this CPU until we're ready. */ | ||
533 | get_cpu(); | ||
534 | for_each_online_cpu(i) { | ||
535 | sm_work = per_cpu_ptr(stop_machine_work, i); | ||
536 | INIT_WORK(sm_work, stop_cpu); | ||
537 | queue_work_on(i, stop_machine_wq, sm_work); | ||
538 | } | ||
539 | /* This will release the thread on our CPU. */ | ||
540 | put_cpu(); | ||
541 | flush_workqueue(stop_machine_wq); | ||
542 | ret = active.fnret; | ||
543 | mutex_unlock(&lock); | ||
544 | return ret; | ||
545 | } | 467 | } |
546 | 468 | ||
547 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 469 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
548 | { | 470 | { |
549 | int ret; | 471 | int ret; |
550 | 472 | ||
551 | ret = stop_machine_create(); | ||
552 | if (ret) | ||
553 | return ret; | ||
554 | /* No CPUs can come up or down during this. */ | 473 | /* No CPUs can come up or down during this. */ |
555 | get_online_cpus(); | 474 | get_online_cpus(); |
556 | ret = __stop_machine(fn, data, cpus); | 475 | ret = __stop_machine(fn, data, cpus); |
557 | put_online_cpus(); | 476 | put_online_cpus(); |
558 | stop_machine_destroy(); | ||
559 | return ret; | 477 | return ret; |
560 | } | 478 | } |
561 | EXPORT_SYMBOL_GPL(stop_machine); | 479 | EXPORT_SYMBOL_GPL(stop_machine); |