diff options
| author | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:20 -0400 |
| commit | 3fc1f1e27a5b807791d72e5d992aa33b668a6626 (patch) | |
| tree | 396c2f49909c506c3ad53fd6a9bdddf6c24f7860 /kernel/stop_machine.c | |
| parent | 1142d810298e694754498dbb4983fcb6cb7fd884 (diff) | |
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop. As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and have been removed.
With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler. Asking the cpu_stop to execute
the stop_cpu() state machine on all online cpus with cpu hotplug
disabled is enough.
stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.
The previous implementation created and destroyed RT workqueues as
necessary which made stop_machine() calls highly expensive on very
large machines. According to Dimitri Sivanich, preventing the dynamic
creation/destruction makes booting more than twice as fast on very
large machines. cpu_stop resources are preallocated for all online
cpus and should have the same effect.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Diffstat (limited to 'kernel/stop_machine.c')
| -rw-r--r-- | kernel/stop_machine.c | 158 |
1 file changed, 38 insertions, 120 deletions
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 7e3f9182aef3..884c7a1afeed 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
| @@ -388,174 +388,92 @@ enum stopmachine_state { | |||
| 388 | /* Exit */ | 388 | /* Exit */ |
| 389 | STOPMACHINE_EXIT, | 389 | STOPMACHINE_EXIT, |
| 390 | }; | 390 | }; |
| 391 | static enum stopmachine_state state; | ||
| 392 | 391 | ||
| 393 | struct stop_machine_data { | 392 | struct stop_machine_data { |
| 394 | int (*fn)(void *); | 393 | int (*fn)(void *); |
| 395 | void *data; | 394 | void *data; |
| 396 | int fnret; | 395 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
| 396 | unsigned int num_threads; | ||
| 397 | const struct cpumask *active_cpus; | ||
| 398 | |||
| 399 | enum stopmachine_state state; | ||
| 400 | atomic_t thread_ack; | ||
| 397 | }; | 401 | }; |
| 398 | 402 | ||
| 399 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 403 | static void set_state(struct stop_machine_data *smdata, |
| 400 | static unsigned int num_threads; | 404 | enum stopmachine_state newstate) |
| 401 | static atomic_t thread_ack; | ||
| 402 | static DEFINE_MUTEX(lock); | ||
| 403 | /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ | ||
| 404 | static DEFINE_MUTEX(setup_lock); | ||
| 405 | /* Users of stop_machine. */ | ||
| 406 | static int refcount; | ||
| 407 | static struct workqueue_struct *stop_machine_wq; | ||
| 408 | static struct stop_machine_data active, idle; | ||
| 409 | static const struct cpumask *active_cpus; | ||
| 410 | static void __percpu *stop_machine_work; | ||
| 411 | |||
| 412 | static void set_state(enum stopmachine_state newstate) | ||
| 413 | { | 405 | { |
| 414 | /* Reset ack counter. */ | 406 | /* Reset ack counter. */ |
| 415 | atomic_set(&thread_ack, num_threads); | 407 | atomic_set(&smdata->thread_ack, smdata->num_threads); |
| 416 | smp_wmb(); | 408 | smp_wmb(); |
| 417 | state = newstate; | 409 | smdata->state = newstate; |
| 418 | } | 410 | } |
| 419 | 411 | ||
| 420 | /* Last one to ack a state moves to the next state. */ | 412 | /* Last one to ack a state moves to the next state. */ |
| 421 | static void ack_state(void) | 413 | static void ack_state(struct stop_machine_data *smdata) |
| 422 | { | 414 | { |
| 423 | if (atomic_dec_and_test(&thread_ack)) | 415 | if (atomic_dec_and_test(&smdata->thread_ack)) |
| 424 | set_state(state + 1); | 416 | set_state(smdata, smdata->state + 1); |
| 425 | } | 417 | } |
| 426 | 418 | ||
| 427 | /* This is the actual function which stops the CPU. It runs | 419 | /* This is the cpu_stop function which stops the CPU. */ |
| 428 | * in the context of a dedicated stopmachine workqueue. */ | 420 | static int stop_machine_cpu_stop(void *data) |
| 429 | static void stop_cpu(struct work_struct *unused) | ||
| 430 | { | 421 | { |
| 422 | struct stop_machine_data *smdata = data; | ||
| 431 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 423 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
| 432 | struct stop_machine_data *smdata = &idle; | 424 | int cpu = smp_processor_id(), err = 0; |
| 433 | int cpu = smp_processor_id(); | 425 | bool is_active; |
| 434 | int err; | 426 | |
| 427 | if (!smdata->active_cpus) | ||
| 428 | is_active = cpu == cpumask_first(cpu_online_mask); | ||
| 429 | else | ||
| 430 | is_active = cpumask_test_cpu(cpu, smdata->active_cpus); | ||
| 435 | 431 | ||
| 436 | if (!active_cpus) { | ||
| 437 | if (cpu == cpumask_first(cpu_online_mask)) | ||
| 438 | smdata = &active; | ||
| 439 | } else { | ||
| 440 | if (cpumask_test_cpu(cpu, active_cpus)) | ||
| 441 | smdata = &active; | ||
| 442 | } | ||
| 443 | /* Simple state machine */ | 432 | /* Simple state machine */ |
| 444 | do { | 433 | do { |
| 445 | /* Chill out and ensure we re-read stopmachine_state. */ | 434 | /* Chill out and ensure we re-read stopmachine_state. */ |
| 446 | cpu_relax(); | 435 | cpu_relax(); |
| 447 | if (state != curstate) { | 436 | if (smdata->state != curstate) { |
| 448 | curstate = state; | 437 | curstate = smdata->state; |
| 449 | switch (curstate) { | 438 | switch (curstate) { |
| 450 | case STOPMACHINE_DISABLE_IRQ: | 439 | case STOPMACHINE_DISABLE_IRQ: |
| 451 | local_irq_disable(); | 440 | local_irq_disable(); |
| 452 | hard_irq_disable(); | 441 | hard_irq_disable(); |
| 453 | break; | 442 | break; |
| 454 | case STOPMACHINE_RUN: | 443 | case STOPMACHINE_RUN: |
| 455 | /* On multiple CPUs only a single error code | 444 | if (is_active) |
| 456 | * is needed to tell that something failed. */ | 445 | err = smdata->fn(smdata->data); |
| 457 | err = smdata->fn(smdata->data); | ||
| 458 | if (err) | ||
| 459 | smdata->fnret = err; | ||
| 460 | break; | 446 | break; |
| 461 | default: | 447 | default: |
| 462 | break; | 448 | break; |
| 463 | } | 449 | } |
| 464 | ack_state(); | 450 | ack_state(smdata); |
| 465 | } | 451 | } |
| 466 | } while (curstate != STOPMACHINE_EXIT); | 452 | } while (curstate != STOPMACHINE_EXIT); |
| 467 | 453 | ||
| 468 | local_irq_enable(); | 454 | local_irq_enable(); |
| 455 | return err; | ||
| 469 | } | 456 | } |
| 470 | 457 | ||
| 471 | /* Callback for CPUs which aren't supposed to do anything. */ | ||
| 472 | static int chill(void *unused) | ||
| 473 | { | ||
| 474 | return 0; | ||
| 475 | } | ||
| 476 | |||
| 477 | int stop_machine_create(void) | ||
| 478 | { | ||
| 479 | mutex_lock(&setup_lock); | ||
| 480 | if (refcount) | ||
| 481 | goto done; | ||
| 482 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
| 483 | if (!stop_machine_wq) | ||
| 484 | goto err_out; | ||
| 485 | stop_machine_work = alloc_percpu(struct work_struct); | ||
| 486 | if (!stop_machine_work) | ||
| 487 | goto err_out; | ||
| 488 | done: | ||
| 489 | refcount++; | ||
| 490 | mutex_unlock(&setup_lock); | ||
| 491 | return 0; | ||
| 492 | |||
| 493 | err_out: | ||
| 494 | if (stop_machine_wq) | ||
| 495 | destroy_workqueue(stop_machine_wq); | ||
| 496 | mutex_unlock(&setup_lock); | ||
| 497 | return -ENOMEM; | ||
| 498 | } | ||
| 499 | EXPORT_SYMBOL_GPL(stop_machine_create); | ||
| 500 | |||
| 501 | void stop_machine_destroy(void) | ||
| 502 | { | ||
| 503 | mutex_lock(&setup_lock); | ||
| 504 | refcount--; | ||
| 505 | if (refcount) | ||
| 506 | goto done; | ||
| 507 | destroy_workqueue(stop_machine_wq); | ||
| 508 | free_percpu(stop_machine_work); | ||
| 509 | done: | ||
| 510 | mutex_unlock(&setup_lock); | ||
| 511 | } | ||
| 512 | EXPORT_SYMBOL_GPL(stop_machine_destroy); | ||
| 513 | |||
| 514 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 458 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
| 515 | { | 459 | { |
| 516 | struct work_struct *sm_work; | 460 | struct stop_machine_data smdata = { .fn = fn, .data = data, |
| 517 | int i, ret; | 461 | .num_threads = num_online_cpus(), |
| 518 | 462 | .active_cpus = cpus }; | |
| 519 | /* Set up initial state. */ | 463 | |
| 520 | mutex_lock(&lock); | 464 | /* Set the initial state and stop all online cpus. */ |
| 521 | num_threads = num_online_cpus(); | 465 | set_state(&smdata, STOPMACHINE_PREPARE); |
| 522 | active_cpus = cpus; | 466 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); |
| 523 | active.fn = fn; | ||
| 524 | active.data = data; | ||
| 525 | active.fnret = 0; | ||
| 526 | idle.fn = chill; | ||
| 527 | idle.data = NULL; | ||
| 528 | |||
| 529 | set_state(STOPMACHINE_PREPARE); | ||
| 530 | |||
| 531 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one | ||
| 532 | * doesn't hit this CPU until we're ready. */ | ||
| 533 | get_cpu(); | ||
| 534 | for_each_online_cpu(i) { | ||
| 535 | sm_work = per_cpu_ptr(stop_machine_work, i); | ||
| 536 | INIT_WORK(sm_work, stop_cpu); | ||
| 537 | queue_work_on(i, stop_machine_wq, sm_work); | ||
| 538 | } | ||
| 539 | /* This will release the thread on our CPU. */ | ||
| 540 | put_cpu(); | ||
| 541 | flush_workqueue(stop_machine_wq); | ||
| 542 | ret = active.fnret; | ||
| 543 | mutex_unlock(&lock); | ||
| 544 | return ret; | ||
| 545 | } | 467 | } |
| 546 | 468 | ||
| 547 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 469 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
| 548 | { | 470 | { |
| 549 | int ret; | 471 | int ret; |
| 550 | 472 | ||
| 551 | ret = stop_machine_create(); | ||
| 552 | if (ret) | ||
| 553 | return ret; | ||
| 554 | /* No CPUs can come up or down during this. */ | 473 | /* No CPUs can come up or down during this. */ |
| 555 | get_online_cpus(); | 474 | get_online_cpus(); |
| 556 | ret = __stop_machine(fn, data, cpus); | 475 | ret = __stop_machine(fn, data, cpus); |
| 557 | put_online_cpus(); | 476 | put_online_cpus(); |
| 558 | stop_machine_destroy(); | ||
| 559 | return ret; | 477 | return ret; |
| 560 | } | 478 | } |
| 561 | EXPORT_SYMBOL_GPL(stop_machine); | 479 | EXPORT_SYMBOL_GPL(stop_machine); |
