aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2010-05-06 12:49:20 -0400
committer Tejun Heo <tj@kernel.org> 2010-05-06 12:49:20 -0400
commit 3fc1f1e27a5b807791d72e5d992aa33b668a6626 (patch)
tree 396c2f49909c506c3ad53fd6a9bdddf6c24f7860 /kernel
parent 1142d810298e694754498dbb4983fcb6cb7fd884 (diff)
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop. As cpu stoppers are guaranteed to be available for all online cpus, stop_machine_create/destroy() are no longer necessary and are removed. With resource management and synchronization handled by cpu_stop, the new implementation is much simpler. Asking cpu_stop to execute the stop_cpu() state machine on all online cpus with cpu hotplug disabled is enough. stop_machine itself doesn't need to manage any global resources anymore, so all per-instance information is rolled into struct stop_machine_data and the mutex and all static data variables are removed. The previous implementation created and destroyed RT workqueues as necessary, which made stop_machine() calls highly expensive on very large machines. According to Dimitri Sivanich, preventing the dynamic creation/destruction makes booting more than twice as fast on very large machines. cpu_stop resources are preallocated for all online cpus and should have the same effect. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Rusty Russell <rusty@rustcorp.com.au> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Dimitri Sivanich <sivanich@sgi.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cpu.c8
-rw-r--r--kernel/module.c14
-rw-r--r--kernel/stop_machine.c158
3 files changed, 40 insertions, 140 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 914aedcde849..545777574779 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
266{ 266{
267 int err; 267 int err;
268 268
269 err = stop_machine_create();
270 if (err)
271 return err;
272 cpu_maps_update_begin(); 269 cpu_maps_update_begin();
273 270
274 if (cpu_hotplug_disabled) { 271 if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
280 277
281out: 278out:
282 cpu_maps_update_done(); 279 cpu_maps_update_done();
283 stop_machine_destroy();
284 return err; 280 return err;
285} 281}
286EXPORT_SYMBOL(cpu_down); 282EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
361{ 357{
362 int cpu, first_cpu, error; 358 int cpu, first_cpu, error;
363 359
364 error = stop_machine_create();
365 if (error)
366 return error;
367 cpu_maps_update_begin(); 360 cpu_maps_update_begin();
368 first_cpu = cpumask_first(cpu_online_mask); 361 first_cpu = cpumask_first(cpu_online_mask);
369 /* 362 /*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
394 printk(KERN_ERR "Non-boot CPUs are not disabled\n"); 387 printk(KERN_ERR "Non-boot CPUs are not disabled\n");
395 } 388 }
396 cpu_maps_update_done(); 389 cpu_maps_update_done();
397 stop_machine_destroy();
398 return error; 390 return error;
399} 391}
400 392
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..0838246d8c94 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
723 return -EFAULT; 723 return -EFAULT;
724 name[MODULE_NAME_LEN-1] = '\0'; 724 name[MODULE_NAME_LEN-1] = '\0';
725 725
726 /* Create stop_machine threads since free_module relies on 726 if (mutex_lock_interruptible(&module_mutex) != 0)
727 * a non-failing stop_machine call. */ 727 return -EINTR;
728 ret = stop_machine_create();
729 if (ret)
730 return ret;
731
732 if (mutex_lock_interruptible(&module_mutex) != 0) {
733 ret = -EINTR;
734 goto out_stop;
735 }
736 728
737 mod = find_module(name); 729 mod = find_module(name);
738 if (!mod) { 730 if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
792 784
793 out: 785 out:
794 mutex_unlock(&module_mutex); 786 mutex_unlock(&module_mutex);
795out_stop:
796 stop_machine_destroy();
797 return ret; 787 return ret;
798} 788}
799 789
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 7e3f9182aef3..884c7a1afeed 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -388,174 +388,92 @@ enum stopmachine_state {
388 /* Exit */ 388 /* Exit */
389 STOPMACHINE_EXIT, 389 STOPMACHINE_EXIT,
390}; 390};
391static enum stopmachine_state state;
392 391
393struct stop_machine_data { 392struct stop_machine_data {
394 int (*fn)(void *); 393 int (*fn)(void *);
395 void *data; 394 void *data;
396 int fnret; 395 /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
396 unsigned int num_threads;
397 const struct cpumask *active_cpus;
398
399 enum stopmachine_state state;
400 atomic_t thread_ack;
397}; 401};
398 402
399/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ 403static void set_state(struct stop_machine_data *smdata,
400static unsigned int num_threads; 404 enum stopmachine_state newstate)
401static atomic_t thread_ack;
402static DEFINE_MUTEX(lock);
403/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
404static DEFINE_MUTEX(setup_lock);
405/* Users of stop_machine. */
406static int refcount;
407static struct workqueue_struct *stop_machine_wq;
408static struct stop_machine_data active, idle;
409static const struct cpumask *active_cpus;
410static void __percpu *stop_machine_work;
411
412static void set_state(enum stopmachine_state newstate)
413{ 405{
414 /* Reset ack counter. */ 406 /* Reset ack counter. */
415 atomic_set(&thread_ack, num_threads); 407 atomic_set(&smdata->thread_ack, smdata->num_threads);
416 smp_wmb(); 408 smp_wmb();
417 state = newstate; 409 smdata->state = newstate;
418} 410}
419 411
420/* Last one to ack a state moves to the next state. */ 412/* Last one to ack a state moves to the next state. */
421static void ack_state(void) 413static void ack_state(struct stop_machine_data *smdata)
422{ 414{
423 if (atomic_dec_and_test(&thread_ack)) 415 if (atomic_dec_and_test(&smdata->thread_ack))
424 set_state(state + 1); 416 set_state(smdata, smdata->state + 1);
425} 417}
426 418
427/* This is the actual function which stops the CPU. It runs 419/* This is the cpu_stop function which stops the CPU. */
428 * in the context of a dedicated stopmachine workqueue. */ 420static int stop_machine_cpu_stop(void *data)
429static void stop_cpu(struct work_struct *unused)
430{ 421{
422 struct stop_machine_data *smdata = data;
431 enum stopmachine_state curstate = STOPMACHINE_NONE; 423 enum stopmachine_state curstate = STOPMACHINE_NONE;
432 struct stop_machine_data *smdata = &idle; 424 int cpu = smp_processor_id(), err = 0;
433 int cpu = smp_processor_id(); 425 bool is_active;
434 int err; 426
427 if (!smdata->active_cpus)
428 is_active = cpu == cpumask_first(cpu_online_mask);
429 else
430 is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
435 431
436 if (!active_cpus) {
437 if (cpu == cpumask_first(cpu_online_mask))
438 smdata = &active;
439 } else {
440 if (cpumask_test_cpu(cpu, active_cpus))
441 smdata = &active;
442 }
443 /* Simple state machine */ 432 /* Simple state machine */
444 do { 433 do {
445 /* Chill out and ensure we re-read stopmachine_state. */ 434 /* Chill out and ensure we re-read stopmachine_state. */
446 cpu_relax(); 435 cpu_relax();
447 if (state != curstate) { 436 if (smdata->state != curstate) {
448 curstate = state; 437 curstate = smdata->state;
449 switch (curstate) { 438 switch (curstate) {
450 case STOPMACHINE_DISABLE_IRQ: 439 case STOPMACHINE_DISABLE_IRQ:
451 local_irq_disable(); 440 local_irq_disable();
452 hard_irq_disable(); 441 hard_irq_disable();
453 break; 442 break;
454 case STOPMACHINE_RUN: 443 case STOPMACHINE_RUN:
455 /* On multiple CPUs only a single error code 444 if (is_active)
456 * is needed to tell that something failed. */ 445 err = smdata->fn(smdata->data);
457 err = smdata->fn(smdata->data);
458 if (err)
459 smdata->fnret = err;
460 break; 446 break;
461 default: 447 default:
462 break; 448 break;
463 } 449 }
464 ack_state(); 450 ack_state(smdata);
465 } 451 }
466 } while (curstate != STOPMACHINE_EXIT); 452 } while (curstate != STOPMACHINE_EXIT);
467 453
468 local_irq_enable(); 454 local_irq_enable();
455 return err;
469} 456}
470 457
471/* Callback for CPUs which aren't supposed to do anything. */
472static int chill(void *unused)
473{
474 return 0;
475}
476
477int stop_machine_create(void)
478{
479 mutex_lock(&setup_lock);
480 if (refcount)
481 goto done;
482 stop_machine_wq = create_rt_workqueue("kstop");
483 if (!stop_machine_wq)
484 goto err_out;
485 stop_machine_work = alloc_percpu(struct work_struct);
486 if (!stop_machine_work)
487 goto err_out;
488done:
489 refcount++;
490 mutex_unlock(&setup_lock);
491 return 0;
492
493err_out:
494 if (stop_machine_wq)
495 destroy_workqueue(stop_machine_wq);
496 mutex_unlock(&setup_lock);
497 return -ENOMEM;
498}
499EXPORT_SYMBOL_GPL(stop_machine_create);
500
501void stop_machine_destroy(void)
502{
503 mutex_lock(&setup_lock);
504 refcount--;
505 if (refcount)
506 goto done;
507 destroy_workqueue(stop_machine_wq);
508 free_percpu(stop_machine_work);
509done:
510 mutex_unlock(&setup_lock);
511}
512EXPORT_SYMBOL_GPL(stop_machine_destroy);
513
514int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) 458int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
515{ 459{
516 struct work_struct *sm_work; 460 struct stop_machine_data smdata = { .fn = fn, .data = data,
517 int i, ret; 461 .num_threads = num_online_cpus(),
518 462 .active_cpus = cpus };
519 /* Set up initial state. */ 463
520 mutex_lock(&lock); 464 /* Set the initial state and stop all online cpus. */
521 num_threads = num_online_cpus(); 465 set_state(&smdata, STOPMACHINE_PREPARE);
522 active_cpus = cpus; 466 return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
523 active.fn = fn;
524 active.data = data;
525 active.fnret = 0;
526 idle.fn = chill;
527 idle.data = NULL;
528
529 set_state(STOPMACHINE_PREPARE);
530
531 /* Schedule the stop_cpu work on all cpus: hold this CPU so one
532 * doesn't hit this CPU until we're ready. */
533 get_cpu();
534 for_each_online_cpu(i) {
535 sm_work = per_cpu_ptr(stop_machine_work, i);
536 INIT_WORK(sm_work, stop_cpu);
537 queue_work_on(i, stop_machine_wq, sm_work);
538 }
539 /* This will release the thread on our CPU. */
540 put_cpu();
541 flush_workqueue(stop_machine_wq);
542 ret = active.fnret;
543 mutex_unlock(&lock);
544 return ret;
545} 467}
546 468
547int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) 469int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
548{ 470{
549 int ret; 471 int ret;
550 472
551 ret = stop_machine_create();
552 if (ret)
553 return ret;
554 /* No CPUs can come up or down during this. */ 473 /* No CPUs can come up or down during this. */
555 get_online_cpus(); 474 get_online_cpus();
556 ret = __stop_machine(fn, data, cpus); 475 ret = __stop_machine(fn, data, cpus);
557 put_online_cpus(); 476 put_online_cpus();
558 stop_machine_destroy();
559 return ret; 477 return ret;
560} 478}
561EXPORT_SYMBOL_GPL(stop_machine); 479EXPORT_SYMBOL_GPL(stop_machine);