author		Tejun Heo <tj@kernel.org>	2010-05-06 12:49:20 -0400
committer	Tejun Heo <tj@kernel.org>	2010-05-06 12:49:20 -0400
commit		3fc1f1e27a5b807791d72e5d992aa33b668a6626 (patch)
tree		396c2f49909c506c3ad53fd6a9bdddf6c24f7860
parent		1142d810298e694754498dbb4983fcb6cb7fd884 (diff)
stop_machine: reimplement using cpu_stop
Reimplement stop_machine using cpu_stop.  As cpu stoppers are guaranteed to
be available for all online cpus, stop_machine_create/destroy() are no
longer necessary and removed.

With resource management and synchronization handled by cpu_stop, the new
implementation is much simpler.  Asking the cpu_stop to execute the
stop_cpu() state machine on all online cpus with cpu hotplug disabled is
enough.

stop_machine itself doesn't need to manage any global resources anymore, so
all per-instance information is rolled into struct stop_machine_data, and
the mutex and all static data variables are removed.

The previous implementation created and destroyed RT workqueues as needed,
which made stop_machine() calls highly expensive on very large machines.
According to Dimitri Sivanich, eliminating the dynamic creation/destruction
makes booting more than twice as fast on very large machines.  cpu_stop
resources are preallocated for all online cpus and should have the same
effect.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
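As a minimal sketch of what callers look like after this change (hypothetical
names apply_patch()/example_caller(), not part of this patch): there is no
longer a create/destroy step, a caller simply invokes stop_machine() and its
function runs on one CPU while every other online CPU spins with interrupts
disabled until it returns.

	#include <linux/stop_machine.h>

	/* Hypothetical illustration only, not part of this patch. */
	static int apply_patch(void *arg)
	{
		/*
		 * Runs with the whole machine stopped: one CPU executes this
		 * while all other online CPUs spin with IRQs disabled, so it
		 * must not sleep or take sleeping locks.
		 */
		return 0;	/* becomes stop_machine()'s return value */
	}

	static int example_caller(void)
	{
		/* NULL cpumask: apply_patch() runs on the first online CPU */
		return stop_machine(apply_patch, NULL, NULL);
	}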
-rw-r--r--	arch/s390/kernel/time.c		1
-rw-r--r--	drivers/xen/manage.c		14
-rw-r--r--	include/linux/stop_machine.h	20
-rw-r--r--	kernel/cpu.c			8
-rw-r--r--	kernel/module.c			14
-rw-r--r--	kernel/stop_machine.c		158
6 files changed, 42 insertions(+), 173 deletions(-)
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fba6dec156bf..03d96569f187 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -390,7 +390,6 @@ static void __init time_init_wq(void)
 	if (time_sync_wq)
 		return;
 	time_sync_wq = create_singlethread_workqueue("timesync");
-	stop_machine_create();
 }
 
 /*
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 2ac4440e7b08..8943b8ccee1a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -80,12 +80,6 @@ static void do_suspend(void)
 
 	shutting_down = SHUTDOWN_SUSPEND;
 
-	err = stop_machine_create();
-	if (err) {
-		printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-		goto out;
-	}
-
 #ifdef CONFIG_PREEMPT
 	/* If the kernel is preemptible, we need to freeze all the processes
 	   to prevent them from being in the middle of a pagetable update
@@ -93,7 +87,7 @@ static void do_suspend(void)
 	err = freeze_processes();
 	if (err) {
 		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-		goto out_destroy_sm;
+		goto out;
 	}
 #endif
 
@@ -136,12 +130,8 @@ out_resume:
 out_thaw:
 #ifdef CONFIG_PREEMPT
 	thaw_processes();
-
-out_destroy_sm:
-#endif
-	stop_machine_destroy();
-
 out:
+#endif
 	shutting_down = SHUTDOWN_INVALID;
 }
 #endif /* CONFIG_PM_SLEEP */
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index efcbd6c37947..0e552e72a4c4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 914aedcde849..545777574779 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
 {
 	int err;
 
-	err = stop_machine_create();
-	if (err)
-		return err;
 	cpu_maps_update_begin();
 
 	if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return err;
 }
 EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error;
 
-	error = stop_machine_create();
-	if (error)
-		return error;
 	cpu_maps_update_begin();
 	first_cpu = cpumask_first(cpu_online_mask);
 	/*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return error;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..0838246d8c94 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		return -EFAULT;
 	name[MODULE_NAME_LEN-1] = '\0';
 
-	/* Create stop_machine threads since free_module relies on
-	 * a non-failing stop_machine call. */
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
-
-	if (mutex_lock_interruptible(&module_mutex) != 0) {
-		ret = -EINTR;
-		goto out_stop;
-	}
+	if (mutex_lock_interruptible(&module_mutex) != 0)
+		return -EINTR;
 
 	mod = find_module(name);
 	if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 
  out:
 	mutex_unlock(&module_mutex);
-out_stop:
-	stop_machine_destroy();
 	return ret;
 }
 
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 7e3f9182aef3..884c7a1afeed 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -388,174 +388,92 @@ enum stopmachine_state {
 	/* Exit */
 	STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
 	int (*fn)(void *);
 	void *data;
-	int fnret;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int num_threads;
+	const struct cpumask *active_cpus;
+
+	enum stopmachine_state state;
+	atomic_t thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+		      enum stopmachine_state newstate)
 {
 	/* Reset ack counter. */
-	atomic_set(&thread_ack, num_threads);
+	atomic_set(&smdata->thread_ack, smdata->num_threads);
 	smp_wmb();
-	state = newstate;
+	smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-	if (atomic_dec_and_test(&thread_ack))
-		set_state(state + 1);
+	if (atomic_dec_and_test(&smdata->thread_ack))
+		set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	struct stop_machine_data *smdata = &idle;
-	int cpu = smp_processor_id();
-	int err;
+	int cpu = smp_processor_id(), err = 0;
+	bool is_active;
+
+	if (!smdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-	if (!active_cpus) {
-		if (cpu == cpumask_first(cpu_online_mask))
-			smdata = &active;
-	} else {
-		if (cpumask_test_cpu(cpu, active_cpus))
-			smdata = &active;
-	}
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read stopmachine_state. */
 		cpu_relax();
-		if (state != curstate) {
-			curstate = state;
+		if (smdata->state != curstate) {
+			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
 				local_irq_disable();
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
-				/* On multiple CPUs only a single error code
-				 * is needed to tell that something failed. */
-				err = smdata->fn(smdata->data);
-				if (err)
-					smdata->fnret = err;
+				if (is_active)
+					err = smdata->fn(smdata->data);
 				break;
 			default:
 				break;
 			}
-			ack_state();
+			ack_state(smdata);
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
 	local_irq_enable();
+	return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-	return 0;
-}
-
-int stop_machine_create(void)
-{
-	mutex_lock(&setup_lock);
-	if (refcount)
-		goto done;
-	stop_machine_wq = create_rt_workqueue("kstop");
-	if (!stop_machine_wq)
-		goto err_out;
-	stop_machine_work = alloc_percpu(struct work_struct);
-	if (!stop_machine_work)
-		goto err_out;
-done:
-	refcount++;
-	mutex_unlock(&setup_lock);
-	return 0;
-
-err_out:
-	if (stop_machine_wq)
-		destroy_workqueue(stop_machine_wq);
-	mutex_unlock(&setup_lock);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-	mutex_lock(&setup_lock);
-	refcount--;
-	if (refcount)
-		goto done;
-	destroy_workqueue(stop_machine_wq);
-	free_percpu(stop_machine_work);
-done:
-	mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct work_struct *sm_work;
-	int i, ret;
-
-	/* Set up initial state. */
-	mutex_lock(&lock);
-	num_threads = num_online_cpus();
-	active_cpus = cpus;
-	active.fn = fn;
-	active.data = data;
-	active.fnret = 0;
-	idle.fn = chill;
-	idle.data = NULL;
-
-	set_state(STOPMACHINE_PREPARE);
-
-	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
-	 * doesn't hit this CPU until we're ready. */
-	get_cpu();
-	for_each_online_cpu(i) {
-		sm_work = per_cpu_ptr(stop_machine_work, i);
-		INIT_WORK(sm_work, stop_cpu);
-		queue_work_on(i, stop_machine_wq, sm_work);
-	}
-	/* This will release the thread on our CPU. */
-	put_cpu();
-	flush_workqueue(stop_machine_wq);
-	ret = active.fnret;
-	mutex_unlock(&lock);
-	return ret;
+	struct stop_machine_data smdata = { .fn = fn, .data = data,
+					    .num_threads = num_online_cpus(),
+					    .active_cpus = cpus };
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&smdata, STOPMACHINE_PREPARE);
+	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);