diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-05-08 12:11:19 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-05-08 12:11:19 -0400 |
commit | e7858f52a5cb868289a72264534a1f05f3340c6c (patch) | |
tree | aa7308603cf30d8aec6e45ecaddc6c8ed29d2edb /kernel | |
parent | 27a9da6538ee18046d7bff8e36a9f783542c54c3 (diff) | |
parent | bbf1bb3eee86f2eef2baa14e600be454d09109ee (diff) |
Merge branch 'cpu_stop' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into sched/core
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/cpu.c | 8 | ||||
-rw-r--r-- | kernel/module.c | 14 | ||||
-rw-r--r-- | kernel/rcutorture.c | 2 | ||||
-rw-r--r-- | kernel/sched.c | 285 | ||||
-rw-r--r-- | kernel/sched_fair.c | 48 | ||||
-rw-r--r-- | kernel/stop_machine.c | 534 |
7 files changed, 507 insertions, 386 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index a987aa1676b5..149e18ef1ab1 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -68,7 +68,7 @@ obj-$(CONFIG_USER_NS) += user_namespace.o | |||
68 | obj-$(CONFIG_PID_NS) += pid_namespace.o | 68 | obj-$(CONFIG_PID_NS) += pid_namespace.o |
69 | obj-$(CONFIG_IKCONFIG) += configs.o | 69 | obj-$(CONFIG_IKCONFIG) += configs.o |
70 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | 70 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o |
71 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 71 | obj-$(CONFIG_SMP) += stop_machine.o |
72 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 72 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
73 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o | 73 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o |
74 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o | 74 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o |
diff --git a/kernel/cpu.c b/kernel/cpu.c index 914aedcde849..545777574779 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu) | |||
266 | { | 266 | { |
267 | int err; | 267 | int err; |
268 | 268 | ||
269 | err = stop_machine_create(); | ||
270 | if (err) | ||
271 | return err; | ||
272 | cpu_maps_update_begin(); | 269 | cpu_maps_update_begin(); |
273 | 270 | ||
274 | if (cpu_hotplug_disabled) { | 271 | if (cpu_hotplug_disabled) { |
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu) | |||
280 | 277 | ||
281 | out: | 278 | out: |
282 | cpu_maps_update_done(); | 279 | cpu_maps_update_done(); |
283 | stop_machine_destroy(); | ||
284 | return err; | 280 | return err; |
285 | } | 281 | } |
286 | EXPORT_SYMBOL(cpu_down); | 282 | EXPORT_SYMBOL(cpu_down); |
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void) | |||
361 | { | 357 | { |
362 | int cpu, first_cpu, error; | 358 | int cpu, first_cpu, error; |
363 | 359 | ||
364 | error = stop_machine_create(); | ||
365 | if (error) | ||
366 | return error; | ||
367 | cpu_maps_update_begin(); | 360 | cpu_maps_update_begin(); |
368 | first_cpu = cpumask_first(cpu_online_mask); | 361 | first_cpu = cpumask_first(cpu_online_mask); |
369 | /* | 362 | /* |
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void) | |||
394 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); | 387 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
395 | } | 388 | } |
396 | cpu_maps_update_done(); | 389 | cpu_maps_update_done(); |
397 | stop_machine_destroy(); | ||
398 | return error; | 390 | return error; |
399 | } | 391 | } |
400 | 392 | ||
diff --git a/kernel/module.c b/kernel/module.c index 1016b75b026a..0838246d8c94 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
723 | return -EFAULT; | 723 | return -EFAULT; |
724 | name[MODULE_NAME_LEN-1] = '\0'; | 724 | name[MODULE_NAME_LEN-1] = '\0'; |
725 | 725 | ||
726 | /* Create stop_machine threads since free_module relies on | 726 | if (mutex_lock_interruptible(&module_mutex) != 0) |
727 | * a non-failing stop_machine call. */ | 727 | return -EINTR; |
728 | ret = stop_machine_create(); | ||
729 | if (ret) | ||
730 | return ret; | ||
731 | |||
732 | if (mutex_lock_interruptible(&module_mutex) != 0) { | ||
733 | ret = -EINTR; | ||
734 | goto out_stop; | ||
735 | } | ||
736 | 728 | ||
737 | mod = find_module(name); | 729 | mod = find_module(name); |
738 | if (!mod) { | 730 | if (!mod) { |
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
792 | 784 | ||
793 | out: | 785 | out: |
794 | mutex_unlock(&module_mutex); | 786 | mutex_unlock(&module_mutex); |
795 | out_stop: | ||
796 | stop_machine_destroy(); | ||
797 | return ret; | 787 | return ret; |
798 | } | 788 | } |
799 | 789 | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 58df55bf83ed..2b676f3a0f26 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
@@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
669 | .sync = synchronize_sched_expedited, | 669 | .sync = synchronize_sched_expedited, |
670 | .cb_barrier = NULL, | 670 | .cb_barrier = NULL, |
671 | .fqs = rcu_sched_force_quiescent_state, | 671 | .fqs = rcu_sched_force_quiescent_state, |
672 | .stats = rcu_expedited_torture_stats, | 672 | .stats = NULL, |
673 | .irq_capable = 1, | 673 | .irq_capable = 1, |
674 | .name = "sched_expedited" | 674 | .name = "sched_expedited" |
675 | }; | 675 | }; |
diff --git a/kernel/sched.c b/kernel/sched.c index 11ac0eb0bce7..39aa9c7e22c0 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -55,9 +55,9 @@ | |||
55 | #include <linux/cpu.h> | 55 | #include <linux/cpu.h> |
56 | #include <linux/cpuset.h> | 56 | #include <linux/cpuset.h> |
57 | #include <linux/percpu.h> | 57 | #include <linux/percpu.h> |
58 | #include <linux/kthread.h> | ||
59 | #include <linux/proc_fs.h> | 58 | #include <linux/proc_fs.h> |
60 | #include <linux/seq_file.h> | 59 | #include <linux/seq_file.h> |
60 | #include <linux/stop_machine.h> | ||
61 | #include <linux/sysctl.h> | 61 | #include <linux/sysctl.h> |
62 | #include <linux/syscalls.h> | 62 | #include <linux/syscalls.h> |
63 | #include <linux/times.h> | 63 | #include <linux/times.h> |
@@ -539,15 +539,13 @@ struct rq { | |||
539 | int post_schedule; | 539 | int post_schedule; |
540 | int active_balance; | 540 | int active_balance; |
541 | int push_cpu; | 541 | int push_cpu; |
542 | struct cpu_stop_work active_balance_work; | ||
542 | /* cpu of this runqueue: */ | 543 | /* cpu of this runqueue: */ |
543 | int cpu; | 544 | int cpu; |
544 | int online; | 545 | int online; |
545 | 546 | ||
546 | unsigned long avg_load_per_task; | 547 | unsigned long avg_load_per_task; |
547 | 548 | ||
548 | struct task_struct *migration_thread; | ||
549 | struct list_head migration_queue; | ||
550 | |||
551 | u64 rt_avg; | 549 | u64 rt_avg; |
552 | u64 age_stamp; | 550 | u64 age_stamp; |
553 | u64 idle_stamp; | 551 | u64 idle_stamp; |
@@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2037 | __set_task_cpu(p, new_cpu); | 2035 | __set_task_cpu(p, new_cpu); |
2038 | } | 2036 | } |
2039 | 2037 | ||
2040 | struct migration_req { | 2038 | struct migration_arg { |
2041 | struct list_head list; | ||
2042 | |||
2043 | struct task_struct *task; | 2039 | struct task_struct *task; |
2044 | int dest_cpu; | 2040 | int dest_cpu; |
2045 | |||
2046 | struct completion done; | ||
2047 | }; | 2041 | }; |
2048 | 2042 | ||
2043 | static int migration_cpu_stop(void *data); | ||
2044 | |||
2049 | /* | 2045 | /* |
2050 | * The task's runqueue lock must be held. | 2046 | * The task's runqueue lock must be held. |
2051 | * Returns true if you have to wait for migration thread. | 2047 | * Returns true if you have to wait for migration thread. |
2052 | */ | 2048 | */ |
2053 | static int | 2049 | static bool migrate_task(struct task_struct *p, int dest_cpu) |
2054 | migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | ||
2055 | { | 2050 | { |
2056 | struct rq *rq = task_rq(p); | 2051 | struct rq *rq = task_rq(p); |
2057 | 2052 | ||
@@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2059 | * If the task is not on a runqueue (and not running), then | 2054 | * If the task is not on a runqueue (and not running), then |
2060 | * the next wake-up will properly place the task. | 2055 | * the next wake-up will properly place the task. |
2061 | */ | 2056 | */ |
2062 | if (!p->se.on_rq && !task_running(rq, p)) | 2057 | return p->se.on_rq || task_running(rq, p); |
2063 | return 0; | ||
2064 | |||
2065 | init_completion(&req->done); | ||
2066 | req->task = p; | ||
2067 | req->dest_cpu = dest_cpu; | ||
2068 | list_add(&req->list, &rq->migration_queue); | ||
2069 | |||
2070 | return 1; | ||
2071 | } | 2058 | } |
2072 | 2059 | ||
2073 | /* | 2060 | /* |
@@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq) | |||
3110 | void sched_exec(void) | 3097 | void sched_exec(void) |
3111 | { | 3098 | { |
3112 | struct task_struct *p = current; | 3099 | struct task_struct *p = current; |
3113 | struct migration_req req; | ||
3114 | unsigned long flags; | 3100 | unsigned long flags; |
3115 | struct rq *rq; | 3101 | struct rq *rq; |
3116 | int dest_cpu; | 3102 | int dest_cpu; |
@@ -3124,17 +3110,11 @@ void sched_exec(void) | |||
3124 | * select_task_rq() can race against ->cpus_allowed | 3110 | * select_task_rq() can race against ->cpus_allowed |
3125 | */ | 3111 | */ |
3126 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | 3112 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && |
3127 | likely(cpu_active(dest_cpu)) && | 3113 | likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { |
3128 | migrate_task(p, dest_cpu, &req)) { | 3114 | struct migration_arg arg = { p, dest_cpu }; |
3129 | /* Need to wait for migration thread (might exit: take ref). */ | ||
3130 | struct task_struct *mt = rq->migration_thread; | ||
3131 | 3115 | ||
3132 | get_task_struct(mt); | ||
3133 | task_rq_unlock(rq, &flags); | 3116 | task_rq_unlock(rq, &flags); |
3134 | wake_up_process(mt); | 3117 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
3135 | put_task_struct(mt); | ||
3136 | wait_for_completion(&req.done); | ||
3137 | |||
3138 | return; | 3118 | return; |
3139 | } | 3119 | } |
3140 | unlock: | 3120 | unlock: |
@@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void) | |||
5290 | /* | 5270 | /* |
5291 | * This is how migration works: | 5271 | * This is how migration works: |
5292 | * | 5272 | * |
5293 | * 1) we queue a struct migration_req structure in the source CPU's | 5273 | * 1) we invoke migration_cpu_stop() on the target CPU using |
5294 | * runqueue and wake up that CPU's migration thread. | 5274 | * stop_one_cpu(). |
5295 | * 2) we down() the locked semaphore => thread blocks. | 5275 | * 2) stopper starts to run (implicitly forcing the migrated thread |
5296 | * 3) migration thread wakes up (implicitly it forces the migrated | 5276 | * off the CPU) |
5297 | * thread off the CPU) | 5277 | * 3) it checks whether the migrated task is still in the wrong runqueue. |
5298 | * 4) it gets the migration request and checks whether the migrated | 5278 | * 4) if it's in the wrong runqueue then the migration thread removes |
5299 | * task is still in the wrong runqueue. | ||
5300 | * 5) if it's in the wrong runqueue then the migration thread removes | ||
5301 | * it and puts it into the right queue. | 5279 | * it and puts it into the right queue. |
5302 | * 6) migration thread up()s the semaphore. | 5280 | * 5) stopper completes and stop_one_cpu() returns and the migration |
5303 | * 7) we wake up and the migration is done. | 5281 | * is done. |
5304 | */ | 5282 | */ |
5305 | 5283 | ||
5306 | /* | 5284 | /* |
@@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void) | |||
5314 | */ | 5292 | */ |
5315 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | 5293 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
5316 | { | 5294 | { |
5317 | struct migration_req req; | ||
5318 | unsigned long flags; | 5295 | unsigned long flags; |
5319 | struct rq *rq; | 5296 | struct rq *rq; |
5297 | unsigned int dest_cpu; | ||
5320 | int ret = 0; | 5298 | int ret = 0; |
5321 | 5299 | ||
5322 | /* | 5300 | /* |
@@ -5354,15 +5332,12 @@ again: | |||
5354 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 5332 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
5355 | goto out; | 5333 | goto out; |
5356 | 5334 | ||
5357 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { | 5335 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5336 | if (migrate_task(p, dest_cpu)) { | ||
5337 | struct migration_arg arg = { p, dest_cpu }; | ||
5358 | /* Need help from migration thread: drop lock and wait. */ | 5338 | /* Need help from migration thread: drop lock and wait. */ |
5359 | struct task_struct *mt = rq->migration_thread; | ||
5360 | |||
5361 | get_task_struct(mt); | ||
5362 | task_rq_unlock(rq, &flags); | 5339 | task_rq_unlock(rq, &flags); |
5363 | wake_up_process(mt); | 5340 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
5364 | put_task_struct(mt); | ||
5365 | wait_for_completion(&req.done); | ||
5366 | tlb_migrate_finish(p->mm); | 5341 | tlb_migrate_finish(p->mm); |
5367 | return 0; | 5342 | return 0; |
5368 | } | 5343 | } |
@@ -5420,70 +5395,22 @@ fail: | |||
5420 | return ret; | 5395 | return ret; |
5421 | } | 5396 | } |
5422 | 5397 | ||
5423 | #define RCU_MIGRATION_IDLE 0 | ||
5424 | #define RCU_MIGRATION_NEED_QS 1 | ||
5425 | #define RCU_MIGRATION_GOT_QS 2 | ||
5426 | #define RCU_MIGRATION_MUST_SYNC 3 | ||
5427 | |||
5428 | /* | 5398 | /* |
5429 | * migration_thread - this is a highprio system thread that performs | 5399 | * migration_cpu_stop - this will be executed by a highprio stopper thread |
5430 | * thread migration by bumping thread off CPU then 'pushing' onto | 5400 | * and performs thread migration by bumping thread off CPU then |
5431 | * another runqueue. | 5401 | * 'pushing' onto another runqueue. |
5432 | */ | 5402 | */ |
5433 | static int migration_thread(void *data) | 5403 | static int migration_cpu_stop(void *data) |
5434 | { | 5404 | { |
5435 | int badcpu; | 5405 | struct migration_arg *arg = data; |
5436 | int cpu = (long)data; | ||
5437 | struct rq *rq; | ||
5438 | |||
5439 | rq = cpu_rq(cpu); | ||
5440 | BUG_ON(rq->migration_thread != current); | ||
5441 | |||
5442 | set_current_state(TASK_INTERRUPTIBLE); | ||
5443 | while (!kthread_should_stop()) { | ||
5444 | struct migration_req *req; | ||
5445 | struct list_head *head; | ||
5446 | |||
5447 | raw_spin_lock_irq(&rq->lock); | ||
5448 | |||
5449 | if (cpu_is_offline(cpu)) { | ||
5450 | raw_spin_unlock_irq(&rq->lock); | ||
5451 | break; | ||
5452 | } | ||
5453 | |||
5454 | if (rq->active_balance) { | ||
5455 | active_load_balance(rq, cpu); | ||
5456 | rq->active_balance = 0; | ||
5457 | } | ||
5458 | |||
5459 | head = &rq->migration_queue; | ||
5460 | |||
5461 | if (list_empty(head)) { | ||
5462 | raw_spin_unlock_irq(&rq->lock); | ||
5463 | schedule(); | ||
5464 | set_current_state(TASK_INTERRUPTIBLE); | ||
5465 | continue; | ||
5466 | } | ||
5467 | req = list_entry(head->next, struct migration_req, list); | ||
5468 | list_del_init(head->next); | ||
5469 | |||
5470 | if (req->task != NULL) { | ||
5471 | raw_spin_unlock(&rq->lock); | ||
5472 | __migrate_task(req->task, cpu, req->dest_cpu); | ||
5473 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | ||
5474 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | ||
5475 | raw_spin_unlock(&rq->lock); | ||
5476 | } else { | ||
5477 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | ||
5478 | raw_spin_unlock(&rq->lock); | ||
5479 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | ||
5480 | } | ||
5481 | local_irq_enable(); | ||
5482 | |||
5483 | complete(&req->done); | ||
5484 | } | ||
5485 | __set_current_state(TASK_RUNNING); | ||
5486 | 5406 | ||
5407 | /* | ||
5408 | * The original target cpu might have gone down and we might | ||
5409 | * be on another cpu but it doesn't matter. | ||
5410 | */ | ||
5411 | local_irq_disable(); | ||
5412 | __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); | ||
5413 | local_irq_enable(); | ||
5487 | return 0; | 5414 | return 0; |
5488 | } | 5415 | } |
5489 | 5416 | ||
@@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq) | |||
5850 | static int __cpuinit | 5777 | static int __cpuinit |
5851 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | 5778 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) |
5852 | { | 5779 | { |
5853 | struct task_struct *p; | ||
5854 | int cpu = (long)hcpu; | 5780 | int cpu = (long)hcpu; |
5855 | unsigned long flags; | 5781 | unsigned long flags; |
5856 | struct rq *rq; | 5782 | struct rq *rq = cpu_rq(cpu); |
5857 | 5783 | ||
5858 | switch (action) { | 5784 | switch (action) { |
5859 | 5785 | ||
5860 | case CPU_UP_PREPARE: | 5786 | case CPU_UP_PREPARE: |
5861 | case CPU_UP_PREPARE_FROZEN: | 5787 | case CPU_UP_PREPARE_FROZEN: |
5862 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | ||
5863 | if (IS_ERR(p)) | ||
5864 | return NOTIFY_BAD; | ||
5865 | kthread_bind(p, cpu); | ||
5866 | /* Must be high prio: stop_machine expects to yield to it. */ | ||
5867 | rq = task_rq_lock(p, &flags); | ||
5868 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | ||
5869 | task_rq_unlock(rq, &flags); | ||
5870 | get_task_struct(p); | ||
5871 | cpu_rq(cpu)->migration_thread = p; | ||
5872 | rq->calc_load_update = calc_load_update; | 5788 | rq->calc_load_update = calc_load_update; |
5873 | break; | 5789 | break; |
5874 | 5790 | ||
5875 | case CPU_ONLINE: | 5791 | case CPU_ONLINE: |
5876 | case CPU_ONLINE_FROZEN: | 5792 | case CPU_ONLINE_FROZEN: |
5877 | /* Strictly unnecessary, as first user will wake it. */ | ||
5878 | wake_up_process(cpu_rq(cpu)->migration_thread); | ||
5879 | |||
5880 | /* Update our root-domain */ | 5793 | /* Update our root-domain */ |
5881 | rq = cpu_rq(cpu); | ||
5882 | raw_spin_lock_irqsave(&rq->lock, flags); | 5794 | raw_spin_lock_irqsave(&rq->lock, flags); |
5883 | if (rq->rd) { | 5795 | if (rq->rd) { |
5884 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5796 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
@@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5889 | break; | 5801 | break; |
5890 | 5802 | ||
5891 | #ifdef CONFIG_HOTPLUG_CPU | 5803 | #ifdef CONFIG_HOTPLUG_CPU |
5892 | case CPU_UP_CANCELED: | ||
5893 | case CPU_UP_CANCELED_FROZEN: | ||
5894 | if (!cpu_rq(cpu)->migration_thread) | ||
5895 | break; | ||
5896 | /* Unbind it from offline cpu so it can run. Fall thru. */ | ||
5897 | kthread_bind(cpu_rq(cpu)->migration_thread, | ||
5898 | cpumask_any(cpu_online_mask)); | ||
5899 | kthread_stop(cpu_rq(cpu)->migration_thread); | ||
5900 | put_task_struct(cpu_rq(cpu)->migration_thread); | ||
5901 | cpu_rq(cpu)->migration_thread = NULL; | ||
5902 | break; | ||
5903 | |||
5904 | case CPU_DEAD: | 5804 | case CPU_DEAD: |
5905 | case CPU_DEAD_FROZEN: | 5805 | case CPU_DEAD_FROZEN: |
5906 | migrate_live_tasks(cpu); | 5806 | migrate_live_tasks(cpu); |
5907 | rq = cpu_rq(cpu); | ||
5908 | kthread_stop(rq->migration_thread); | ||
5909 | put_task_struct(rq->migration_thread); | ||
5910 | rq->migration_thread = NULL; | ||
5911 | /* Idle task back to normal (off runqueue, low prio) */ | 5807 | /* Idle task back to normal (off runqueue, low prio) */ |
5912 | raw_spin_lock_irq(&rq->lock); | 5808 | raw_spin_lock_irq(&rq->lock); |
5913 | deactivate_task(rq, rq->idle, 0); | 5809 | deactivate_task(rq, rq->idle, 0); |
@@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
5918 | migrate_nr_uninterruptible(rq); | 5814 | migrate_nr_uninterruptible(rq); |
5919 | BUG_ON(rq->nr_running != 0); | 5815 | BUG_ON(rq->nr_running != 0); |
5920 | calc_global_load_remove(rq); | 5816 | calc_global_load_remove(rq); |
5921 | /* | ||
5922 | * No need to migrate the tasks: it was best-effort if | ||
5923 | * they didn't take sched_hotcpu_mutex. Just wake up | ||
5924 | * the requestors. | ||
5925 | */ | ||
5926 | raw_spin_lock_irq(&rq->lock); | ||
5927 | while (!list_empty(&rq->migration_queue)) { | ||
5928 | struct migration_req *req; | ||
5929 | |||
5930 | req = list_entry(rq->migration_queue.next, | ||
5931 | struct migration_req, list); | ||
5932 | list_del_init(&req->list); | ||
5933 | raw_spin_unlock_irq(&rq->lock); | ||
5934 | complete(&req->done); | ||
5935 | raw_spin_lock_irq(&rq->lock); | ||
5936 | } | ||
5937 | raw_spin_unlock_irq(&rq->lock); | ||
5938 | break; | 5817 | break; |
5939 | 5818 | ||
5940 | case CPU_DYING: | 5819 | case CPU_DYING: |
5941 | case CPU_DYING_FROZEN: | 5820 | case CPU_DYING_FROZEN: |
5942 | /* Update our root-domain */ | 5821 | /* Update our root-domain */ |
5943 | rq = cpu_rq(cpu); | ||
5944 | raw_spin_lock_irqsave(&rq->lock, flags); | 5822 | raw_spin_lock_irqsave(&rq->lock, flags); |
5945 | if (rq->rd) { | 5823 | if (rq->rd) { |
5946 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5824 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
@@ -7757,10 +7635,8 @@ void __init sched_init(void) | |||
7757 | rq->push_cpu = 0; | 7635 | rq->push_cpu = 0; |
7758 | rq->cpu = i; | 7636 | rq->cpu = i; |
7759 | rq->online = 0; | 7637 | rq->online = 0; |
7760 | rq->migration_thread = NULL; | ||
7761 | rq->idle_stamp = 0; | 7638 | rq->idle_stamp = 0; |
7762 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 7639 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
7763 | INIT_LIST_HEAD(&rq->migration_queue); | ||
7764 | rq_attach_root(rq, &def_root_domain); | 7640 | rq_attach_root(rq, &def_root_domain); |
7765 | #endif | 7641 | #endif |
7766 | init_rq_hrtick(rq); | 7642 | init_rq_hrtick(rq); |
@@ -9054,43 +8930,32 @@ struct cgroup_subsys cpuacct_subsys = { | |||
9054 | 8930 | ||
9055 | #ifndef CONFIG_SMP | 8931 | #ifndef CONFIG_SMP |
9056 | 8932 | ||
9057 | int rcu_expedited_torture_stats(char *page) | ||
9058 | { | ||
9059 | return 0; | ||
9060 | } | ||
9061 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
9062 | |||
9063 | void synchronize_sched_expedited(void) | 8933 | void synchronize_sched_expedited(void) |
9064 | { | 8934 | { |
8935 | barrier(); | ||
9065 | } | 8936 | } |
9066 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 8937 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
9067 | 8938 | ||
9068 | #else /* #ifndef CONFIG_SMP */ | 8939 | #else /* #ifndef CONFIG_SMP */ |
9069 | 8940 | ||
9070 | static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); | 8941 | static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); |
9071 | static DEFINE_MUTEX(rcu_sched_expedited_mutex); | ||
9072 | |||
9073 | #define RCU_EXPEDITED_STATE_POST -2 | ||
9074 | #define RCU_EXPEDITED_STATE_IDLE -1 | ||
9075 | |||
9076 | static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | ||
9077 | 8942 | ||
9078 | int rcu_expedited_torture_stats(char *page) | 8943 | static int synchronize_sched_expedited_cpu_stop(void *data) |
9079 | { | 8944 | { |
9080 | int cnt = 0; | 8945 | /* |
9081 | int cpu; | 8946 | * There must be a full memory barrier on each affected CPU |
9082 | 8947 | * between the time that try_stop_cpus() is called and the | |
9083 | cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); | 8948 | * time that it returns. |
9084 | for_each_online_cpu(cpu) { | 8949 | * |
9085 | cnt += sprintf(&page[cnt], " %d:%d", | 8950 | * In the current initial implementation of cpu_stop, the |
9086 | cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); | 8951 | * above condition is already met when the control reaches |
9087 | } | 8952 | * this point and the following smp_mb() is not strictly |
9088 | cnt += sprintf(&page[cnt], "\n"); | 8953 | * necessary. Do smp_mb() anyway for documentation and |
9089 | return cnt; | 8954 | * robustness against future implementation changes. |
8955 | */ | ||
8956 | smp_mb(); /* See above comment block. */ | ||
8957 | return 0; | ||
9090 | } | 8958 | } |
9091 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
9092 | |||
9093 | static long synchronize_sched_expedited_count; | ||
9094 | 8959 | ||
9095 | /* | 8960 | /* |
9096 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | 8961 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" |
@@ -9104,18 +8969,14 @@ static long synchronize_sched_expedited_count; | |||
9104 | */ | 8969 | */ |
9105 | void synchronize_sched_expedited(void) | 8970 | void synchronize_sched_expedited(void) |
9106 | { | 8971 | { |
9107 | int cpu; | 8972 | int snap, trycount = 0; |
9108 | unsigned long flags; | ||
9109 | bool need_full_sync = 0; | ||
9110 | struct rq *rq; | ||
9111 | struct migration_req *req; | ||
9112 | long snap; | ||
9113 | int trycount = 0; | ||
9114 | 8973 | ||
9115 | smp_mb(); /* ensure prior mod happens before capturing snap. */ | 8974 | smp_mb(); /* ensure prior mod happens before capturing snap. */ |
9116 | snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; | 8975 | snap = atomic_read(&synchronize_sched_expedited_count) + 1; |
9117 | get_online_cpus(); | 8976 | get_online_cpus(); |
9118 | while (!mutex_trylock(&rcu_sched_expedited_mutex)) { | 8977 | while (try_stop_cpus(cpu_online_mask, |
8978 | synchronize_sched_expedited_cpu_stop, | ||
8979 | NULL) == -EAGAIN) { | ||
9119 | put_online_cpus(); | 8980 | put_online_cpus(); |
9120 | if (trycount++ < 10) | 8981 | if (trycount++ < 10) |
9121 | udelay(trycount * num_online_cpus()); | 8982 | udelay(trycount * num_online_cpus()); |
@@ -9123,41 +8984,15 @@ void synchronize_sched_expedited(void) | |||
9123 | synchronize_sched(); | 8984 | synchronize_sched(); |
9124 | return; | 8985 | return; |
9125 | } | 8986 | } |
9126 | if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { | 8987 | if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { |
9127 | smp_mb(); /* ensure test happens before caller kfree */ | 8988 | smp_mb(); /* ensure test happens before caller kfree */ |
9128 | return; | 8989 | return; |
9129 | } | 8990 | } |
9130 | get_online_cpus(); | 8991 | get_online_cpus(); |
9131 | } | 8992 | } |
9132 | rcu_expedited_state = RCU_EXPEDITED_STATE_POST; | 8993 | atomic_inc(&synchronize_sched_expedited_count); |
9133 | for_each_online_cpu(cpu) { | 8994 | smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ |
9134 | rq = cpu_rq(cpu); | ||
9135 | req = &per_cpu(rcu_migration_req, cpu); | ||
9136 | init_completion(&req->done); | ||
9137 | req->task = NULL; | ||
9138 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | ||
9139 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
9140 | list_add(&req->list, &rq->migration_queue); | ||
9141 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
9142 | wake_up_process(rq->migration_thread); | ||
9143 | } | ||
9144 | for_each_online_cpu(cpu) { | ||
9145 | rcu_expedited_state = cpu; | ||
9146 | req = &per_cpu(rcu_migration_req, cpu); | ||
9147 | rq = cpu_rq(cpu); | ||
9148 | wait_for_completion(&req->done); | ||
9149 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
9150 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | ||
9151 | need_full_sync = 1; | ||
9152 | req->dest_cpu = RCU_MIGRATION_IDLE; | ||
9153 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
9154 | } | ||
9155 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | ||
9156 | synchronize_sched_expedited_count++; | ||
9157 | mutex_unlock(&rcu_sched_expedited_mutex); | ||
9158 | put_online_cpus(); | 8995 | put_online_cpus(); |
9159 | if (need_full_sync) | ||
9160 | synchronize_sched(); | ||
9161 | } | 8996 | } |
9162 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 8997 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
9163 | 8998 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cbd8b8a296d1..217e4a9393e4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle) | |||
2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); | 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); |
2799 | } | 2799 | } |
2800 | 2800 | ||
2801 | static int active_load_balance_cpu_stop(void *data); | ||
2802 | |||
2801 | /* | 2803 | /* |
2802 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2804 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
2803 | * tasks if there is an imbalance. | 2805 | * tasks if there is an imbalance. |
@@ -2887,8 +2889,9 @@ redo: | |||
2887 | if (need_active_balance(sd, sd_idle, idle)) { | 2889 | if (need_active_balance(sd, sd_idle, idle)) { |
2888 | raw_spin_lock_irqsave(&busiest->lock, flags); | 2890 | raw_spin_lock_irqsave(&busiest->lock, flags); |
2889 | 2891 | ||
2890 | /* don't kick the migration_thread, if the curr | 2892 | /* don't kick the active_load_balance_cpu_stop, |
2891 | * task on busiest cpu can't be moved to this_cpu | 2893 | * if the curr task on busiest cpu can't be |
2894 | * moved to this_cpu | ||
2892 | */ | 2895 | */ |
2893 | if (!cpumask_test_cpu(this_cpu, | 2896 | if (!cpumask_test_cpu(this_cpu, |
2894 | &busiest->curr->cpus_allowed)) { | 2897 | &busiest->curr->cpus_allowed)) { |
@@ -2898,14 +2901,22 @@ redo: | |||
2898 | goto out_one_pinned; | 2901 | goto out_one_pinned; |
2899 | } | 2902 | } |
2900 | 2903 | ||
2904 | /* | ||
2905 | * ->active_balance synchronizes accesses to | ||
2906 | * ->active_balance_work. Once set, it's cleared | ||
2907 | * only after active load balance is finished. | ||
2908 | */ | ||
2901 | if (!busiest->active_balance) { | 2909 | if (!busiest->active_balance) { |
2902 | busiest->active_balance = 1; | 2910 | busiest->active_balance = 1; |
2903 | busiest->push_cpu = this_cpu; | 2911 | busiest->push_cpu = this_cpu; |
2904 | active_balance = 1; | 2912 | active_balance = 1; |
2905 | } | 2913 | } |
2906 | raw_spin_unlock_irqrestore(&busiest->lock, flags); | 2914 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
2915 | |||
2907 | if (active_balance) | 2916 | if (active_balance) |
2908 | wake_up_process(busiest->migration_thread); | 2917 | stop_one_cpu_nowait(cpu_of(busiest), |
2918 | active_load_balance_cpu_stop, busiest, | ||
2919 | &busiest->active_balance_work); | ||
2909 | 2920 | ||
2910 | /* | 2921 | /* |
2911 | * We've kicked active balancing, reset the failure | 2922 | * We've kicked active balancing, reset the failure |
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3012 | } | 3023 | } |
3013 | 3024 | ||
3014 | /* | 3025 | /* |
3015 | * active_load_balance is run by migration threads. It pushes running tasks | 3026 | * active_load_balance_cpu_stop is run by cpu stopper. It pushes |
3016 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 3027 | * running tasks off the busiest CPU onto idle CPUs. It requires at |
3017 | * running on each physical CPU where possible, and avoids physical / | 3028 | * least 1 task to be running on each physical CPU where possible, and |
3018 | * logical imbalances. | 3029 | * avoids physical / logical imbalances. |
3019 | * | ||
3020 | * Called with busiest_rq locked. | ||
3021 | */ | 3030 | */ |
3022 | static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | 3031 | static int active_load_balance_cpu_stop(void *data) |
3023 | { | 3032 | { |
3033 | struct rq *busiest_rq = data; | ||
3034 | int busiest_cpu = cpu_of(busiest_rq); | ||
3024 | int target_cpu = busiest_rq->push_cpu; | 3035 | int target_cpu = busiest_rq->push_cpu; |
3036 | struct rq *target_rq = cpu_rq(target_cpu); | ||
3025 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
3026 | struct rq *target_rq; | 3038 | |
3039 | raw_spin_lock_irq(&busiest_rq->lock); | ||
3040 | |||
3041 | /* make sure the requested cpu hasn't gone down in the meantime */ | ||
3042 | if (unlikely(busiest_cpu != smp_processor_id() || | ||
3043 | !busiest_rq->active_balance)) | ||
3044 | goto out_unlock; | ||
3027 | 3045 | ||
3028 | /* Is there any task to move? */ | 3046 | /* Is there any task to move? */ |
3029 | if (busiest_rq->nr_running <= 1) | 3047 | if (busiest_rq->nr_running <= 1) |
3030 | return; | 3048 | goto out_unlock; |
3031 | |||
3032 | target_rq = cpu_rq(target_cpu); | ||
3033 | 3049 | ||
3034 | /* | 3050 | /* |
3035 | * This condition is "impossible", if it occurs | 3051 | * This condition is "impossible", if it occurs |
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
3058 | schedstat_inc(sd, alb_failed); | 3074 | schedstat_inc(sd, alb_failed); |
3059 | } | 3075 | } |
3060 | double_unlock_balance(busiest_rq, target_rq); | 3076 | double_unlock_balance(busiest_rq, target_rq); |
3077 | out_unlock: | ||
3078 | busiest_rq->active_balance = 0; | ||
3079 | raw_spin_unlock_irq(&busiest_rq->lock); | ||
3080 | return 0; | ||
3061 | } | 3081 | } |
3062 | 3082 | ||
3063 | #ifdef CONFIG_NO_HZ | 3083 | #ifdef CONFIG_NO_HZ |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 9bb9fb1bd79c..ef51d1fcf5e6 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c | |||
@@ -1,17 +1,381 @@ | |||
1 | /* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. | 1 | /* |
2 | * GPL v2 and any later version. | 2 | * kernel/stop_machine.c |
3 | * | ||
4 | * Copyright (C) 2008, 2005 IBM Corporation. | ||
5 | * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au | ||
6 | * Copyright (C) 2010 SUSE Linux Products GmbH | ||
7 | * Copyright (C) 2010 Tejun Heo <tj@kernel.org> | ||
8 | * | ||
9 | * This file is released under the GPLv2 and any later version. | ||
3 | */ | 10 | */ |
11 | #include <linux/completion.h> | ||
4 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
5 | #include <linux/err.h> | 13 | #include <linux/init.h> |
6 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
7 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/percpu.h> | ||
8 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
9 | #include <linux/stop_machine.h> | 18 | #include <linux/stop_machine.h> |
10 | #include <linux/syscalls.h> | ||
11 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/kallsyms.h> | ||
12 | 21 | ||
13 | #include <asm/atomic.h> | 22 | #include <asm/atomic.h> |
14 | #include <asm/uaccess.h> | 23 | |
24 | /* | ||
25 | * Structure to determine completion condition and record errors. May | ||
26 | * be shared by works on different cpus. | ||
27 | */ | ||
28 | struct cpu_stop_done { | ||
29 | atomic_t nr_todo; /* nr left to execute */ | ||
30 | bool executed; /* actually executed? */ | ||
31 | int ret; /* collected return value */ | ||
32 | struct completion completion; /* fired if nr_todo reaches 0 */ | ||
33 | }; | ||
34 | |||
35 | /* the actual stopper, one per every possible cpu, enabled on online cpus */ | ||
36 | struct cpu_stopper { | ||
37 | spinlock_t lock; | ||
38 | struct list_head works; /* list of pending works */ | ||
39 | struct task_struct *thread; /* stopper thread */ | ||
40 | bool enabled; /* is this stopper enabled? */ | ||
41 | }; | ||
42 | |||
43 | static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); | ||
44 | |||
45 | static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) | ||
46 | { | ||
47 | memset(done, 0, sizeof(*done)); | ||
48 | atomic_set(&done->nr_todo, nr_todo); | ||
49 | init_completion(&done->completion); | ||
50 | } | ||
51 | |||
52 | /* signal completion unless @done is NULL */ | ||
53 | static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed) | ||
54 | { | ||
55 | if (done) { | ||
56 | if (executed) | ||
57 | done->executed = true; | ||
58 | if (atomic_dec_and_test(&done->nr_todo)) | ||
59 | complete(&done->completion); | ||
60 | } | ||
61 | } | ||
62 | |||
63 | /* queue @work to @stopper. if offline, @work is completed immediately */ | ||
64 | static void cpu_stop_queue_work(struct cpu_stopper *stopper, | ||
65 | struct cpu_stop_work *work) | ||
66 | { | ||
67 | unsigned long flags; | ||
68 | |||
69 | spin_lock_irqsave(&stopper->lock, flags); | ||
70 | |||
71 | if (stopper->enabled) { | ||
72 | list_add_tail(&work->list, &stopper->works); | ||
73 | wake_up_process(stopper->thread); | ||
74 | } else | ||
75 | cpu_stop_signal_done(work->done, false); | ||
76 | |||
77 | spin_unlock_irqrestore(&stopper->lock, flags); | ||
78 | } | ||
79 | |||
80 | /** | ||
81 | * stop_one_cpu - stop a cpu | ||
82 | * @cpu: cpu to stop | ||
83 | * @fn: function to execute | ||
84 | * @arg: argument to @fn | ||
85 | * | ||
86 | * Execute @fn(@arg) on @cpu. @fn is run in a process context with | ||
87 | * the highest priority preempting any task on the cpu and | ||
88 | * monopolizing it. This function returns after the execution is | ||
89 | * complete. | ||
90 | * | ||
91 | * This function doesn't guarantee @cpu stays online till @fn | ||
92 | * completes. If @cpu goes down in the middle, execution may happen | ||
93 | * partially or fully on different cpus. @fn should either be ready | ||
94 | * for that or the caller should ensure that @cpu stays online until | ||
95 | * this function completes. | ||
96 | * | ||
97 | * CONTEXT: | ||
98 | * Might sleep. | ||
99 | * | ||
100 | * RETURNS: | ||
101 | * -ENOENT if @fn(@arg) was not executed because @cpu was offline; | ||
102 | * otherwise, the return value of @fn. | ||
103 | */ | ||
104 | int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) | ||
105 | { | ||
106 | struct cpu_stop_done done; | ||
107 | struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; | ||
108 | |||
109 | cpu_stop_init_done(&done, 1); | ||
110 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work); | ||
111 | wait_for_completion(&done.completion); | ||
112 | return done.executed ? done.ret : -ENOENT; | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * stop_one_cpu_nowait - stop a cpu but don't wait for completion | ||
117 | * @cpu: cpu to stop | ||
118 | * @fn: function to execute | ||
119 | * @arg: argument to @fn | ||
120 | * | ||
121 | * Similar to stop_one_cpu() but doesn't wait for completion. The | ||
122 | * caller is responsible for ensuring @work_buf is currently unused | ||
123 | * and will remain untouched until stopper starts executing @fn. | ||
124 | * | ||
125 | * CONTEXT: | ||
126 | * Don't care. | ||
127 | */ | ||
128 | void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, | ||
129 | struct cpu_stop_work *work_buf) | ||
130 | { | ||
131 | *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; | ||
132 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf); | ||
133 | } | ||
134 | |||
135 | /* static data for stop_cpus */ | ||
136 | static DEFINE_MUTEX(stop_cpus_mutex); | ||
137 | static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); | ||
138 | |||
139 | int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
140 | { | ||
141 | struct cpu_stop_work *work; | ||
142 | struct cpu_stop_done done; | ||
143 | unsigned int cpu; | ||
144 | |||
145 | /* initialize works and done */ | ||
146 | for_each_cpu(cpu, cpumask) { | ||
147 | work = &per_cpu(stop_cpus_work, cpu); | ||
148 | work->fn = fn; | ||
149 | work->arg = arg; | ||
150 | work->done = &done; | ||
151 | } | ||
152 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | ||
153 | |||
154 | /* | ||
155 | * Disable preemption while queueing to avoid getting | ||
156 | * preempted by a stopper which might wait for other stoppers | ||
157 | * to enter @fn which can lead to deadlock. | ||
158 | */ | ||
159 | preempt_disable(); | ||
160 | for_each_cpu(cpu, cpumask) | ||
161 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), | ||
162 | &per_cpu(stop_cpus_work, cpu)); | ||
163 | preempt_enable(); | ||
164 | |||
165 | wait_for_completion(&done.completion); | ||
166 | return done.executed ? done.ret : -ENOENT; | ||
167 | } | ||
168 | |||
169 | /** | ||
170 | * stop_cpus - stop multiple cpus | ||
171 | * @cpumask: cpus to stop | ||
172 | * @fn: function to execute | ||
173 | * @arg: argument to @fn | ||
174 | * | ||
175 | * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, | ||
176 | * @fn is run in a process context with the highest priority | ||
177 | * preempting any task on the cpu and monopolizing it. This function | ||
178 | * returns after all executions are complete. | ||
179 | * | ||
180 | * This function doesn't guarantee the cpus in @cpumask stay online | ||
181 | * till @fn completes. If some cpus go down in the middle, execution | ||
182 | * on the cpu may happen partially or fully on different cpus. @fn | ||
183 | * should either be ready for that or the caller should ensure that | ||
184 | * the cpus stay online until this function completes. | ||
185 | * | ||
186 | * All stop_cpus() calls are serialized making it safe for @fn to wait | ||
187 | * for all cpus to start executing it. | ||
188 | * | ||
189 | * CONTEXT: | ||
190 | * Might sleep. | ||
191 | * | ||
192 | * RETURNS: | ||
193 | * -ENOENT if @fn(@arg) was not executed at all because all cpus in | ||
194 | * @cpumask were offline; otherwise, 0 if all executions of @fn | ||
195 | * returned 0, any non zero return value if any returned non zero. | ||
196 | */ | ||
197 | int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
198 | { | ||
199 | int ret; | ||
200 | |||
201 | /* static works are used, process one request at a time */ | ||
202 | mutex_lock(&stop_cpus_mutex); | ||
203 | ret = __stop_cpus(cpumask, fn, arg); | ||
204 | mutex_unlock(&stop_cpus_mutex); | ||
205 | return ret; | ||
206 | } | ||
207 | |||
208 | /** | ||
209 | * try_stop_cpus - try to stop multiple cpus | ||
210 | * @cpumask: cpus to stop | ||
211 | * @fn: function to execute | ||
212 | * @arg: argument to @fn | ||
213 | * | ||
214 | * Identical to stop_cpus() except that it fails with -EAGAIN if | ||
215 | * someone else is already using the facility. | ||
216 | * | ||
217 | * CONTEXT: | ||
218 | * Might sleep. | ||
219 | * | ||
220 | * RETURNS: | ||
221 | * -EAGAIN if someone else is already stopping cpus, -ENOENT if | ||
222 | * @fn(@arg) was not executed at all because all cpus in @cpumask were | ||
223 | * offline; otherwise, 0 if all executions of @fn returned 0, any non | ||
224 | * zero return value if any returned non zero. | ||
225 | */ | ||
226 | int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
227 | { | ||
228 | int ret; | ||
229 | |||
230 | /* static works are used, process one request at a time */ | ||
231 | if (!mutex_trylock(&stop_cpus_mutex)) | ||
232 | return -EAGAIN; | ||
233 | ret = __stop_cpus(cpumask, fn, arg); | ||
234 | mutex_unlock(&stop_cpus_mutex); | ||
235 | return ret; | ||
236 | } | ||
237 | |||
238 | static int cpu_stopper_thread(void *data) | ||
239 | { | ||
240 | struct cpu_stopper *stopper = data; | ||
241 | struct cpu_stop_work *work; | ||
242 | int ret; | ||
243 | |||
244 | repeat: | ||
245 | set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ | ||
246 | |||
247 | if (kthread_should_stop()) { | ||
248 | __set_current_state(TASK_RUNNING); | ||
249 | return 0; | ||
250 | } | ||
251 | |||
252 | work = NULL; | ||
253 | spin_lock_irq(&stopper->lock); | ||
254 | if (!list_empty(&stopper->works)) { | ||
255 | work = list_first_entry(&stopper->works, | ||
256 | struct cpu_stop_work, list); | ||
257 | list_del_init(&work->list); | ||
258 | } | ||
259 | spin_unlock_irq(&stopper->lock); | ||
260 | |||
261 | if (work) { | ||
262 | cpu_stop_fn_t fn = work->fn; | ||
263 | void *arg = work->arg; | ||
264 | struct cpu_stop_done *done = work->done; | ||
265 | char ksym_buf[KSYM_NAME_LEN]; | ||
266 | |||
267 | __set_current_state(TASK_RUNNING); | ||
268 | |||
269 | /* cpu stop callbacks are not allowed to sleep */ | ||
270 | preempt_disable(); | ||
271 | |||
272 | ret = fn(arg); | ||
273 | if (ret) | ||
274 | done->ret = ret; | ||
275 | |||
276 | /* restore preemption and check it's still balanced */ | ||
277 | preempt_enable(); | ||
278 | WARN_ONCE(preempt_count(), | ||
279 | "cpu_stop: %s(%p) leaked preempt count\n", | ||
280 | kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL, | ||
281 | ksym_buf), arg); | ||
282 | |||
283 | cpu_stop_signal_done(done, true); | ||
284 | } else | ||
285 | schedule(); | ||
286 | |||
287 | goto repeat; | ||
288 | } | ||
289 | |||
290 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ | ||
291 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | ||
292 | unsigned long action, void *hcpu) | ||
293 | { | ||
294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
295 | unsigned int cpu = (unsigned long)hcpu; | ||
296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | ||
297 | struct cpu_stop_work *work; | ||
298 | struct task_struct *p; | ||
299 | |||
300 | switch (action & ~CPU_TASKS_FROZEN) { | ||
301 | case CPU_UP_PREPARE: | ||
302 | BUG_ON(stopper->thread || stopper->enabled || | ||
303 | !list_empty(&stopper->works)); | ||
304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | ||
305 | cpu); | ||
306 | if (IS_ERR(p)) | ||
307 | return NOTIFY_BAD; | ||
308 | sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); | ||
309 | get_task_struct(p); | ||
310 | stopper->thread = p; | ||
311 | break; | ||
312 | |||
313 | case CPU_ONLINE: | ||
314 | kthread_bind(stopper->thread, cpu); | ||
315 | /* strictly unnecessary, as first user will wake it */ | ||
316 | wake_up_process(stopper->thread); | ||
317 | /* mark enabled */ | ||
318 | spin_lock_irq(&stopper->lock); | ||
319 | stopper->enabled = true; | ||
320 | spin_unlock_irq(&stopper->lock); | ||
321 | break; | ||
322 | |||
323 | #ifdef CONFIG_HOTPLUG_CPU | ||
324 | case CPU_UP_CANCELED: | ||
325 | case CPU_DEAD: | ||
326 | /* kill the stopper */ | ||
327 | kthread_stop(stopper->thread); | ||
328 | /* drain remaining works */ | ||
329 | spin_lock_irq(&stopper->lock); | ||
330 | list_for_each_entry(work, &stopper->works, list) | ||
331 | cpu_stop_signal_done(work->done, false); | ||
332 | stopper->enabled = false; | ||
333 | spin_unlock_irq(&stopper->lock); | ||
334 | /* release the stopper */ | ||
335 | put_task_struct(stopper->thread); | ||
336 | stopper->thread = NULL; | ||
337 | break; | ||
338 | #endif | ||
339 | } | ||
340 | |||
341 | return NOTIFY_OK; | ||
342 | } | ||
343 | |||
344 | /* | ||
345 | * Give it a higher priority so that cpu stopper is available to other | ||
346 | * cpu notifiers. It currently shares the same priority as sched | ||
347 | * migration_notifier. | ||
348 | */ | ||
349 | static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = { | ||
350 | .notifier_call = cpu_stop_cpu_callback, | ||
351 | .priority = 10, | ||
352 | }; | ||
353 | |||
354 | static int __init cpu_stop_init(void) | ||
355 | { | ||
356 | void *bcpu = (void *)(long)smp_processor_id(); | ||
357 | unsigned int cpu; | ||
358 | int err; | ||
359 | |||
360 | for_each_possible_cpu(cpu) { | ||
361 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | ||
362 | |||
363 | spin_lock_init(&stopper->lock); | ||
364 | INIT_LIST_HEAD(&stopper->works); | ||
365 | } | ||
366 | |||
367 | /* start one for the boot cpu */ | ||
368 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, | ||
369 | bcpu); | ||
370 | BUG_ON(err == NOTIFY_BAD); | ||
371 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); | ||
372 | register_cpu_notifier(&cpu_stop_cpu_notifier); | ||
373 | |||
374 | return 0; | ||
375 | } | ||
376 | early_initcall(cpu_stop_init); | ||
377 | |||
378 | #ifdef CONFIG_STOP_MACHINE | ||
15 | 379 | ||
16 | /* This controls the threads on each CPU. */ | 380 | /* This controls the threads on each CPU. */ |
17 | enum stopmachine_state { | 381 | enum stopmachine_state { |
@@ -26,174 +390,94 @@ enum stopmachine_state { | |||
26 | /* Exit */ | 390 | /* Exit */ |
27 | STOPMACHINE_EXIT, | 391 | STOPMACHINE_EXIT, |
28 | }; | 392 | }; |
29 | static enum stopmachine_state state; | ||
30 | 393 | ||
31 | struct stop_machine_data { | 394 | struct stop_machine_data { |
32 | int (*fn)(void *); | 395 | int (*fn)(void *); |
33 | void *data; | 396 | void *data; |
34 | int fnret; | 397 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
398 | unsigned int num_threads; | ||
399 | const struct cpumask *active_cpus; | ||
400 | |||
401 | enum stopmachine_state state; | ||
402 | atomic_t thread_ack; | ||
35 | }; | 403 | }; |
36 | 404 | ||
37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 405 | static void set_state(struct stop_machine_data *smdata, |
38 | static unsigned int num_threads; | 406 | enum stopmachine_state newstate) |
39 | static atomic_t thread_ack; | ||
40 | static DEFINE_MUTEX(lock); | ||
41 | /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ | ||
42 | static DEFINE_MUTEX(setup_lock); | ||
43 | /* Users of stop_machine. */ | ||
44 | static int refcount; | ||
45 | static struct workqueue_struct *stop_machine_wq; | ||
46 | static struct stop_machine_data active, idle; | ||
47 | static const struct cpumask *active_cpus; | ||
48 | static void __percpu *stop_machine_work; | ||
49 | |||
50 | static void set_state(enum stopmachine_state newstate) | ||
51 | { | 407 | { |
52 | /* Reset ack counter. */ | 408 | /* Reset ack counter. */ |
53 | atomic_set(&thread_ack, num_threads); | 409 | atomic_set(&smdata->thread_ack, smdata->num_threads); |
54 | smp_wmb(); | 410 | smp_wmb(); |
55 | state = newstate; | 411 | smdata->state = newstate; |
56 | } | 412 | } |
57 | 413 | ||
58 | /* Last one to ack a state moves to the next state. */ | 414 | /* Last one to ack a state moves to the next state. */ |
59 | static void ack_state(void) | 415 | static void ack_state(struct stop_machine_data *smdata) |
60 | { | 416 | { |
61 | if (atomic_dec_and_test(&thread_ack)) | 417 | if (atomic_dec_and_test(&smdata->thread_ack)) |
62 | set_state(state + 1); | 418 | set_state(smdata, smdata->state + 1); |
63 | } | 419 | } |
64 | 420 | ||
65 | /* This is the actual function which stops the CPU. It runs | 421 | /* This is the cpu_stop function which stops the CPU. */ |
66 | * in the context of a dedicated stopmachine workqueue. */ | 422 | static int stop_machine_cpu_stop(void *data) |
67 | static void stop_cpu(struct work_struct *unused) | ||
68 | { | 423 | { |
424 | struct stop_machine_data *smdata = data; | ||
69 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 425 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
70 | struct stop_machine_data *smdata = &idle; | 426 | int cpu = smp_processor_id(), err = 0; |
71 | int cpu = smp_processor_id(); | 427 | bool is_active; |
72 | int err; | 428 | |
429 | if (!smdata->active_cpus) | ||
430 | is_active = cpu == cpumask_first(cpu_online_mask); | ||
431 | else | ||
432 | is_active = cpumask_test_cpu(cpu, smdata->active_cpus); | ||
73 | 433 | ||
74 | if (!active_cpus) { | ||
75 | if (cpu == cpumask_first(cpu_online_mask)) | ||
76 | smdata = &active; | ||
77 | } else { | ||
78 | if (cpumask_test_cpu(cpu, active_cpus)) | ||
79 | smdata = &active; | ||
80 | } | ||
81 | /* Simple state machine */ | 434 | /* Simple state machine */ |
82 | do { | 435 | do { |
83 | /* Chill out and ensure we re-read stopmachine_state. */ | 436 | /* Chill out and ensure we re-read stopmachine_state. */ |
84 | cpu_relax(); | 437 | cpu_relax(); |
85 | if (state != curstate) { | 438 | if (smdata->state != curstate) { |
86 | curstate = state; | 439 | curstate = smdata->state; |
87 | switch (curstate) { | 440 | switch (curstate) { |
88 | case STOPMACHINE_DISABLE_IRQ: | 441 | case STOPMACHINE_DISABLE_IRQ: |
89 | local_irq_disable(); | 442 | local_irq_disable(); |
90 | hard_irq_disable(); | 443 | hard_irq_disable(); |
91 | break; | 444 | break; |
92 | case STOPMACHINE_RUN: | 445 | case STOPMACHINE_RUN: |
93 | /* On multiple CPUs only a single error code | 446 | if (is_active) |
94 | * is needed to tell that something failed. */ | 447 | err = smdata->fn(smdata->data); |
95 | err = smdata->fn(smdata->data); | ||
96 | if (err) | ||
97 | smdata->fnret = err; | ||
98 | break; | 448 | break; |
99 | default: | 449 | default: |
100 | break; | 450 | break; |
101 | } | 451 | } |
102 | ack_state(); | 452 | ack_state(smdata); |
103 | } | 453 | } |
104 | } while (curstate != STOPMACHINE_EXIT); | 454 | } while (curstate != STOPMACHINE_EXIT); |
105 | 455 | ||
106 | local_irq_enable(); | 456 | local_irq_enable(); |
457 | return err; | ||
107 | } | 458 | } |
108 | 459 | ||
109 | /* Callback for CPUs which aren't supposed to do anything. */ | ||
110 | static int chill(void *unused) | ||
111 | { | ||
112 | return 0; | ||
113 | } | ||
114 | |||
115 | int stop_machine_create(void) | ||
116 | { | ||
117 | mutex_lock(&setup_lock); | ||
118 | if (refcount) | ||
119 | goto done; | ||
120 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
121 | if (!stop_machine_wq) | ||
122 | goto err_out; | ||
123 | stop_machine_work = alloc_percpu(struct work_struct); | ||
124 | if (!stop_machine_work) | ||
125 | goto err_out; | ||
126 | done: | ||
127 | refcount++; | ||
128 | mutex_unlock(&setup_lock); | ||
129 | return 0; | ||
130 | |||
131 | err_out: | ||
132 | if (stop_machine_wq) | ||
133 | destroy_workqueue(stop_machine_wq); | ||
134 | mutex_unlock(&setup_lock); | ||
135 | return -ENOMEM; | ||
136 | } | ||
137 | EXPORT_SYMBOL_GPL(stop_machine_create); | ||
138 | |||
139 | void stop_machine_destroy(void) | ||
140 | { | ||
141 | mutex_lock(&setup_lock); | ||
142 | refcount--; | ||
143 | if (refcount) | ||
144 | goto done; | ||
145 | destroy_workqueue(stop_machine_wq); | ||
146 | free_percpu(stop_machine_work); | ||
147 | done: | ||
148 | mutex_unlock(&setup_lock); | ||
149 | } | ||
150 | EXPORT_SYMBOL_GPL(stop_machine_destroy); | ||
151 | |||
152 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 460 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
153 | { | 461 | { |
154 | struct work_struct *sm_work; | 462 | struct stop_machine_data smdata = { .fn = fn, .data = data, |
155 | int i, ret; | 463 | .num_threads = num_online_cpus(), |
156 | 464 | .active_cpus = cpus }; | |
157 | /* Set up initial state. */ | 465 | |
158 | mutex_lock(&lock); | 466 | /* Set the initial state and stop all online cpus. */ |
159 | num_threads = num_online_cpus(); | 467 | set_state(&smdata, STOPMACHINE_PREPARE); |
160 | active_cpus = cpus; | 468 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); |
161 | active.fn = fn; | ||
162 | active.data = data; | ||
163 | active.fnret = 0; | ||
164 | idle.fn = chill; | ||
165 | idle.data = NULL; | ||
166 | |||
167 | set_state(STOPMACHINE_PREPARE); | ||
168 | |||
169 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one | ||
170 | * doesn't hit this CPU until we're ready. */ | ||
171 | get_cpu(); | ||
172 | for_each_online_cpu(i) { | ||
173 | sm_work = per_cpu_ptr(stop_machine_work, i); | ||
174 | INIT_WORK(sm_work, stop_cpu); | ||
175 | queue_work_on(i, stop_machine_wq, sm_work); | ||
176 | } | ||
177 | /* This will release the thread on our CPU. */ | ||
178 | put_cpu(); | ||
179 | flush_workqueue(stop_machine_wq); | ||
180 | ret = active.fnret; | ||
181 | mutex_unlock(&lock); | ||
182 | return ret; | ||
183 | } | 469 | } |
184 | 470 | ||
185 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 471 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
186 | { | 472 | { |
187 | int ret; | 473 | int ret; |
188 | 474 | ||
189 | ret = stop_machine_create(); | ||
190 | if (ret) | ||
191 | return ret; | ||
192 | /* No CPUs can come up or down during this. */ | 475 | /* No CPUs can come up or down during this. */ |
193 | get_online_cpus(); | 476 | get_online_cpus(); |
194 | ret = __stop_machine(fn, data, cpus); | 477 | ret = __stop_machine(fn, data, cpus); |
195 | put_online_cpus(); | 478 | put_online_cpus(); |
196 | stop_machine_destroy(); | ||
197 | return ret; | 479 | return ret; |
198 | } | 480 | } |
199 | EXPORT_SYMBOL_GPL(stop_machine); | 481 | EXPORT_SYMBOL_GPL(stop_machine); |
482 | |||
483 | #endif /* CONFIG_STOP_MACHINE */ | ||