| author | Ingo Molnar <mingo@elte.hu> | 2010-05-08 12:11:19 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-05-08 12:11:19 -0400 |
| commit | e7858f52a5cb868289a72264534a1f05f3340c6c (patch) | |
| tree | aa7308603cf30d8aec6e45ecaddc6c8ed29d2edb | |
| parent | 27a9da6538ee18046d7bff8e36a9f783542c54c3 (diff) | |
| parent | bbf1bb3eee86f2eef2baa14e600be454d09109ee (diff) | |
Merge branch 'cpu_stop' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into sched/core
| -rw-r--r-- | Documentation/RCU/torture.txt | 10 |
| -rw-r--r-- | arch/s390/kernel/time.c | 1 |
| -rw-r--r-- | drivers/xen/manage.c | 14 |
| -rw-r--r-- | include/linux/rcutiny.h | 2 |
| -rw-r--r-- | include/linux/rcutree.h | 1 |
| -rw-r--r-- | include/linux/stop_machine.h | 122 |
| -rw-r--r-- | kernel/Makefile | 2 |
| -rw-r--r-- | kernel/cpu.c | 8 |
| -rw-r--r-- | kernel/module.c | 14 |
| -rw-r--r-- | kernel/rcutorture.c | 2 |
| -rw-r--r-- | kernel/sched.c | 285 |
| -rw-r--r-- | kernel/sched_fair.c | 48 |
| -rw-r--r-- | kernel/stop_machine.c | 534 |
13 files changed, 604 insertions, 439 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 0e50bc2aa1e2..5d9016795fd8 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
| @@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following: | |||
| 182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 | 182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 |
| 183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 | 183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 |
| 184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 | 184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 |
| 185 | state: -1 / 0:0 3:0 4:0 | ||
| 186 | |||
| 187 | As before, the first four lines are similar to those for RCU. | ||
| 188 | The last line shows the task-migration state. The first number is | ||
| 189 | -1 if synchronize_sched_expedited() is idle, -2 if in the process of | ||
| 190 | posting wakeups to the migration kthreads, and N when waiting on CPU N. | ||
| 191 | Each of the colon-separated fields following the "/" is a CPU:state pair. | ||
| 192 | Valid states are "0" for idle, "1" for waiting for quiescent state, | ||
| 193 | "2" for passed through quiescent state, and "3" when a race with a | ||
| 194 | CPU-hotplug event forces use of the synchronize_sched() primitive. | ||
| 195 | 185 | ||
| 196 | 186 | ||
| 197 | USAGE | 187 | USAGE |
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index d906bf19c14a..a2163c95eb98 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
| @@ -391,7 +391,6 @@ static void __init time_init_wq(void) | |||
| 391 | if (time_sync_wq) | 391 | if (time_sync_wq) |
| 392 | return; | 392 | return; |
| 393 | time_sync_wq = create_singlethread_workqueue("timesync"); | 393 | time_sync_wq = create_singlethread_workqueue("timesync"); |
| 394 | stop_machine_create(); | ||
| 395 | } | 394 | } |
| 396 | 395 | ||
| 397 | /* | 396 | /* |
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 2ac4440e7b08..8943b8ccee1a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
| @@ -80,12 +80,6 @@ static void do_suspend(void) | |||
| 80 | 80 | ||
| 81 | shutting_down = SHUTDOWN_SUSPEND; | 81 | shutting_down = SHUTDOWN_SUSPEND; |
| 82 | 82 | ||
| 83 | err = stop_machine_create(); | ||
| 84 | if (err) { | ||
| 85 | printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err); | ||
| 86 | goto out; | ||
| 87 | } | ||
| 88 | |||
| 89 | #ifdef CONFIG_PREEMPT | 83 | #ifdef CONFIG_PREEMPT |
| 90 | /* If the kernel is preemptible, we need to freeze all the processes | 84 | /* If the kernel is preemptible, we need to freeze all the processes |
| 91 | to prevent them from being in the middle of a pagetable update | 85 | to prevent them from being in the middle of a pagetable update |
| @@ -93,7 +87,7 @@ static void do_suspend(void) | |||
| 93 | err = freeze_processes(); | 87 | err = freeze_processes(); |
| 94 | if (err) { | 88 | if (err) { |
| 95 | printk(KERN_ERR "xen suspend: freeze failed %d\n", err); | 89 | printk(KERN_ERR "xen suspend: freeze failed %d\n", err); |
| 96 | goto out_destroy_sm; | 90 | goto out; |
| 97 | } | 91 | } |
| 98 | #endif | 92 | #endif |
| 99 | 93 | ||
| @@ -136,12 +130,8 @@ out_resume: | |||
| 136 | out_thaw: | 130 | out_thaw: |
| 137 | #ifdef CONFIG_PREEMPT | 131 | #ifdef CONFIG_PREEMPT |
| 138 | thaw_processes(); | 132 | thaw_processes(); |
| 139 | |||
| 140 | out_destroy_sm: | ||
| 141 | #endif | ||
| 142 | stop_machine_destroy(); | ||
| 143 | |||
| 144 | out: | 133 | out: |
| 134 | #endif | ||
| 145 | shutting_down = SHUTDOWN_INVALID; | 135 | shutting_down = SHUTDOWN_INVALID; |
| 146 | } | 136 | } |
| 147 | #endif /* CONFIG_PM_SLEEP */ | 137 | #endif /* CONFIG_PM_SLEEP */ |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a5195875480a..0006b2df00e1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
| @@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void) | |||
| 60 | return 0; | 60 | return 0; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | extern int rcu_expedited_torture_stats(char *page); | ||
| 64 | |||
| 65 | static inline void rcu_force_quiescent_state(void) | 63 | static inline void rcu_force_quiescent_state(void) |
| 66 | { | 64 | { |
| 67 | } | 65 | } |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 42cc3a04779e..24e467e526b8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
| @@ -35,7 +35,6 @@ struct notifier_block; | |||
| 35 | extern void rcu_sched_qs(int cpu); | 35 | extern void rcu_sched_qs(int cpu); |
| 36 | extern void rcu_bh_qs(int cpu); | 36 | extern void rcu_bh_qs(int cpu); |
| 37 | extern int rcu_needs_cpu(int cpu); | 37 | extern int rcu_needs_cpu(int cpu); |
| 38 | extern int rcu_expedited_torture_stats(char *page); | ||
| 39 | 38 | ||
| 40 | #ifdef CONFIG_TREE_PREEMPT_RCU | 39 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 41 | 40 | ||
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index baba3a23a814..6b524a0d02e4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
| @@ -1,13 +1,101 @@ | |||
| 1 | #ifndef _LINUX_STOP_MACHINE | 1 | #ifndef _LINUX_STOP_MACHINE |
| 2 | #define _LINUX_STOP_MACHINE | 2 | #define _LINUX_STOP_MACHINE |
| 3 | /* "Bogolock": stop the entire machine, disable interrupts. This is a | 3 | |
| 4 | very heavy lock, which is equivalent to grabbing every spinlock | ||
| 5 | (and more). So the "read" side to such a lock is anything which | ||
| 6 | disables preeempt. */ | ||
| 7 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
| 8 | #include <linux/cpumask.h> | 5 | #include <linux/cpumask.h> |
| 6 | #include <linux/list.h> | ||
| 9 | #include <asm/system.h> | 7 | #include <asm/system.h> |
| 10 | 8 | ||
| 9 | /* | ||
| 10 | * stop_cpu[s]() is simplistic per-cpu maximum priority cpu | ||
| 11 | * monopolization mechanism. The caller can specify a non-sleeping | ||
| 12 | * function to be executed on a single or multiple cpus preempting all | ||
| 13 | * other processes and monopolizing those cpus until it finishes. | ||
| 14 | * | ||
| 15 | * Resources for this mechanism are preallocated when a cpu is brought | ||
| 16 | * up and requests are guaranteed to be served as long as the target | ||
| 17 | * cpus are online. | ||
| 18 | */ | ||
| 19 | typedef int (*cpu_stop_fn_t)(void *arg); | ||
| 20 | |||
| 21 | #ifdef CONFIG_SMP | ||
| 22 | |||
| 23 | struct cpu_stop_work { | ||
| 24 | struct list_head list; /* cpu_stopper->works */ | ||
| 25 | cpu_stop_fn_t fn; | ||
| 26 | void *arg; | ||
| 27 | struct cpu_stop_done *done; | ||
| 28 | }; | ||
| 29 | |||
| 30 | int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg); | ||
| 31 | void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, | ||
| 32 | struct cpu_stop_work *work_buf); | ||
| 33 | int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); | ||
| 34 | int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); | ||
| 35 | |||
| 36 | #else /* CONFIG_SMP */ | ||
| 37 | |||
| 38 | #include <linux/workqueue.h> | ||
| 39 | |||
| 40 | struct cpu_stop_work { | ||
| 41 | struct work_struct work; | ||
| 42 | cpu_stop_fn_t fn; | ||
| 43 | void *arg; | ||
| 44 | }; | ||
| 45 | |||
| 46 | static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) | ||
| 47 | { | ||
| 48 | int ret = -ENOENT; | ||
| 49 | preempt_disable(); | ||
| 50 | if (cpu == smp_processor_id()) | ||
| 51 | ret = fn(arg); | ||
| 52 | preempt_enable(); | ||
| 53 | return ret; | ||
| 54 | } | ||
| 55 | |||
| 56 | static void stop_one_cpu_nowait_workfn(struct work_struct *work) | ||
| 57 | { | ||
| 58 | struct cpu_stop_work *stwork = | ||
| 59 | container_of(work, struct cpu_stop_work, work); | ||
| 60 | preempt_disable(); | ||
| 61 | stwork->fn(stwork->arg); | ||
| 62 | preempt_enable(); | ||
| 63 | } | ||
| 64 | |||
| 65 | static inline void stop_one_cpu_nowait(unsigned int cpu, | ||
| 66 | cpu_stop_fn_t fn, void *arg, | ||
| 67 | struct cpu_stop_work *work_buf) | ||
| 68 | { | ||
| 69 | if (cpu == smp_processor_id()) { | ||
| 70 | INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn); | ||
| 71 | work_buf->fn = fn; | ||
| 72 | work_buf->arg = arg; | ||
| 73 | schedule_work(&work_buf->work); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | static inline int stop_cpus(const struct cpumask *cpumask, | ||
| 78 | cpu_stop_fn_t fn, void *arg) | ||
| 79 | { | ||
| 80 | if (cpumask_test_cpu(raw_smp_processor_id(), cpumask)) | ||
| 81 | return stop_one_cpu(raw_smp_processor_id(), fn, arg); | ||
| 82 | return -ENOENT; | ||
| 83 | } | ||
| 84 | |||
| 85 | static inline int try_stop_cpus(const struct cpumask *cpumask, | ||
| 86 | cpu_stop_fn_t fn, void *arg) | ||
| 87 | { | ||
| 88 | return stop_cpus(cpumask, fn, arg); | ||
| 89 | } | ||
| 90 | |||
| 91 | #endif /* CONFIG_SMP */ | ||
| 92 | |||
| 93 | /* | ||
| 94 | * stop_machine "Bogolock": stop the entire machine, disable | ||
| 95 | * interrupts. This is a very heavy lock, which is equivalent to | ||
| 96 | * grabbing every spinlock (and more). So the "read" side to such a | ||
| 97 | * lock is anything which disables preeempt. | ||
| 98 | */ | ||
| 11 | #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) | 99 | #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP) |
| 12 | 100 | ||
| 13 | /** | 101 | /** |
| @@ -36,24 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); | |||
| 36 | */ | 124 | */ |
| 37 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); | 125 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus); |
| 38 | 126 | ||
| 39 | /** | 127 | #else /* CONFIG_STOP_MACHINE && CONFIG_SMP */ |
| 40 | * stop_machine_create: create all stop_machine threads | ||
| 41 | * | ||
| 42 | * Description: This causes all stop_machine threads to be created before | ||
| 43 | * stop_machine actually gets called. This can be used by subsystems that | ||
| 44 | * need a non failing stop_machine infrastructure. | ||
| 45 | */ | ||
| 46 | int stop_machine_create(void); | ||
| 47 | |||
| 48 | /** | ||
| 49 | * stop_machine_destroy: destroy all stop_machine threads | ||
| 50 | * | ||
| 51 | * Description: This causes all stop_machine threads which were created with | ||
| 52 | * stop_machine_create to be destroyed again. | ||
| 53 | */ | ||
| 54 | void stop_machine_destroy(void); | ||
| 55 | |||
| 56 | #else | ||
| 57 | 128 | ||
| 58 | static inline int stop_machine(int (*fn)(void *), void *data, | 129 | static inline int stop_machine(int (*fn)(void *), void *data, |
| 59 | const struct cpumask *cpus) | 130 | const struct cpumask *cpus) |
| @@ -65,8 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data, | |||
| 65 | return ret; | 136 | return ret; |
| 66 | } | 137 | } |
| 67 | 138 | ||
| 68 | static inline int stop_machine_create(void) { return 0; } | 139 | #endif /* CONFIG_STOP_MACHINE && CONFIG_SMP */ |
| 69 | static inline void stop_machine_destroy(void) { } | 140 | #endif /* _LINUX_STOP_MACHINE */ |
| 70 | |||
| 71 | #endif /* CONFIG_SMP */ | ||
| 72 | #endif /* _LINUX_STOP_MACHINE */ | ||
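The new interface declared in this header is the core of the series. As a purely illustrative, hypothetical caller-side sketch (the names example_bump_counter() and example_run_on_cpu1() are invented here and are not part of the merge), running a non-sleeping callback on one CPU looks roughly like this:

```c
#include <linux/stop_machine.h>

/* Non-sleeping callback; the stopper runs it with preemption disabled,
 * preempting and monopolizing the target CPU until it returns. */
static int example_bump_counter(void *arg)
{
	int *counter = arg;

	(*counter)++;
	return 0;
}

/* Returns example_bump_counter()'s return value, or -ENOENT if CPU 1
 * was offline and the work was never executed. */
static int example_run_on_cpu1(void)
{
	int counter = 0;

	return stop_one_cpu(1, example_bump_counter, &counter);
}
```

stop_cpus() and try_stop_cpus() follow the same shape but take a cpumask, while stop_one_cpu_nowait() queues the same kind of work without waiting, using a caller-provided struct cpu_stop_work.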
diff --git a/kernel/Makefile b/kernel/Makefile
index a987aa1676b5..149e18ef1ab1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
| @@ -68,7 +68,7 @@ obj-$(CONFIG_USER_NS) += user_namespace.o | |||
| 68 | obj-$(CONFIG_PID_NS) += pid_namespace.o | 68 | obj-$(CONFIG_PID_NS) += pid_namespace.o |
| 69 | obj-$(CONFIG_IKCONFIG) += configs.o | 69 | obj-$(CONFIG_IKCONFIG) += configs.o |
| 70 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | 70 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o |
| 71 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 71 | obj-$(CONFIG_SMP) += stop_machine.o |
| 72 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 72 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
| 73 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o | 73 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o |
| 74 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o | 74 | obj-$(CONFIG_AUDITSYSCALL) += auditsc.o |
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 914aedcde849..545777574779 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
| @@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu) | |||
| 266 | { | 266 | { |
| 267 | int err; | 267 | int err; |
| 268 | 268 | ||
| 269 | err = stop_machine_create(); | ||
| 270 | if (err) | ||
| 271 | return err; | ||
| 272 | cpu_maps_update_begin(); | 269 | cpu_maps_update_begin(); |
| 273 | 270 | ||
| 274 | if (cpu_hotplug_disabled) { | 271 | if (cpu_hotplug_disabled) { |
| @@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu) | |||
| 280 | 277 | ||
| 281 | out: | 278 | out: |
| 282 | cpu_maps_update_done(); | 279 | cpu_maps_update_done(); |
| 283 | stop_machine_destroy(); | ||
| 284 | return err; | 280 | return err; |
| 285 | } | 281 | } |
| 286 | EXPORT_SYMBOL(cpu_down); | 282 | EXPORT_SYMBOL(cpu_down); |
| @@ -361,9 +357,6 @@ int disable_nonboot_cpus(void) | |||
| 361 | { | 357 | { |
| 362 | int cpu, first_cpu, error; | 358 | int cpu, first_cpu, error; |
| 363 | 359 | ||
| 364 | error = stop_machine_create(); | ||
| 365 | if (error) | ||
| 366 | return error; | ||
| 367 | cpu_maps_update_begin(); | 360 | cpu_maps_update_begin(); |
| 368 | first_cpu = cpumask_first(cpu_online_mask); | 361 | first_cpu = cpumask_first(cpu_online_mask); |
| 369 | /* | 362 | /* |
| @@ -394,7 +387,6 @@ int disable_nonboot_cpus(void) | |||
| 394 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); | 387 | printk(KERN_ERR "Non-boot CPUs are not disabled\n"); |
| 395 | } | 388 | } |
| 396 | cpu_maps_update_done(); | 389 | cpu_maps_update_done(); |
| 397 | stop_machine_destroy(); | ||
| 398 | return error; | 390 | return error; |
| 399 | } | 391 | } |
| 400 | 392 | ||
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..0838246d8c94 100644
--- a/kernel/module.c
+++ b/kernel/module.c
| @@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
| 723 | return -EFAULT; | 723 | return -EFAULT; |
| 724 | name[MODULE_NAME_LEN-1] = '\0'; | 724 | name[MODULE_NAME_LEN-1] = '\0'; |
| 725 | 725 | ||
| 726 | /* Create stop_machine threads since free_module relies on | 726 | if (mutex_lock_interruptible(&module_mutex) != 0) |
| 727 | * a non-failing stop_machine call. */ | 727 | return -EINTR; |
| 728 | ret = stop_machine_create(); | ||
| 729 | if (ret) | ||
| 730 | return ret; | ||
| 731 | |||
| 732 | if (mutex_lock_interruptible(&module_mutex) != 0) { | ||
| 733 | ret = -EINTR; | ||
| 734 | goto out_stop; | ||
| 735 | } | ||
| 736 | 728 | ||
| 737 | mod = find_module(name); | 729 | mod = find_module(name); |
| 738 | if (!mod) { | 730 | if (!mod) { |
| @@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user, | |||
| 792 | 784 | ||
| 793 | out: | 785 | out: |
| 794 | mutex_unlock(&module_mutex); | 786 | mutex_unlock(&module_mutex); |
| 795 | out_stop: | ||
| 796 | stop_machine_destroy(); | ||
| 797 | return ret; | 787 | return ret; |
| 798 | } | 788 | } |
| 799 | 789 | ||
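Note how the stop_machine_create()/stop_machine_destroy() bracketing disappears from the s390 time-sync, Xen suspend, cpu_down() and delete_module() paths above: since cpu_stop resources are preallocated when a CPU is brought up, stop_machine() can no longer fail for lack of worker threads. A hedged sketch of the resulting calling convention (example_update() and example_apply_update() are invented names):

```c
#include <linux/stop_machine.h>

/* Runs on one CPU while every other online CPU spins with interrupts
 * disabled; must not sleep. */
static int example_update(void *data)
{
	/* patch code, swap a data structure, etc. */
	return 0;
}

static int example_apply_update(void)
{
	/* No create/destroy bracketing is needed around the call anymore. */
	return stop_machine(example_update, NULL, NULL);
}
```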
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83ed..2b676f3a0f26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
| @@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
| 669 | .sync = synchronize_sched_expedited, | 669 | .sync = synchronize_sched_expedited, |
| 670 | .cb_barrier = NULL, | 670 | .cb_barrier = NULL, |
| 671 | .fqs = rcu_sched_force_quiescent_state, | 671 | .fqs = rcu_sched_force_quiescent_state, |
| 672 | .stats = rcu_expedited_torture_stats, | 672 | .stats = NULL, |
| 673 | .irq_capable = 1, | 673 | .irq_capable = 1, |
| 674 | .name = "sched_expedited" | 674 | .name = "sched_expedited" |
| 675 | }; | 675 | }; |
diff --git a/kernel/sched.c b/kernel/sched.c
index 11ac0eb0bce7..39aa9c7e22c0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -55,9 +55,9 @@ | |||
| 55 | #include <linux/cpu.h> | 55 | #include <linux/cpu.h> |
| 56 | #include <linux/cpuset.h> | 56 | #include <linux/cpuset.h> |
| 57 | #include <linux/percpu.h> | 57 | #include <linux/percpu.h> |
| 58 | #include <linux/kthread.h> | ||
| 59 | #include <linux/proc_fs.h> | 58 | #include <linux/proc_fs.h> |
| 60 | #include <linux/seq_file.h> | 59 | #include <linux/seq_file.h> |
| 60 | #include <linux/stop_machine.h> | ||
| 61 | #include <linux/sysctl.h> | 61 | #include <linux/sysctl.h> |
| 62 | #include <linux/syscalls.h> | 62 | #include <linux/syscalls.h> |
| 63 | #include <linux/times.h> | 63 | #include <linux/times.h> |
| @@ -539,15 +539,13 @@ struct rq { | |||
| 539 | int post_schedule; | 539 | int post_schedule; |
| 540 | int active_balance; | 540 | int active_balance; |
| 541 | int push_cpu; | 541 | int push_cpu; |
| 542 | struct cpu_stop_work active_balance_work; | ||
| 542 | /* cpu of this runqueue: */ | 543 | /* cpu of this runqueue: */ |
| 543 | int cpu; | 544 | int cpu; |
| 544 | int online; | 545 | int online; |
| 545 | 546 | ||
| 546 | unsigned long avg_load_per_task; | 547 | unsigned long avg_load_per_task; |
| 547 | 548 | ||
| 548 | struct task_struct *migration_thread; | ||
| 549 | struct list_head migration_queue; | ||
| 550 | |||
| 551 | u64 rt_avg; | 549 | u64 rt_avg; |
| 552 | u64 age_stamp; | 550 | u64 age_stamp; |
| 553 | u64 idle_stamp; | 551 | u64 idle_stamp; |
| @@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2037 | __set_task_cpu(p, new_cpu); | 2035 | __set_task_cpu(p, new_cpu); |
| 2038 | } | 2036 | } |
| 2039 | 2037 | ||
| 2040 | struct migration_req { | 2038 | struct migration_arg { |
| 2041 | struct list_head list; | ||
| 2042 | |||
| 2043 | struct task_struct *task; | 2039 | struct task_struct *task; |
| 2044 | int dest_cpu; | 2040 | int dest_cpu; |
| 2045 | |||
| 2046 | struct completion done; | ||
| 2047 | }; | 2041 | }; |
| 2048 | 2042 | ||
| 2043 | static int migration_cpu_stop(void *data); | ||
| 2044 | |||
| 2049 | /* | 2045 | /* |
| 2050 | * The task's runqueue lock must be held. | 2046 | * The task's runqueue lock must be held. |
| 2051 | * Returns true if you have to wait for migration thread. | 2047 | * Returns true if you have to wait for migration thread. |
| 2052 | */ | 2048 | */ |
| 2053 | static int | 2049 | static bool migrate_task(struct task_struct *p, int dest_cpu) |
| 2054 | migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | ||
| 2055 | { | 2050 | { |
| 2056 | struct rq *rq = task_rq(p); | 2051 | struct rq *rq = task_rq(p); |
| 2057 | 2052 | ||
| @@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
| 2059 | * If the task is not on a runqueue (and not running), then | 2054 | * If the task is not on a runqueue (and not running), then |
| 2060 | * the next wake-up will properly place the task. | 2055 | * the next wake-up will properly place the task. |
| 2061 | */ | 2056 | */ |
| 2062 | if (!p->se.on_rq && !task_running(rq, p)) | 2057 | return p->se.on_rq || task_running(rq, p); |
| 2063 | return 0; | ||
| 2064 | |||
| 2065 | init_completion(&req->done); | ||
| 2066 | req->task = p; | ||
| 2067 | req->dest_cpu = dest_cpu; | ||
| 2068 | list_add(&req->list, &rq->migration_queue); | ||
| 2069 | |||
| 2070 | return 1; | ||
| 2071 | } | 2058 | } |
| 2072 | 2059 | ||
| 2073 | /* | 2060 | /* |
| @@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq) | |||
| 3110 | void sched_exec(void) | 3097 | void sched_exec(void) |
| 3111 | { | 3098 | { |
| 3112 | struct task_struct *p = current; | 3099 | struct task_struct *p = current; |
| 3113 | struct migration_req req; | ||
| 3114 | unsigned long flags; | 3100 | unsigned long flags; |
| 3115 | struct rq *rq; | 3101 | struct rq *rq; |
| 3116 | int dest_cpu; | 3102 | int dest_cpu; |
| @@ -3124,17 +3110,11 @@ void sched_exec(void) | |||
| 3124 | * select_task_rq() can race against ->cpus_allowed | 3110 | * select_task_rq() can race against ->cpus_allowed |
| 3125 | */ | 3111 | */ |
| 3126 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | 3112 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && |
| 3127 | likely(cpu_active(dest_cpu)) && | 3113 | likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { |
| 3128 | migrate_task(p, dest_cpu, &req)) { | 3114 | struct migration_arg arg = { p, dest_cpu }; |
| 3129 | /* Need to wait for migration thread (might exit: take ref). */ | ||
| 3130 | struct task_struct *mt = rq->migration_thread; | ||
| 3131 | 3115 | ||
| 3132 | get_task_struct(mt); | ||
| 3133 | task_rq_unlock(rq, &flags); | 3116 | task_rq_unlock(rq, &flags); |
| 3134 | wake_up_process(mt); | 3117 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
| 3135 | put_task_struct(mt); | ||
| 3136 | wait_for_completion(&req.done); | ||
| 3137 | |||
| 3138 | return; | 3118 | return; |
| 3139 | } | 3119 | } |
| 3140 | unlock: | 3120 | unlock: |
| @@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void) | |||
| 5290 | /* | 5270 | /* |
| 5291 | * This is how migration works: | 5271 | * This is how migration works: |
| 5292 | * | 5272 | * |
| 5293 | * 1) we queue a struct migration_req structure in the source CPU's | 5273 | * 1) we invoke migration_cpu_stop() on the target CPU using |
| 5294 | * runqueue and wake up that CPU's migration thread. | 5274 | * stop_one_cpu(). |
| 5295 | * 2) we down() the locked semaphore => thread blocks. | 5275 | * 2) stopper starts to run (implicitly forcing the migrated thread |
| 5296 | * 3) migration thread wakes up (implicitly it forces the migrated | 5276 | * off the CPU) |
| 5297 | * thread off the CPU) | 5277 | * 3) it checks whether the migrated task is still in the wrong runqueue. |
| 5298 | * 4) it gets the migration request and checks whether the migrated | 5278 | * 4) if it's in the wrong runqueue then the migration thread removes |
| 5299 | * task is still in the wrong runqueue. | ||
| 5300 | * 5) if it's in the wrong runqueue then the migration thread removes | ||
| 5301 | * it and puts it into the right queue. | 5279 | * it and puts it into the right queue. |
| 5302 | * 6) migration thread up()s the semaphore. | 5280 | * 5) stopper completes and stop_one_cpu() returns and the migration |
| 5303 | * 7) we wake up and the migration is done. | 5281 | * is done. |
| 5304 | */ | 5282 | */ |
| 5305 | 5283 | ||
| 5306 | /* | 5284 | /* |
| @@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void) | |||
| 5314 | */ | 5292 | */ |
| 5315 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | 5293 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
| 5316 | { | 5294 | { |
| 5317 | struct migration_req req; | ||
| 5318 | unsigned long flags; | 5295 | unsigned long flags; |
| 5319 | struct rq *rq; | 5296 | struct rq *rq; |
| 5297 | unsigned int dest_cpu; | ||
| 5320 | int ret = 0; | 5298 | int ret = 0; |
| 5321 | 5299 | ||
| 5322 | /* | 5300 | /* |
| @@ -5354,15 +5332,12 @@ again: | |||
| 5354 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 5332 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
| 5355 | goto out; | 5333 | goto out; |
| 5356 | 5334 | ||
| 5357 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { | 5335 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
| 5336 | if (migrate_task(p, dest_cpu)) { | ||
| 5337 | struct migration_arg arg = { p, dest_cpu }; | ||
| 5358 | /* Need help from migration thread: drop lock and wait. */ | 5338 | /* Need help from migration thread: drop lock and wait. */ |
| 5359 | struct task_struct *mt = rq->migration_thread; | ||
| 5360 | |||
| 5361 | get_task_struct(mt); | ||
| 5362 | task_rq_unlock(rq, &flags); | 5339 | task_rq_unlock(rq, &flags); |
| 5363 | wake_up_process(mt); | 5340 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
| 5364 | put_task_struct(mt); | ||
| 5365 | wait_for_completion(&req.done); | ||
| 5366 | tlb_migrate_finish(p->mm); | 5341 | tlb_migrate_finish(p->mm); |
| 5367 | return 0; | 5342 | return 0; |
| 5368 | } | 5343 | } |
| @@ -5420,70 +5395,22 @@ fail: | |||
| 5420 | return ret; | 5395 | return ret; |
| 5421 | } | 5396 | } |
| 5422 | 5397 | ||
| 5423 | #define RCU_MIGRATION_IDLE 0 | ||
| 5424 | #define RCU_MIGRATION_NEED_QS 1 | ||
| 5425 | #define RCU_MIGRATION_GOT_QS 2 | ||
| 5426 | #define RCU_MIGRATION_MUST_SYNC 3 | ||
| 5427 | |||
| 5428 | /* | 5398 | /* |
| 5429 | * migration_thread - this is a highprio system thread that performs | 5399 | * migration_cpu_stop - this will be executed by a highprio stopper thread |
| 5430 | * thread migration by bumping thread off CPU then 'pushing' onto | 5400 | * and performs thread migration by bumping thread off CPU then |
| 5431 | * another runqueue. | 5401 | * 'pushing' onto another runqueue. |
| 5432 | */ | 5402 | */ |
| 5433 | static int migration_thread(void *data) | 5403 | static int migration_cpu_stop(void *data) |
| 5434 | { | 5404 | { |
| 5435 | int badcpu; | 5405 | struct migration_arg *arg = data; |
| 5436 | int cpu = (long)data; | ||
| 5437 | struct rq *rq; | ||
| 5438 | |||
| 5439 | rq = cpu_rq(cpu); | ||
| 5440 | BUG_ON(rq->migration_thread != current); | ||
| 5441 | |||
| 5442 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 5443 | while (!kthread_should_stop()) { | ||
| 5444 | struct migration_req *req; | ||
| 5445 | struct list_head *head; | ||
| 5446 | |||
| 5447 | raw_spin_lock_irq(&rq->lock); | ||
| 5448 | |||
| 5449 | if (cpu_is_offline(cpu)) { | ||
| 5450 | raw_spin_unlock_irq(&rq->lock); | ||
| 5451 | break; | ||
| 5452 | } | ||
| 5453 | |||
| 5454 | if (rq->active_balance) { | ||
| 5455 | active_load_balance(rq, cpu); | ||
| 5456 | rq->active_balance = 0; | ||
| 5457 | } | ||
| 5458 | |||
| 5459 | head = &rq->migration_queue; | ||
| 5460 | |||
| 5461 | if (list_empty(head)) { | ||
| 5462 | raw_spin_unlock_irq(&rq->lock); | ||
| 5463 | schedule(); | ||
| 5464 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 5465 | continue; | ||
| 5466 | } | ||
| 5467 | req = list_entry(head->next, struct migration_req, list); | ||
| 5468 | list_del_init(head->next); | ||
| 5469 | |||
| 5470 | if (req->task != NULL) { | ||
| 5471 | raw_spin_unlock(&rq->lock); | ||
| 5472 | __migrate_task(req->task, cpu, req->dest_cpu); | ||
| 5473 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | ||
| 5474 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | ||
| 5475 | raw_spin_unlock(&rq->lock); | ||
| 5476 | } else { | ||
| 5477 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | ||
| 5478 | raw_spin_unlock(&rq->lock); | ||
| 5479 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | ||
| 5480 | } | ||
| 5481 | local_irq_enable(); | ||
| 5482 | |||
| 5483 | complete(&req->done); | ||
| 5484 | } | ||
| 5485 | __set_current_state(TASK_RUNNING); | ||
| 5486 | 5406 | ||
| 5407 | /* | ||
| 5408 | * The original target cpu might have gone down and we might | ||
| 5409 | * be on another cpu but it doesn't matter. | ||
| 5410 | */ | ||
| 5411 | local_irq_disable(); | ||
| 5412 | __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); | ||
| 5413 | local_irq_enable(); | ||
| 5487 | return 0; | 5414 | return 0; |
| 5488 | } | 5415 | } |
| 5489 | 5416 | ||
| @@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq) | |||
| 5850 | static int __cpuinit | 5777 | static int __cpuinit |
| 5851 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | 5778 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 5852 | { | 5779 | { |
| 5853 | struct task_struct *p; | ||
| 5854 | int cpu = (long)hcpu; | 5780 | int cpu = (long)hcpu; |
| 5855 | unsigned long flags; | 5781 | unsigned long flags; |
| 5856 | struct rq *rq; | 5782 | struct rq *rq = cpu_rq(cpu); |
| 5857 | 5783 | ||
| 5858 | switch (action) { | 5784 | switch (action) { |
| 5859 | 5785 | ||
| 5860 | case CPU_UP_PREPARE: | 5786 | case CPU_UP_PREPARE: |
| 5861 | case CPU_UP_PREPARE_FROZEN: | 5787 | case CPU_UP_PREPARE_FROZEN: |
| 5862 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | ||
| 5863 | if (IS_ERR(p)) | ||
| 5864 | return NOTIFY_BAD; | ||
| 5865 | kthread_bind(p, cpu); | ||
| 5866 | /* Must be high prio: stop_machine expects to yield to it. */ | ||
| 5867 | rq = task_rq_lock(p, &flags); | ||
| 5868 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | ||
| 5869 | task_rq_unlock(rq, &flags); | ||
| 5870 | get_task_struct(p); | ||
| 5871 | cpu_rq(cpu)->migration_thread = p; | ||
| 5872 | rq->calc_load_update = calc_load_update; | 5788 | rq->calc_load_update = calc_load_update; |
| 5873 | break; | 5789 | break; |
| 5874 | 5790 | ||
| 5875 | case CPU_ONLINE: | 5791 | case CPU_ONLINE: |
| 5876 | case CPU_ONLINE_FROZEN: | 5792 | case CPU_ONLINE_FROZEN: |
| 5877 | /* Strictly unnecessary, as first user will wake it. */ | ||
| 5878 | wake_up_process(cpu_rq(cpu)->migration_thread); | ||
| 5879 | |||
| 5880 | /* Update our root-domain */ | 5793 | /* Update our root-domain */ |
| 5881 | rq = cpu_rq(cpu); | ||
| 5882 | raw_spin_lock_irqsave(&rq->lock, flags); | 5794 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 5883 | if (rq->rd) { | 5795 | if (rq->rd) { |
| 5884 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5796 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
| @@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5889 | break; | 5801 | break; |
| 5890 | 5802 | ||
| 5891 | #ifdef CONFIG_HOTPLUG_CPU | 5803 | #ifdef CONFIG_HOTPLUG_CPU |
| 5892 | case CPU_UP_CANCELED: | ||
| 5893 | case CPU_UP_CANCELED_FROZEN: | ||
| 5894 | if (!cpu_rq(cpu)->migration_thread) | ||
| 5895 | break; | ||
| 5896 | /* Unbind it from offline cpu so it can run. Fall thru. */ | ||
| 5897 | kthread_bind(cpu_rq(cpu)->migration_thread, | ||
| 5898 | cpumask_any(cpu_online_mask)); | ||
| 5899 | kthread_stop(cpu_rq(cpu)->migration_thread); | ||
| 5900 | put_task_struct(cpu_rq(cpu)->migration_thread); | ||
| 5901 | cpu_rq(cpu)->migration_thread = NULL; | ||
| 5902 | break; | ||
| 5903 | |||
| 5904 | case CPU_DEAD: | 5804 | case CPU_DEAD: |
| 5905 | case CPU_DEAD_FROZEN: | 5805 | case CPU_DEAD_FROZEN: |
| 5906 | migrate_live_tasks(cpu); | 5806 | migrate_live_tasks(cpu); |
| 5907 | rq = cpu_rq(cpu); | ||
| 5908 | kthread_stop(rq->migration_thread); | ||
| 5909 | put_task_struct(rq->migration_thread); | ||
| 5910 | rq->migration_thread = NULL; | ||
| 5911 | /* Idle task back to normal (off runqueue, low prio) */ | 5807 | /* Idle task back to normal (off runqueue, low prio) */ |
| 5912 | raw_spin_lock_irq(&rq->lock); | 5808 | raw_spin_lock_irq(&rq->lock); |
| 5913 | deactivate_task(rq, rq->idle, 0); | 5809 | deactivate_task(rq, rq->idle, 0); |
| @@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5918 | migrate_nr_uninterruptible(rq); | 5814 | migrate_nr_uninterruptible(rq); |
| 5919 | BUG_ON(rq->nr_running != 0); | 5815 | BUG_ON(rq->nr_running != 0); |
| 5920 | calc_global_load_remove(rq); | 5816 | calc_global_load_remove(rq); |
| 5921 | /* | ||
| 5922 | * No need to migrate the tasks: it was best-effort if | ||
| 5923 | * they didn't take sched_hotcpu_mutex. Just wake up | ||
| 5924 | * the requestors. | ||
| 5925 | */ | ||
| 5926 | raw_spin_lock_irq(&rq->lock); | ||
| 5927 | while (!list_empty(&rq->migration_queue)) { | ||
| 5928 | struct migration_req *req; | ||
| 5929 | |||
| 5930 | req = list_entry(rq->migration_queue.next, | ||
| 5931 | struct migration_req, list); | ||
| 5932 | list_del_init(&req->list); | ||
| 5933 | raw_spin_unlock_irq(&rq->lock); | ||
| 5934 | complete(&req->done); | ||
| 5935 | raw_spin_lock_irq(&rq->lock); | ||
| 5936 | } | ||
| 5937 | raw_spin_unlock_irq(&rq->lock); | ||
| 5938 | break; | 5817 | break; |
| 5939 | 5818 | ||
| 5940 | case CPU_DYING: | 5819 | case CPU_DYING: |
| 5941 | case CPU_DYING_FROZEN: | 5820 | case CPU_DYING_FROZEN: |
| 5942 | /* Update our root-domain */ | 5821 | /* Update our root-domain */ |
| 5943 | rq = cpu_rq(cpu); | ||
| 5944 | raw_spin_lock_irqsave(&rq->lock, flags); | 5822 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 5945 | if (rq->rd) { | 5823 | if (rq->rd) { |
| 5946 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5824 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
| @@ -7757,10 +7635,8 @@ void __init sched_init(void) | |||
| 7757 | rq->push_cpu = 0; | 7635 | rq->push_cpu = 0; |
| 7758 | rq->cpu = i; | 7636 | rq->cpu = i; |
| 7759 | rq->online = 0; | 7637 | rq->online = 0; |
| 7760 | rq->migration_thread = NULL; | ||
| 7761 | rq->idle_stamp = 0; | 7638 | rq->idle_stamp = 0; |
| 7762 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 7639 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
| 7763 | INIT_LIST_HEAD(&rq->migration_queue); | ||
| 7764 | rq_attach_root(rq, &def_root_domain); | 7640 | rq_attach_root(rq, &def_root_domain); |
| 7765 | #endif | 7641 | #endif |
| 7766 | init_rq_hrtick(rq); | 7642 | init_rq_hrtick(rq); |
| @@ -9054,43 +8930,32 @@ struct cgroup_subsys cpuacct_subsys = { | |||
| 9054 | 8930 | ||
| 9055 | #ifndef CONFIG_SMP | 8931 | #ifndef CONFIG_SMP |
| 9056 | 8932 | ||
| 9057 | int rcu_expedited_torture_stats(char *page) | ||
| 9058 | { | ||
| 9059 | return 0; | ||
| 9060 | } | ||
| 9061 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
| 9062 | |||
| 9063 | void synchronize_sched_expedited(void) | 8933 | void synchronize_sched_expedited(void) |
| 9064 | { | 8934 | { |
| 8935 | barrier(); | ||
| 9065 | } | 8936 | } |
| 9066 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 8937 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
| 9067 | 8938 | ||
| 9068 | #else /* #ifndef CONFIG_SMP */ | 8939 | #else /* #ifndef CONFIG_SMP */ |
| 9069 | 8940 | ||
| 9070 | static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); | 8941 | static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); |
| 9071 | static DEFINE_MUTEX(rcu_sched_expedited_mutex); | ||
| 9072 | |||
| 9073 | #define RCU_EXPEDITED_STATE_POST -2 | ||
| 9074 | #define RCU_EXPEDITED_STATE_IDLE -1 | ||
| 9075 | |||
| 9076 | static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | ||
| 9077 | 8942 | ||
| 9078 | int rcu_expedited_torture_stats(char *page) | 8943 | static int synchronize_sched_expedited_cpu_stop(void *data) |
| 9079 | { | 8944 | { |
| 9080 | int cnt = 0; | 8945 | /* |
| 9081 | int cpu; | 8946 | * There must be a full memory barrier on each affected CPU |
| 9082 | 8947 | * between the time that try_stop_cpus() is called and the | |
| 9083 | cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); | 8948 | * time that it returns. |
| 9084 | for_each_online_cpu(cpu) { | 8949 | * |
| 9085 | cnt += sprintf(&page[cnt], " %d:%d", | 8950 | * In the current initial implementation of cpu_stop, the |
| 9086 | cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); | 8951 | * above condition is already met when the control reaches |
| 9087 | } | 8952 | * this point and the following smp_mb() is not strictly |
| 9088 | cnt += sprintf(&page[cnt], "\n"); | 8953 | * necessary. Do smp_mb() anyway for documentation and |
| 9089 | return cnt; | 8954 | * robustness against future implementation changes. |
| 8955 | */ | ||
| 8956 | smp_mb(); /* See above comment block. */ | ||
| 8957 | return 0; | ||
| 9090 | } | 8958 | } |
| 9091 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
| 9092 | |||
| 9093 | static long synchronize_sched_expedited_count; | ||
| 9094 | 8959 | ||
| 9095 | /* | 8960 | /* |
| 9096 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | 8961 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" |
| @@ -9104,18 +8969,14 @@ static long synchronize_sched_expedited_count; | |||
| 9104 | */ | 8969 | */ |
| 9105 | void synchronize_sched_expedited(void) | 8970 | void synchronize_sched_expedited(void) |
| 9106 | { | 8971 | { |
| 9107 | int cpu; | 8972 | int snap, trycount = 0; |
| 9108 | unsigned long flags; | ||
| 9109 | bool need_full_sync = 0; | ||
| 9110 | struct rq *rq; | ||
| 9111 | struct migration_req *req; | ||
| 9112 | long snap; | ||
| 9113 | int trycount = 0; | ||
| 9114 | 8973 | ||
| 9115 | smp_mb(); /* ensure prior mod happens before capturing snap. */ | 8974 | smp_mb(); /* ensure prior mod happens before capturing snap. */ |
| 9116 | snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; | 8975 | snap = atomic_read(&synchronize_sched_expedited_count) + 1; |
| 9117 | get_online_cpus(); | 8976 | get_online_cpus(); |
| 9118 | while (!mutex_trylock(&rcu_sched_expedited_mutex)) { | 8977 | while (try_stop_cpus(cpu_online_mask, |
| 8978 | synchronize_sched_expedited_cpu_stop, | ||
| 8979 | NULL) == -EAGAIN) { | ||
| 9119 | put_online_cpus(); | 8980 | put_online_cpus(); |
| 9120 | if (trycount++ < 10) | 8981 | if (trycount++ < 10) |
| 9121 | udelay(trycount * num_online_cpus()); | 8982 | udelay(trycount * num_online_cpus()); |
| @@ -9123,41 +8984,15 @@ void synchronize_sched_expedited(void) | |||
| 9123 | synchronize_sched(); | 8984 | synchronize_sched(); |
| 9124 | return; | 8985 | return; |
| 9125 | } | 8986 | } |
| 9126 | if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { | 8987 | if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { |
| 9127 | smp_mb(); /* ensure test happens before caller kfree */ | 8988 | smp_mb(); /* ensure test happens before caller kfree */ |
| 9128 | return; | 8989 | return; |
| 9129 | } | 8990 | } |
| 9130 | get_online_cpus(); | 8991 | get_online_cpus(); |
| 9131 | } | 8992 | } |
| 9132 | rcu_expedited_state = RCU_EXPEDITED_STATE_POST; | 8993 | atomic_inc(&synchronize_sched_expedited_count); |
| 9133 | for_each_online_cpu(cpu) { | 8994 | smp_mb__after_atomic_inc(); /* ensure post-GP actions seen after GP. */ |
| 9134 | rq = cpu_rq(cpu); | ||
| 9135 | req = &per_cpu(rcu_migration_req, cpu); | ||
| 9136 | init_completion(&req->done); | ||
| 9137 | req->task = NULL; | ||
| 9138 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | ||
| 9139 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
| 9140 | list_add(&req->list, &rq->migration_queue); | ||
| 9141 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
| 9142 | wake_up_process(rq->migration_thread); | ||
| 9143 | } | ||
| 9144 | for_each_online_cpu(cpu) { | ||
| 9145 | rcu_expedited_state = cpu; | ||
| 9146 | req = &per_cpu(rcu_migration_req, cpu); | ||
| 9147 | rq = cpu_rq(cpu); | ||
| 9148 | wait_for_completion(&req->done); | ||
| 9149 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
| 9150 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | ||
| 9151 | need_full_sync = 1; | ||
| 9152 | req->dest_cpu = RCU_MIGRATION_IDLE; | ||
| 9153 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
| 9154 | } | ||
| 9155 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | ||
| 9156 | synchronize_sched_expedited_count++; | ||
| 9157 | mutex_unlock(&rcu_sched_expedited_mutex); | ||
| 9158 | put_online_cpus(); | 8995 | put_online_cpus(); |
| 9159 | if (need_full_sync) | ||
| 9160 | synchronize_sched(); | ||
| 9161 | } | 8996 | } |
| 9162 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 8997 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
| 9163 | 8998 | ||
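The rewritten "This is how migration works" comment above replaces the old request-queue/completion dance with a call into the stopper. Condensed from the set_cpus_allowed_ptr() and sched_exec() hunks (locking, affinity checks and error handling omitted; the helper name is invented), the blocking request now has this shape:

```c
/* Illustrative condensation of the new migration path in kernel/sched.c:
 * drop the runqueue lock, then block in stop_one_cpu() until
 * migration_cpu_stop() has run on the task's current CPU. */
static void example_migrate_and_wait(struct task_struct *p, struct rq *rq,
				     unsigned long *flags, int dest_cpu)
{
	struct migration_arg arg = { p, dest_cpu };

	task_rq_unlock(rq, flags);
	stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
}
```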
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d1..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle) | |||
| 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); | 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); |
| 2799 | } | 2799 | } |
| 2800 | 2800 | ||
| 2801 | static int active_load_balance_cpu_stop(void *data); | ||
| 2802 | |||
| 2801 | /* | 2803 | /* |
| 2802 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2804 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
| 2803 | * tasks if there is an imbalance. | 2805 | * tasks if there is an imbalance. |
| @@ -2887,8 +2889,9 @@ redo: | |||
| 2887 | if (need_active_balance(sd, sd_idle, idle)) { | 2889 | if (need_active_balance(sd, sd_idle, idle)) { |
| 2888 | raw_spin_lock_irqsave(&busiest->lock, flags); | 2890 | raw_spin_lock_irqsave(&busiest->lock, flags); |
| 2889 | 2891 | ||
| 2890 | /* don't kick the migration_thread, if the curr | 2892 | /* don't kick the active_load_balance_cpu_stop, |
| 2891 | * task on busiest cpu can't be moved to this_cpu | 2893 | * if the curr task on busiest cpu can't be |
| 2894 | * moved to this_cpu | ||
| 2892 | */ | 2895 | */ |
| 2893 | if (!cpumask_test_cpu(this_cpu, | 2896 | if (!cpumask_test_cpu(this_cpu, |
| 2894 | &busiest->curr->cpus_allowed)) { | 2897 | &busiest->curr->cpus_allowed)) { |
| @@ -2898,14 +2901,22 @@ redo: | |||
| 2898 | goto out_one_pinned; | 2901 | goto out_one_pinned; |
| 2899 | } | 2902 | } |
| 2900 | 2903 | ||
| 2904 | /* | ||
| 2905 | * ->active_balance synchronizes accesses to | ||
| 2906 | * ->active_balance_work. Once set, it's cleared | ||
| 2907 | * only after active load balance is finished. | ||
| 2908 | */ | ||
| 2901 | if (!busiest->active_balance) { | 2909 | if (!busiest->active_balance) { |
| 2902 | busiest->active_balance = 1; | 2910 | busiest->active_balance = 1; |
| 2903 | busiest->push_cpu = this_cpu; | 2911 | busiest->push_cpu = this_cpu; |
| 2904 | active_balance = 1; | 2912 | active_balance = 1; |
| 2905 | } | 2913 | } |
| 2906 | raw_spin_unlock_irqrestore(&busiest->lock, flags); | 2914 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
| 2915 | |||
| 2907 | if (active_balance) | 2916 | if (active_balance) |
| 2908 | wake_up_process(busiest->migration_thread); | 2917 | stop_one_cpu_nowait(cpu_of(busiest), |
| 2918 | active_load_balance_cpu_stop, busiest, | ||
| 2919 | &busiest->active_balance_work); | ||
| 2909 | 2920 | ||
| 2910 | /* | 2921 | /* |
| 2911 | * We've kicked active balancing, reset the failure | 2922 | * We've kicked active balancing, reset the failure |
| @@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3012 | } | 3023 | } |
| 3013 | 3024 | ||
| 3014 | /* | 3025 | /* |
| 3015 | * active_load_balance is run by migration threads. It pushes running tasks | 3026 | * active_load_balance_cpu_stop is run by cpu stopper. It pushes |
| 3016 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 3027 | * running tasks off the busiest CPU onto idle CPUs. It requires at |
| 3017 | * running on each physical CPU where possible, and avoids physical / | 3028 | * least 1 task to be running on each physical CPU where possible, and |
| 3018 | * logical imbalances. | 3029 | * avoids physical / logical imbalances. |
| 3019 | * | ||
| 3020 | * Called with busiest_rq locked. | ||
| 3021 | */ | 3030 | */ |
| 3022 | static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | 3031 | static int active_load_balance_cpu_stop(void *data) |
| 3023 | { | 3032 | { |
| 3033 | struct rq *busiest_rq = data; | ||
| 3034 | int busiest_cpu = cpu_of(busiest_rq); | ||
| 3024 | int target_cpu = busiest_rq->push_cpu; | 3035 | int target_cpu = busiest_rq->push_cpu; |
| 3036 | struct rq *target_rq = cpu_rq(target_cpu); | ||
| 3025 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
| 3026 | struct rq *target_rq; | 3038 | |
| 3039 | raw_spin_lock_irq(&busiest_rq->lock); | ||
| 3040 | |||
| 3041 | /* make sure the requested cpu hasn't gone down in the meantime */ | ||
| 3042 | if (unlikely(busiest_cpu != smp_processor_id() || | ||
| 3043 | !busiest_rq->active_balance)) | ||
| 3044 | goto out_unlock; | ||
| 3027 | 3045 | ||
| 3028 | /* Is there any task to move? */ | 3046 | /* Is there any task to move? */ |
| 3029 | if (busiest_rq->nr_running <= 1) | 3047 | if (busiest_rq->nr_running <= 1) |
| 3030 | return; | 3048 | goto out_unlock; |
| 3031 | |||
| 3032 | target_rq = cpu_rq(target_cpu); | ||
| 3033 | 3049 | ||
| 3034 | /* | 3050 | /* |
| 3035 | * This condition is "impossible", if it occurs | 3051 | * This condition is "impossible", if it occurs |
| @@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3058 | schedstat_inc(sd, alb_failed); | 3074 | schedstat_inc(sd, alb_failed); |
| 3059 | } | 3075 | } |
| 3060 | double_unlock_balance(busiest_rq, target_rq); | 3076 | double_unlock_balance(busiest_rq, target_rq); |
| 3077 | out_unlock: | ||
| 3078 | busiest_rq->active_balance = 0; | ||
| 3079 | raw_spin_unlock_irq(&busiest_rq->lock); | ||
| 3080 | return 0; | ||
| 3061 | } | 3081 | } |
| 3062 | 3082 | ||
| 3063 | #ifdef CONFIG_NO_HZ | 3083 | #ifdef CONFIG_NO_HZ |
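Because active_load_balance_cpu_stop() is queued with stop_one_cpu_nowait(), the load balancer never blocks; the ->active_balance flag is what keeps the embedded cpu_stop_work from being queued a second time while the first run is still pending. The same fire-and-forget pattern, with invented names (example_state, example_kick_fn(), example_kick_cpu()), looks roughly like this:

```c
#include <linux/spinlock.h>
#include <linux/stop_machine.h>

struct example_state {
	raw_spinlock_t lock;
	int kick_pending;		/* plays the role of rq->active_balance */
	struct cpu_stop_work kick_work;	/* like rq->active_balance_work */
};

/* Non-sleeping callback run by the target CPU's stopper. */
static int example_kick_fn(void *data)
{
	struct example_state *st = data;

	/* ... do the work on the target CPU ... */

	raw_spin_lock_irq(&st->lock);
	st->kick_pending = 0;		/* allow the next kick */
	raw_spin_unlock_irq(&st->lock);
	return 0;
}

static void example_kick_cpu(unsigned int cpu, struct example_state *st)
{
	bool queue = false;

	raw_spin_lock_irq(&st->lock);
	if (!st->kick_pending) {
		st->kick_pending = 1;
		queue = true;
	}
	raw_spin_unlock_irq(&st->lock);

	if (queue)
		stop_one_cpu_nowait(cpu, example_kick_fn, st, &st->kick_work);
}
```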
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bb9fb1bd79c..ef51d1fcf5e6 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
| @@ -1,17 +1,381 @@ | |||
| 1 | /* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation. | 1 | /* |
| 2 | * GPL v2 and any later version. | 2 | * kernel/stop_machine.c |
| 3 | * | ||
| 4 | * Copyright (C) 2008, 2005 IBM Corporation. | ||
| 5 | * Copyright (C) 2008, 2005 Rusty Russell rusty@rustcorp.com.au | ||
| 6 | * Copyright (C) 2010 SUSE Linux Products GmbH | ||
| 7 | * Copyright (C) 2010 Tejun Heo <tj@kernel.org> | ||
| 8 | * | ||
| 9 | * This file is released under the GPLv2 and any later version. | ||
| 3 | */ | 10 | */ |
| 11 | #include <linux/completion.h> | ||
| 4 | #include <linux/cpu.h> | 12 | #include <linux/cpu.h> |
| 5 | #include <linux/err.h> | 13 | #include <linux/init.h> |
| 6 | #include <linux/kthread.h> | 14 | #include <linux/kthread.h> |
| 7 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 16 | #include <linux/percpu.h> | ||
| 8 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
| 9 | #include <linux/stop_machine.h> | 18 | #include <linux/stop_machine.h> |
| 10 | #include <linux/syscalls.h> | ||
| 11 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
| 20 | #include <linux/kallsyms.h> | ||
| 12 | 21 | ||
| 13 | #include <asm/atomic.h> | 22 | #include <asm/atomic.h> |
| 14 | #include <asm/uaccess.h> | 23 | |
| 24 | /* | ||
| 25 | * Structure to determine completion condition and record errors. May | ||
| 26 | * be shared by works on different cpus. | ||
| 27 | */ | ||
| 28 | struct cpu_stop_done { | ||
| 29 | atomic_t nr_todo; /* nr left to execute */ | ||
| 30 | bool executed; /* actually executed? */ | ||
| 31 | int ret; /* collected return value */ | ||
| 32 | struct completion completion; /* fired if nr_todo reaches 0 */ | ||
| 33 | }; | ||
| 34 | |||
| 35 | /* the actual stopper, one per every possible cpu, enabled on online cpus */ | ||
| 36 | struct cpu_stopper { | ||
| 37 | spinlock_t lock; | ||
| 38 | struct list_head works; /* list of pending works */ | ||
| 39 | struct task_struct *thread; /* stopper thread */ | ||
| 40 | bool enabled; /* is this stopper enabled? */ | ||
| 41 | }; | ||
| 42 | |||
| 43 | static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper); | ||
| 44 | |||
| 45 | static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo) | ||
| 46 | { | ||
| 47 | memset(done, 0, sizeof(*done)); | ||
| 48 | atomic_set(&done->nr_todo, nr_todo); | ||
| 49 | init_completion(&done->completion); | ||
| 50 | } | ||
| 51 | |||
| 52 | /* signal completion unless @done is NULL */ | ||
| 53 | static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed) | ||
| 54 | { | ||
| 55 | if (done) { | ||
| 56 | if (executed) | ||
| 57 | done->executed = true; | ||
| 58 | if (atomic_dec_and_test(&done->nr_todo)) | ||
| 59 | complete(&done->completion); | ||
| 60 | } | ||
| 61 | } | ||
| 62 | |||
| 63 | /* queue @work to @stopper. if offline, @work is completed immediately */ | ||
| 64 | static void cpu_stop_queue_work(struct cpu_stopper *stopper, | ||
| 65 | struct cpu_stop_work *work) | ||
| 66 | { | ||
| 67 | unsigned long flags; | ||
| 68 | |||
| 69 | spin_lock_irqsave(&stopper->lock, flags); | ||
| 70 | |||
| 71 | if (stopper->enabled) { | ||
| 72 | list_add_tail(&work->list, &stopper->works); | ||
| 73 | wake_up_process(stopper->thread); | ||
| 74 | } else | ||
| 75 | cpu_stop_signal_done(work->done, false); | ||
| 76 | |||
| 77 | spin_unlock_irqrestore(&stopper->lock, flags); | ||
| 78 | } | ||
| 79 | |||
| 80 | /** | ||
| 81 | * stop_one_cpu - stop a cpu | ||
| 82 | * @cpu: cpu to stop | ||
| 83 | * @fn: function to execute | ||
| 84 | * @arg: argument to @fn | ||
| 85 | * | ||
| 86 | * Execute @fn(@arg) on @cpu. @fn is run in a process context with | ||
| 87 | * the highest priority preempting any task on the cpu and | ||
| 88 | * monopolizing it. This function returns after the execution is | ||
| 89 | * complete. | ||
| 90 | * | ||
| 91 | * This function doesn't guarantee @cpu stays online till @fn | ||
| 92 | * completes. If @cpu goes down in the middle, execution may happen | ||
| 93 | * partially or fully on different cpus. @fn should either be ready | ||
| 94 | * for that or the caller should ensure that @cpu stays online until | ||
| 95 | * this function completes. | ||
| 96 | * | ||
| 97 | * CONTEXT: | ||
| 98 | * Might sleep. | ||
| 99 | * | ||
| 100 | * RETURNS: | ||
| 101 | * -ENOENT if @fn(@arg) was not executed because @cpu was offline; | ||
| 102 | * otherwise, the return value of @fn. | ||
| 103 | */ | ||
| 104 | int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg) | ||
| 105 | { | ||
| 106 | struct cpu_stop_done done; | ||
| 107 | struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done }; | ||
| 108 | |||
| 109 | cpu_stop_init_done(&done, 1); | ||
| 110 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work); | ||
| 111 | wait_for_completion(&done.completion); | ||
| 112 | return done.executed ? done.ret : -ENOENT; | ||
| 113 | } | ||
| 114 | |||
| 115 | /** | ||
| 116 | * stop_one_cpu_nowait - stop a cpu but don't wait for completion | ||
| 117 | * @cpu: cpu to stop | ||
| 118 | * @fn: function to execute | ||
| 119 | * @arg: argument to @fn | ||
| 120 | * | ||
| 121 | * Similar to stop_one_cpu() but doesn't wait for completion. The | ||
| 122 | * caller is responsible for ensuring @work_buf is currently unused | ||
| 123 | * and will remain untouched until stopper starts executing @fn. | ||
| 124 | * | ||
| 125 | * CONTEXT: | ||
| 126 | * Don't care. | ||
| 127 | */ | ||
| 128 | void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, | ||
| 129 | struct cpu_stop_work *work_buf) | ||
| 130 | { | ||
| 131 | *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, }; | ||
| 132 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf); | ||
| 133 | } | ||
| 134 | |||
| 135 | /* static data for stop_cpus */ | ||
| 136 | static DEFINE_MUTEX(stop_cpus_mutex); | ||
| 137 | static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work); | ||
| 138 | |||
| 139 | int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
| 140 | { | ||
| 141 | struct cpu_stop_work *work; | ||
| 142 | struct cpu_stop_done done; | ||
| 143 | unsigned int cpu; | ||
| 144 | |||
| 145 | /* initialize works and done */ | ||
| 146 | for_each_cpu(cpu, cpumask) { | ||
| 147 | work = &per_cpu(stop_cpus_work, cpu); | ||
| 148 | work->fn = fn; | ||
| 149 | work->arg = arg; | ||
| 150 | work->done = &done; | ||
| 151 | } | ||
| 152 | cpu_stop_init_done(&done, cpumask_weight(cpumask)); | ||
| 153 | |||
| 154 | /* | ||
| 155 | * Disable preemption while queueing to avoid getting | ||
| 156 | * preempted by a stopper which might wait for other stoppers | ||
| 157 | * to enter @fn which can lead to deadlock. | ||
| 158 | */ | ||
| 159 | preempt_disable(); | ||
| 160 | for_each_cpu(cpu, cpumask) | ||
| 161 | cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), | ||
| 162 | &per_cpu(stop_cpus_work, cpu)); | ||
| 163 | preempt_enable(); | ||
| 164 | |||
| 165 | wait_for_completion(&done.completion); | ||
| 166 | return done.executed ? done.ret : -ENOENT; | ||
| 167 | } | ||
| 168 | |||
| 169 | /** | ||
| 170 | * stop_cpus - stop multiple cpus | ||
| 171 | * @cpumask: cpus to stop | ||
| 172 | * @fn: function to execute | ||
| 173 | * @arg: argument to @fn | ||
| 174 | * | ||
| 175 | * Execute @fn(@arg) on online cpus in @cpumask. On each target cpu, | ||
| 176 | * @fn is run in a process context with the highest priority | ||
| 177 | * preempting any task on the cpu and monopolizing it. This function | ||
| 178 | * returns after all executions are complete. | ||
| 179 | * | ||
| 180 | * This function doesn't guarantee the cpus in @cpumask stay online | ||
| 181 | * till @fn completes. If some cpus go down in the middle, execution | ||
| 182 | * on the cpu may happen partially or fully on different cpus. @fn | ||
| 183 | * should either be ready for that or the caller should ensure that | ||
| 184 | * the cpus stay online until this function completes. | ||
| 185 | * | ||
| 186 | * All stop_cpus() calls are serialized making it safe for @fn to wait | ||
| 187 | * for all cpus to start executing it. | ||
| 188 | * | ||
| 189 | * CONTEXT: | ||
| 190 | * Might sleep. | ||
| 191 | * | ||
| 192 | * RETURNS: | ||
| 193 | * -ENOENT if @fn(@arg) was not executed at all because all cpus in | ||
| 194 | * @cpumask were offline; otherwise, 0 if all executions of @fn | ||
| 195 | * returned 0, any non zero return value if any returned non zero. | ||
| 196 | */ | ||
| 197 | int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
| 198 | { | ||
| 199 | int ret; | ||
| 200 | |||
| 201 | /* static works are used, process one request at a time */ | ||
| 202 | mutex_lock(&stop_cpus_mutex); | ||
| 203 | ret = __stop_cpus(cpumask, fn, arg); | ||
| 204 | mutex_unlock(&stop_cpus_mutex); | ||
| 205 | return ret; | ||
| 206 | } | ||
| 207 | |||
| 208 | /** | ||
| 209 | * try_stop_cpus - try to stop multiple cpus | ||
| 210 | * @cpumask: cpus to stop | ||
| 211 | * @fn: function to execute | ||
| 212 | * @arg: argument to @fn | ||
| 213 | * | ||
| 214 | * Identical to stop_cpus() except that it fails with -EAGAIN if | ||
| 215 | * someone else is already using the facility. | ||
| 216 | * | ||
| 217 | * CONTEXT: | ||
| 218 | * Might sleep. | ||
| 219 | * | ||
| 220 | * RETURNS: | ||
| 221 | * -EAGAIN if someone else is already stopping cpus, -ENOENT if | ||
| 222 | * @fn(@arg) was not executed at all because all cpus in @cpumask were | ||
| 223 | * offline; otherwise, 0 if all executions of @fn returned 0, any non | ||
| 224 | * zero return value if any returned non zero. | ||
| 225 | */ | ||
| 226 | int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg) | ||
| 227 | { | ||
| 228 | int ret; | ||
| 229 | |||
| 230 | /* static works are used, process one request at a time */ | ||
| 231 | if (!mutex_trylock(&stop_cpus_mutex)) | ||
| 232 | return -EAGAIN; | ||
| 233 | ret = __stop_cpus(cpumask, fn, arg); | ||
| 234 | mutex_unlock(&stop_cpus_mutex); | ||
| 235 | return ret; | ||
| 236 | } | ||
| 237 | |||
| 238 | static int cpu_stopper_thread(void *data) | ||
| 239 | { | ||
| 240 | struct cpu_stopper *stopper = data; | ||
| 241 | struct cpu_stop_work *work; | ||
| 242 | int ret; | ||
| 243 | |||
| 244 | repeat: | ||
| 245 | set_current_state(TASK_INTERRUPTIBLE); /* mb paired w/ kthread_stop */ | ||
| 246 | |||
| 247 | if (kthread_should_stop()) { | ||
| 248 | __set_current_state(TASK_RUNNING); | ||
| 249 | return 0; | ||
| 250 | } | ||
| 251 | |||
| 252 | work = NULL; | ||
| 253 | spin_lock_irq(&stopper->lock); | ||
| 254 | if (!list_empty(&stopper->works)) { | ||
| 255 | work = list_first_entry(&stopper->works, | ||
| 256 | struct cpu_stop_work, list); | ||
| 257 | list_del_init(&work->list); | ||
| 258 | } | ||
| 259 | spin_unlock_irq(&stopper->lock); | ||
| 260 | |||
| 261 | if (work) { | ||
| 262 | cpu_stop_fn_t fn = work->fn; | ||
| 263 | void *arg = work->arg; | ||
| 264 | struct cpu_stop_done *done = work->done; | ||
| 265 | char ksym_buf[KSYM_NAME_LEN]; | ||
| 266 | |||
| 267 | __set_current_state(TASK_RUNNING); | ||
| 268 | |||
| 269 | /* cpu stop callbacks are not allowed to sleep */ | ||
| 270 | preempt_disable(); | ||
| 271 | |||
| 272 | ret = fn(arg); | ||
| 273 | if (ret) | ||
| 274 | done->ret = ret; | ||
| 275 | |||
| 276 | /* restore preemption and check it's still balanced */ | ||
| 277 | preempt_enable(); | ||
| 278 | WARN_ONCE(preempt_count(), | ||
| 279 | "cpu_stop: %s(%p) leaked preempt count\n", | ||
| 280 | kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL, | ||
| 281 | ksym_buf), arg); | ||
| 282 | |||
| 283 | cpu_stop_signal_done(done, true); | ||
| 284 | } else | ||
| 285 | schedule(); | ||
| 286 | |||
| 287 | goto repeat; | ||
| 288 | } | ||
| 289 | |||
| 290 | /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */ | ||
| 291 | static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | ||
| 292 | unsigned long action, void *hcpu) | ||
| 293 | { | ||
| 294 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
| 295 | unsigned int cpu = (unsigned long)hcpu; | ||
| 296 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | ||
| 297 | struct cpu_stop_work *work; | ||
| 298 | struct task_struct *p; | ||
| 299 | |||
| 300 | switch (action & ~CPU_TASKS_FROZEN) { | ||
| 301 | case CPU_UP_PREPARE: | ||
| 302 | BUG_ON(stopper->thread || stopper->enabled || | ||
| 303 | !list_empty(&stopper->works)); | ||
| 304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", | ||
| 305 | cpu); | ||
| 306 | if (IS_ERR(p)) | ||
| 307 | return NOTIFY_BAD; | ||
| 308 | sched_setscheduler_nocheck(p, SCHED_FIFO, &param); | ||
| 309 | get_task_struct(p); | ||
| 310 | stopper->thread = p; | ||
| 311 | break; | ||
| 312 | |||
| 313 | case CPU_ONLINE: | ||
| 314 | kthread_bind(stopper->thread, cpu); | ||
| 315 | /* strictly unnecessary, as first user will wake it */ | ||
| 316 | wake_up_process(stopper->thread); | ||
| 317 | /* mark enabled */ | ||
| 318 | spin_lock_irq(&stopper->lock); | ||
| 319 | stopper->enabled = true; | ||
| 320 | spin_unlock_irq(&stopper->lock); | ||
| 321 | break; | ||
| 322 | |||
| 323 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 324 | case CPU_UP_CANCELED: | ||
| 325 | case CPU_DEAD: | ||
| 326 | /* kill the stopper */ | ||
| 327 | kthread_stop(stopper->thread); | ||
| 328 | /* drain remaining works */ | ||
| 329 | spin_lock_irq(&stopper->lock); | ||
| 330 | list_for_each_entry(work, &stopper->works, list) | ||
| 331 | cpu_stop_signal_done(work->done, false); | ||
| 332 | stopper->enabled = false; | ||
| 333 | spin_unlock_irq(&stopper->lock); | ||
| 334 | /* release the stopper */ | ||
| 335 | put_task_struct(stopper->thread); | ||
| 336 | stopper->thread = NULL; | ||
| 337 | break; | ||
| 338 | #endif | ||
| 339 | } | ||
| 340 | |||
| 341 | return NOTIFY_OK; | ||
| 342 | } | ||
| 343 | |||
| 344 | /* | ||
| 345 | * Give it a higher priority so that the cpu stopper is available to | ||
| 346 | * other cpu notifiers. It currently shares the same priority as the | ||
| 347 | * sched migration_notifier. | ||
| 348 | */ | ||
| 349 | static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = { | ||
| 350 | .notifier_call = cpu_stop_cpu_callback, | ||
| 351 | .priority = 10, | ||
| 352 | }; | ||
| 353 | |||
| 354 | static int __init cpu_stop_init(void) | ||
| 355 | { | ||
| 356 | void *bcpu = (void *)(long)smp_processor_id(); | ||
| 357 | unsigned int cpu; | ||
| 358 | int err; | ||
| 359 | |||
| 360 | for_each_possible_cpu(cpu) { | ||
| 361 | struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); | ||
| 362 | |||
| 363 | spin_lock_init(&stopper->lock); | ||
| 364 | INIT_LIST_HEAD(&stopper->works); | ||
| 365 | } | ||
| 366 | |||
| 367 | /* start one for the boot cpu */ | ||
| 368 | err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE, | ||
| 369 | bcpu); | ||
| 370 | BUG_ON(err == NOTIFY_BAD); | ||
| 371 | cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu); | ||
| 372 | register_cpu_notifier(&cpu_stop_cpu_notifier); | ||
| 373 | |||
| 374 | return 0; | ||
| 375 | } | ||
| 376 | early_initcall(cpu_stop_init); | ||
| 377 | |||
| 378 | #ifdef CONFIG_STOP_MACHINE | ||
| 15 | 379 | ||
| 16 | /* This controls the threads on each CPU. */ | 380 | /* This controls the threads on each CPU. */ |
| 17 | enum stopmachine_state { | 381 | enum stopmachine_state { |
| @@ -26,174 +390,94 @@ enum stopmachine_state { | |||
| 26 | /* Exit */ | 390 | /* Exit */ |
| 27 | STOPMACHINE_EXIT, | 391 | STOPMACHINE_EXIT, |
| 28 | }; | 392 | }; |
| 29 | static enum stopmachine_state state; | ||
| 30 | 393 | ||
| 31 | struct stop_machine_data { | 394 | struct stop_machine_data { |
| 32 | int (*fn)(void *); | 395 | int (*fn)(void *); |
| 33 | void *data; | 396 | void *data; |
| 34 | int fnret; | 397 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ |
| 398 | unsigned int num_threads; | ||
| 399 | const struct cpumask *active_cpus; | ||
| 400 | |||
| 401 | enum stopmachine_state state; | ||
| 402 | atomic_t thread_ack; | ||
| 35 | }; | 403 | }; |
| 36 | 404 | ||
| 37 | /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */ | 405 | static void set_state(struct stop_machine_data *smdata, |
| 38 | static unsigned int num_threads; | 406 | enum stopmachine_state newstate) |
| 39 | static atomic_t thread_ack; | ||
| 40 | static DEFINE_MUTEX(lock); | ||
| 41 | /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ | ||
| 42 | static DEFINE_MUTEX(setup_lock); | ||
| 43 | /* Users of stop_machine. */ | ||
| 44 | static int refcount; | ||
| 45 | static struct workqueue_struct *stop_machine_wq; | ||
| 46 | static struct stop_machine_data active, idle; | ||
| 47 | static const struct cpumask *active_cpus; | ||
| 48 | static void __percpu *stop_machine_work; | ||
| 49 | |||
| 50 | static void set_state(enum stopmachine_state newstate) | ||
| 51 | { | 407 | { |
| 52 | /* Reset ack counter. */ | 408 | /* Reset ack counter. */ |
| 53 | atomic_set(&thread_ack, num_threads); | 409 | atomic_set(&smdata->thread_ack, smdata->num_threads); |
| 54 | smp_wmb(); | 410 | smp_wmb(); |
| 55 | state = newstate; | 411 | smdata->state = newstate; |
| 56 | } | 412 | } |
| 57 | 413 | ||
| 58 | /* Last one to ack a state moves to the next state. */ | 414 | /* Last one to ack a state moves to the next state. */ |
| 59 | static void ack_state(void) | 415 | static void ack_state(struct stop_machine_data *smdata) |
| 60 | { | 416 | { |
| 61 | if (atomic_dec_and_test(&thread_ack)) | 417 | if (atomic_dec_and_test(&smdata->thread_ack)) |
| 62 | set_state(state + 1); | 418 | set_state(smdata, smdata->state + 1); |
| 63 | } | 419 | } |
| 64 | 420 | ||
| 65 | /* This is the actual function which stops the CPU. It runs | 421 | /* This is the cpu_stop function which stops the CPU. */ |
| 66 | * in the context of a dedicated stopmachine workqueue. */ | 422 | static int stop_machine_cpu_stop(void *data) |
| 67 | static void stop_cpu(struct work_struct *unused) | ||
| 68 | { | 423 | { |
| 424 | struct stop_machine_data *smdata = data; | ||
| 69 | enum stopmachine_state curstate = STOPMACHINE_NONE; | 425 | enum stopmachine_state curstate = STOPMACHINE_NONE; |
| 70 | struct stop_machine_data *smdata = &idle; | 426 | int cpu = smp_processor_id(), err = 0; |
| 71 | int cpu = smp_processor_id(); | 427 | bool is_active; |
| 72 | int err; | 428 | |
| 429 | if (!smdata->active_cpus) | ||
| 430 | is_active = cpu == cpumask_first(cpu_online_mask); | ||
| 431 | else | ||
| 432 | is_active = cpumask_test_cpu(cpu, smdata->active_cpus); | ||
| 73 | 433 | ||
| 74 | if (!active_cpus) { | ||
| 75 | if (cpu == cpumask_first(cpu_online_mask)) | ||
| 76 | smdata = &active; | ||
| 77 | } else { | ||
| 78 | if (cpumask_test_cpu(cpu, active_cpus)) | ||
| 79 | smdata = &active; | ||
| 80 | } | ||
| 81 | /* Simple state machine */ | 434 | /* Simple state machine */ |
| 82 | do { | 435 | do { |
| 83 | /* Chill out and ensure we re-read stopmachine_state. */ | 436 | /* Chill out and ensure we re-read stopmachine_state. */ |
| 84 | cpu_relax(); | 437 | cpu_relax(); |
| 85 | if (state != curstate) { | 438 | if (smdata->state != curstate) { |
| 86 | curstate = state; | 439 | curstate = smdata->state; |
| 87 | switch (curstate) { | 440 | switch (curstate) { |
| 88 | case STOPMACHINE_DISABLE_IRQ: | 441 | case STOPMACHINE_DISABLE_IRQ: |
| 89 | local_irq_disable(); | 442 | local_irq_disable(); |
| 90 | hard_irq_disable(); | 443 | hard_irq_disable(); |
| 91 | break; | 444 | break; |
| 92 | case STOPMACHINE_RUN: | 445 | case STOPMACHINE_RUN: |
| 93 | /* On multiple CPUs only a single error code | 446 | if (is_active) |
| 94 | * is needed to tell that something failed. */ | 447 | err = smdata->fn(smdata->data); |
| 95 | err = smdata->fn(smdata->data); | ||
| 96 | if (err) | ||
| 97 | smdata->fnret = err; | ||
| 98 | break; | 448 | break; |
| 99 | default: | 449 | default: |
| 100 | break; | 450 | break; |
| 101 | } | 451 | } |
| 102 | ack_state(); | 452 | ack_state(smdata); |
| 103 | } | 453 | } |
| 104 | } while (curstate != STOPMACHINE_EXIT); | 454 | } while (curstate != STOPMACHINE_EXIT); |
| 105 | 455 | ||
| 106 | local_irq_enable(); | 456 | local_irq_enable(); |
| 457 | return err; | ||
| 107 | } | 458 | } |
| 108 | 459 | ||
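To make the contract concrete, a hypothetical __stop_machine() callback could look like the sketch below (not part of this patch): by the time the state machine reaches STOPMACHINE_RUN, every online cpu has acknowledged STOPMACHINE_DISABLE_IRQ, so the callback runs with hard interrupts disabled machine-wide and must not sleep, take sleeping locks, or wait on another cpu.

#include <linux/bug.h>
#include <linux/irqflags.h>
#include <linux/stop_machine.h>

/* hypothetical __stop_machine() callback: all other online cpus are
 * spinning in stop_machine_cpu_stop() with hard irqs off, so shared
 * state can be rewritten without anyone else observing it mid-update */
static int flip_global_state(void *data)
{
	WARN_ON_ONCE(!irqs_disabled());
	/* ... update the shared data structure ... */
	return 0;
}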
| 109 | /* Callback for CPUs which aren't supposed to do anything. */ | ||
| 110 | static int chill(void *unused) | ||
| 111 | { | ||
| 112 | return 0; | ||
| 113 | } | ||
| 114 | |||
| 115 | int stop_machine_create(void) | ||
| 116 | { | ||
| 117 | mutex_lock(&setup_lock); | ||
| 118 | if (refcount) | ||
| 119 | goto done; | ||
| 120 | stop_machine_wq = create_rt_workqueue("kstop"); | ||
| 121 | if (!stop_machine_wq) | ||
| 122 | goto err_out; | ||
| 123 | stop_machine_work = alloc_percpu(struct work_struct); | ||
| 124 | if (!stop_machine_work) | ||
| 125 | goto err_out; | ||
| 126 | done: | ||
| 127 | refcount++; | ||
| 128 | mutex_unlock(&setup_lock); | ||
| 129 | return 0; | ||
| 130 | |||
| 131 | err_out: | ||
| 132 | if (stop_machine_wq) | ||
| 133 | destroy_workqueue(stop_machine_wq); | ||
| 134 | mutex_unlock(&setup_lock); | ||
| 135 | return -ENOMEM; | ||
| 136 | } | ||
| 137 | EXPORT_SYMBOL_GPL(stop_machine_create); | ||
| 138 | |||
| 139 | void stop_machine_destroy(void) | ||
| 140 | { | ||
| 141 | mutex_lock(&setup_lock); | ||
| 142 | refcount--; | ||
| 143 | if (refcount) | ||
| 144 | goto done; | ||
| 145 | destroy_workqueue(stop_machine_wq); | ||
| 146 | free_percpu(stop_machine_work); | ||
| 147 | done: | ||
| 148 | mutex_unlock(&setup_lock); | ||
| 149 | } | ||
| 150 | EXPORT_SYMBOL_GPL(stop_machine_destroy); | ||
| 151 | |||
| 152 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 460 | int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
| 153 | { | 461 | { |
| 154 | struct work_struct *sm_work; | 462 | struct stop_machine_data smdata = { .fn = fn, .data = data, |
| 155 | int i, ret; | 463 | .num_threads = num_online_cpus(), |
| 156 | 464 | .active_cpus = cpus }; | |
| 157 | /* Set up initial state. */ | 465 | |
| 158 | mutex_lock(&lock); | 466 | /* Set the initial state and stop all online cpus. */ |
| 159 | num_threads = num_online_cpus(); | 467 | set_state(&smdata, STOPMACHINE_PREPARE); |
| 160 | active_cpus = cpus; | 468 | return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata); |
| 161 | active.fn = fn; | ||
| 162 | active.data = data; | ||
| 163 | active.fnret = 0; | ||
| 164 | idle.fn = chill; | ||
| 165 | idle.data = NULL; | ||
| 166 | |||
| 167 | set_state(STOPMACHINE_PREPARE); | ||
| 168 | |||
| 169 | /* Schedule the stop_cpu work on all cpus: hold this CPU so one | ||
| 170 | * doesn't hit this CPU until we're ready. */ | ||
| 171 | get_cpu(); | ||
| 172 | for_each_online_cpu(i) { | ||
| 173 | sm_work = per_cpu_ptr(stop_machine_work, i); | ||
| 174 | INIT_WORK(sm_work, stop_cpu); | ||
| 175 | queue_work_on(i, stop_machine_wq, sm_work); | ||
| 176 | } | ||
| 177 | /* This will release the thread on our CPU. */ | ||
| 178 | put_cpu(); | ||
| 179 | flush_workqueue(stop_machine_wq); | ||
| 180 | ret = active.fnret; | ||
| 181 | mutex_unlock(&lock); | ||
| 182 | return ret; | ||
| 183 | } | 469 | } |
| 184 | 470 | ||
| 185 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) | 471 | int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus) |
| 186 | { | 472 | { |
| 187 | int ret; | 473 | int ret; |
| 188 | 474 | ||
| 189 | ret = stop_machine_create(); | ||
| 190 | if (ret) | ||
| 191 | return ret; | ||
| 192 | /* No CPUs can come up or down during this. */ | 475 | /* No CPUs can come up or down during this. */ |
| 193 | get_online_cpus(); | 476 | get_online_cpus(); |
| 194 | ret = __stop_machine(fn, data, cpus); | 477 | ret = __stop_machine(fn, data, cpus); |
| 195 | put_online_cpus(); | 478 | put_online_cpus(); |
| 196 | stop_machine_destroy(); | ||
| 197 | return ret; | 479 | return ret; |
| 198 | } | 480 | } |
| 199 | EXPORT_SYMBOL_GPL(stop_machine); | 481 | EXPORT_SYMBOL_GPL(stop_machine); |
| 482 | |||
| 483 | #endif /* CONFIG_STOP_MACHINE */ | ||
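Finally, a minimal stop_machine() usage sketch (apply_update() and freeze_and_update() are hypothetical names): the wrapper takes get_online_cpus()/put_online_cpus() itself and may sleep, and passing a NULL cpumask makes @fn active only on the first online cpu while the rest spin with interrupts disabled, as stop_machine_cpu_stop() above shows.

#include <linux/stop_machine.h>

/* hypothetical callback; with a NULL cpumask it runs only on
 * cpumask_first(cpu_online_mask) while the other cpus spin with irqs off */
static int apply_update(void *data)
{
	return 0;
}

/* hypothetical caller in plain process context */
static int freeze_and_update(void *data)
{
	return stop_machine(apply_update, data, NULL);
}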
