about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  Documentation/RCU/torture.txt  10
-rw-r--r--  include/linux/rcutiny.h  2
-rw-r--r--  include/linux/rcutree.h  1
-rw-r--r--  kernel/rcutorture.c  2
-rw-r--r--  kernel/sched.c  315
-rw-r--r--  kernel/sched_fair.c  48
-rw-r--r--  kernel/stop_machine.c  2
7 files changed, 127 insertions, 253 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 0e50bc2aa1e2..5d9016795fd8 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following:
182 sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 182 sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
183 sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 183 sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
184 sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 184 sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
185 state: -1 / 0:0 3:0 4:0
186
187As before, the first four lines are similar to those for RCU.
188The last line shows the task-migration state. The first number is
189-1 if synchronize_sched_expedited() is idle, -2 if in the process of
190posting wakeups to the migration kthreads, and N when waiting on CPU N.
191Each of the colon-separated fields following the "/" is a CPU:state pair.
192Valid states are "0" for idle, "1" for waiting for quiescent state,
193"2" for passed through quiescent state, and "3" when a race with a
194CPU-hotplug event forces use of the synchronize_sched() primitive.
195 185
196 186
197USAGE 187USAGE
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a5195875480a..0006b2df00e1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void)
60 return 0; 60 return 0;
61} 61}
62 62
63extern int rcu_expedited_torture_stats(char *page);
64
65static inline void rcu_force_quiescent_state(void) 63static inline void rcu_force_quiescent_state(void)
66{ 64{
67} 65}
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 42cc3a04779e..24e467e526b8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,7 +35,6 @@ struct notifier_block;
35extern void rcu_sched_qs(int cpu); 35extern void rcu_sched_qs(int cpu);
36extern void rcu_bh_qs(int cpu); 36extern void rcu_bh_qs(int cpu);
37extern int rcu_needs_cpu(int cpu); 37extern int rcu_needs_cpu(int cpu);
38extern int rcu_expedited_torture_stats(char *page);
39 38
40#ifdef CONFIG_TREE_PREEMPT_RCU 39#ifdef CONFIG_TREE_PREEMPT_RCU
41 40
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83ed..2b676f3a0f26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
669 .sync = synchronize_sched_expedited, 669 .sync = synchronize_sched_expedited,
670 .cb_barrier = NULL, 670 .cb_barrier = NULL,
671 .fqs = rcu_sched_force_quiescent_state, 671 .fqs = rcu_sched_force_quiescent_state,
672 .stats = rcu_expedited_torture_stats, 672 .stats = NULL,
673 .irq_capable = 1, 673 .irq_capable = 1,
674 .name = "sched_expedited" 674 .name = "sched_expedited"
675}; 675};
diff --git a/kernel/sched.c b/kernel/sched.c
index 4956ed092838..f1d577a0a8ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -55,9 +55,9 @@
55#include <linux/cpu.h> 55#include <linux/cpu.h>
56#include <linux/cpuset.h> 56#include <linux/cpuset.h>
57#include <linux/percpu.h> 57#include <linux/percpu.h>
58#include <linux/kthread.h>
59#include <linux/proc_fs.h> 58#include <linux/proc_fs.h>
60#include <linux/seq_file.h> 59#include <linux/seq_file.h>
60#include <linux/stop_machine.h>
61#include <linux/sysctl.h> 61#include <linux/sysctl.h>
62#include <linux/syscalls.h> 62#include <linux/syscalls.h>
63#include <linux/times.h> 63#include <linux/times.h>
@@ -539,15 +539,13 @@ struct rq {
539 int post_schedule; 539 int post_schedule;
540 int active_balance; 540 int active_balance;
541 int push_cpu; 541 int push_cpu;
542 struct cpu_stop_work active_balance_work;
542 /* cpu of this runqueue: */ 543 /* cpu of this runqueue: */
543 int cpu; 544 int cpu;
544 int online; 545 int online;
545 546
546 unsigned long avg_load_per_task; 547 unsigned long avg_load_per_task;
547 548
548 struct task_struct *migration_thread;
549 struct list_head migration_queue;
550
551 u64 rt_avg; 549 u64 rt_avg;
552 u64 age_stamp; 550 u64 age_stamp;
553 u64 idle_stamp; 551 u64 idle_stamp;
@@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2037 __set_task_cpu(p, new_cpu); 2035 __set_task_cpu(p, new_cpu);
2038} 2036}
2039 2037
2040struct migration_req { 2038struct migration_arg {
2041 struct list_head list;
2042
2043 struct task_struct *task; 2039 struct task_struct *task;
2044 int dest_cpu; 2040 int dest_cpu;
2045
2046 struct completion done;
2047}; 2041};
2048 2042
2043static int migration_cpu_stop(void *data);
2044
2049/* 2045/*
2050 * The task's runqueue lock must be held. 2046 * The task's runqueue lock must be held.
2051 * Returns true if you have to wait for migration thread. 2047 * Returns true if you have to wait for migration thread.
2052 */ 2048 */
2053static int 2049static bool migrate_task(struct task_struct *p, int dest_cpu)
2054migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2055{ 2050{
2056 struct rq *rq = task_rq(p); 2051 struct rq *rq = task_rq(p);
2057 2052
@@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
2059 * If the task is not on a runqueue (and not running), then 2054 * If the task is not on a runqueue (and not running), then
2060 * the next wake-up will properly place the task. 2055 * the next wake-up will properly place the task.
2061 */ 2056 */
2062 if (!p->se.on_rq && !task_running(rq, p)) 2057 return p->se.on_rq || task_running(rq, p);
2063 return 0;
2064
2065 init_completion(&req->done);
2066 req->task = p;
2067 req->dest_cpu = dest_cpu;
2068 list_add(&req->list, &rq->migration_queue);
2069
2070 return 1;
2071} 2058}
2072 2059
2073/* 2060/*
@@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq)
3110void sched_exec(void) 3097void sched_exec(void)
3111{ 3098{
3112 struct task_struct *p = current; 3099 struct task_struct *p = current;
3113 struct migration_req req;
3114 unsigned long flags; 3100 unsigned long flags;
3115 struct rq *rq; 3101 struct rq *rq;
3116 int dest_cpu; 3102 int dest_cpu;
@@ -3124,17 +3110,11 @@ void sched_exec(void)
3124 * select_task_rq() can race against ->cpus_allowed 3110 * select_task_rq() can race against ->cpus_allowed
3125 */ 3111 */
3126 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && 3112 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
3127 likely(cpu_active(dest_cpu)) && 3113 likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
3128 migrate_task(p, dest_cpu, &req)) { 3114 struct migration_arg arg = { p, dest_cpu };
3129 /* Need to wait for migration thread (might exit: take ref). */
3130 struct task_struct *mt = rq->migration_thread;
3131 3115
3132 get_task_struct(mt);
3133 task_rq_unlock(rq, &flags); 3116 task_rq_unlock(rq, &flags);
3134 wake_up_process(mt); 3117 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
3135 put_task_struct(mt);
3136 wait_for_completion(&req.done);
3137
3138 return; 3118 return;
3139 } 3119 }
3140unlock: 3120unlock:
@@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void)
5290/* 5270/*
5291 * This is how migration works: 5271 * This is how migration works:
5292 * 5272 *
5293 * 1) we queue a struct migration_req structure in the source CPU's 5273 * 1) we invoke migration_cpu_stop() on the target CPU using
5294 * runqueue and wake up that CPU's migration thread. 5274 * stop_one_cpu().
5295 * 2) we down() the locked semaphore => thread blocks. 5275 * 2) stopper starts to run (implicitly forcing the migrated thread
5296 * 3) migration thread wakes up (implicitly it forces the migrated 5276 * off the CPU)
5297 * thread off the CPU) 5277 * 3) it checks whether the migrated task is still in the wrong runqueue.
5298 * 4) it gets the migration request and checks whether the migrated 5278 * 4) if it's in the wrong runqueue then the migration thread removes
5299 * task is still in the wrong runqueue.
5300 * 5) if it's in the wrong runqueue then the migration thread removes
5301 * it and puts it into the right queue. 5279 * it and puts it into the right queue.
5302 * 6) migration thread up()s the semaphore. 5280 * 5) stopper completes and stop_one_cpu() returns and the migration
5303 * 7) we wake up and the migration is done. 5281 * is done.
5304 */ 5282 */
5305 5283
5306/* 5284/*
@@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void)
5314 */ 5292 */
5315int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) 5293int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5316{ 5294{
5317 struct migration_req req;
5318 unsigned long flags; 5295 unsigned long flags;
5319 struct rq *rq; 5296 struct rq *rq;
5297 unsigned int dest_cpu;
5320 int ret = 0; 5298 int ret = 0;
5321 5299
5322 /* 5300 /*
@@ -5354,15 +5332,12 @@ again:
5354 if (cpumask_test_cpu(task_cpu(p), new_mask)) 5332 if (cpumask_test_cpu(task_cpu(p), new_mask))
5355 goto out; 5333 goto out;
5356 5334
5357 if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { 5335 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
5336 if (migrate_task(p, dest_cpu)) {
5337 struct migration_arg arg = { p, dest_cpu };
5358 /* Need help from migration thread: drop lock and wait. */ 5338 /* Need help from migration thread: drop lock and wait. */
5359 struct task_struct *mt = rq->migration_thread;
5360
5361 get_task_struct(mt);
5362 task_rq_unlock(rq, &flags); 5339 task_rq_unlock(rq, &flags);
5363 wake_up_process(mt); 5340 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
5364 put_task_struct(mt);
5365 wait_for_completion(&req.done);
5366 tlb_migrate_finish(p->mm); 5341 tlb_migrate_finish(p->mm);
5367 return 0; 5342 return 0;
5368 } 5343 }
@@ -5420,70 +5395,22 @@ fail:
5420 return ret; 5395 return ret;
5421} 5396}
5422 5397
5423#define RCU_MIGRATION_IDLE 0
5424#define RCU_MIGRATION_NEED_QS 1
5425#define RCU_MIGRATION_GOT_QS 2
5426#define RCU_MIGRATION_MUST_SYNC 3
5427
5428/* 5398/*
5429 * migration_thread - this is a highprio system thread that performs 5399 * migration_cpu_stop - this will be executed by a highprio stopper thread
5430 * thread migration by bumping thread off CPU then 'pushing' onto 5400 * and performs thread migration by bumping thread off CPU then
5431 * another runqueue. 5401 * 'pushing' onto another runqueue.
5432 */ 5402 */
5433static int migration_thread(void *data) 5403static int migration_cpu_stop(void *data)
5434{ 5404{
5435 int badcpu; 5405 struct migration_arg *arg = data;
5436 int cpu = (long)data;
5437 struct rq *rq;
5438
5439 rq = cpu_rq(cpu);
5440 BUG_ON(rq->migration_thread != current);
5441
5442 set_current_state(TASK_INTERRUPTIBLE);
5443 while (!kthread_should_stop()) {
5444 struct migration_req *req;
5445 struct list_head *head;
5446
5447 raw_spin_lock_irq(&rq->lock);
5448
5449 if (cpu_is_offline(cpu)) {
5450 raw_spin_unlock_irq(&rq->lock);
5451 break;
5452 }
5453
5454 if (rq->active_balance) {
5455 active_load_balance(rq, cpu);
5456 rq->active_balance = 0;
5457 }
5458
5459 head = &rq->migration_queue;
5460
5461 if (list_empty(head)) {
5462 raw_spin_unlock_irq(&rq->lock);
5463 schedule();
5464 set_current_state(TASK_INTERRUPTIBLE);
5465 continue;
5466 }
5467 req = list_entry(head->next, struct migration_req, list);
5468 list_del_init(head->next);
5469
5470 if (req->task != NULL) {
5471 raw_spin_unlock(&rq->lock);
5472 __migrate_task(req->task, cpu, req->dest_cpu);
5473 } else if (likely(cpu == (badcpu = smp_processor_id()))) {
5474 req->dest_cpu = RCU_MIGRATION_GOT_QS;
5475 raw_spin_unlock(&rq->lock);
5476 } else {
5477 req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
5478 raw_spin_unlock(&rq->lock);
5479 WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
5480 }
5481 local_irq_enable();
5482
5483 complete(&req->done);
5484 }
5485 __set_current_state(TASK_RUNNING);
5486 5406
5407 /*
5408 * The original target cpu might have gone down and we might
5409 * be on another cpu but it doesn't matter.
5410 */
5411 local_irq_disable();
5412 __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
5413 local_irq_enable();
5487 return 0; 5414 return 0;
5488} 5415}
5489 5416
@@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq)
5850static int __cpuinit 5777static int __cpuinit
5851migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) 5778migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5852{ 5779{
5853 struct task_struct *p;
5854 int cpu = (long)hcpu; 5780 int cpu = (long)hcpu;
5855 unsigned long flags; 5781 unsigned long flags;
5856 struct rq *rq; 5782 struct rq *rq = cpu_rq(cpu);
5857 5783
5858 switch (action) { 5784 switch (action) {
5859 5785
5860 case CPU_UP_PREPARE: 5786 case CPU_UP_PREPARE:
5861 case CPU_UP_PREPARE_FROZEN: 5787 case CPU_UP_PREPARE_FROZEN:
5862 p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
5863 if (IS_ERR(p))
5864 return NOTIFY_BAD;
5865 kthread_bind(p, cpu);
5866 /* Must be high prio: stop_machine expects to yield to it. */
5867 rq = task_rq_lock(p, &flags);
5868 __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
5869 task_rq_unlock(rq, &flags);
5870 get_task_struct(p);
5871 cpu_rq(cpu)->migration_thread = p;
5872 rq->calc_load_update = calc_load_update; 5788 rq->calc_load_update = calc_load_update;
5873 break; 5789 break;
5874 5790
5875 case CPU_ONLINE: 5791 case CPU_ONLINE:
5876 case CPU_ONLINE_FROZEN: 5792 case CPU_ONLINE_FROZEN:
5877 /* Strictly unnecessary, as first user will wake it. */
5878 wake_up_process(cpu_rq(cpu)->migration_thread);
5879
5880 /* Update our root-domain */ 5793 /* Update our root-domain */
5881 rq = cpu_rq(cpu);
5882 raw_spin_lock_irqsave(&rq->lock, flags); 5794 raw_spin_lock_irqsave(&rq->lock, flags);
5883 if (rq->rd) { 5795 if (rq->rd) {
5884 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 5796 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5889 break; 5801 break;
5890 5802
5891#ifdef CONFIG_HOTPLUG_CPU 5803#ifdef CONFIG_HOTPLUG_CPU
5892 case CPU_UP_CANCELED:
5893 case CPU_UP_CANCELED_FROZEN:
5894 if (!cpu_rq(cpu)->migration_thread)
5895 break;
5896 /* Unbind it from offline cpu so it can run. Fall thru. */
5897 kthread_bind(cpu_rq(cpu)->migration_thread,
5898 cpumask_any(cpu_online_mask));
5899 kthread_stop(cpu_rq(cpu)->migration_thread);
5900 put_task_struct(cpu_rq(cpu)->migration_thread);
5901 cpu_rq(cpu)->migration_thread = NULL;
5902 break;
5903
5904 case CPU_DEAD: 5804 case CPU_DEAD:
5905 case CPU_DEAD_FROZEN: 5805 case CPU_DEAD_FROZEN:
5906 migrate_live_tasks(cpu); 5806 migrate_live_tasks(cpu);
5907 rq = cpu_rq(cpu);
5908 kthread_stop(rq->migration_thread);
5909 put_task_struct(rq->migration_thread);
5910 rq->migration_thread = NULL;
5911 /* Idle task back to normal (off runqueue, low prio) */ 5807 /* Idle task back to normal (off runqueue, low prio) */
5912 raw_spin_lock_irq(&rq->lock); 5808 raw_spin_lock_irq(&rq->lock);
5913 deactivate_task(rq, rq->idle, 0); 5809 deactivate_task(rq, rq->idle, 0);
@@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5918 migrate_nr_uninterruptible(rq); 5814 migrate_nr_uninterruptible(rq);
5919 BUG_ON(rq->nr_running != 0); 5815 BUG_ON(rq->nr_running != 0);
5920 calc_global_load_remove(rq); 5816 calc_global_load_remove(rq);
5921 /*
5922 * No need to migrate the tasks: it was best-effort if
5923 * they didn't take sched_hotcpu_mutex. Just wake up
5924 * the requestors.
5925 */
5926 raw_spin_lock_irq(&rq->lock);
5927 while (!list_empty(&rq->migration_queue)) {
5928 struct migration_req *req;
5929
5930 req = list_entry(rq->migration_queue.next,
5931 struct migration_req, list);
5932 list_del_init(&req->list);
5933 raw_spin_unlock_irq(&rq->lock);
5934 complete(&req->done);
5935 raw_spin_lock_irq(&rq->lock);
5936 }
5937 raw_spin_unlock_irq(&rq->lock);
5938 break; 5817 break;
5939 5818
5940 case CPU_DYING: 5819 case CPU_DYING:
5941 case CPU_DYING_FROZEN: 5820 case CPU_DYING_FROZEN:
5942 /* Update our root-domain */ 5821 /* Update our root-domain */
5943 rq = cpu_rq(cpu);
5944 raw_spin_lock_irqsave(&rq->lock, flags); 5822 raw_spin_lock_irqsave(&rq->lock, flags);
5945 if (rq->rd) { 5823 if (rq->rd) {
5946 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); 5824 BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -7757,10 +7635,8 @@ void __init sched_init(void)
7757 rq->push_cpu = 0; 7635 rq->push_cpu = 0;
7758 rq->cpu = i; 7636 rq->cpu = i;
7759 rq->online = 0; 7637 rq->online = 0;
7760 rq->migration_thread = NULL;
7761 rq->idle_stamp = 0; 7638 rq->idle_stamp = 0;
7762 rq->avg_idle = 2*sysctl_sched_migration_cost; 7639 rq->avg_idle = 2*sysctl_sched_migration_cost;
7763 INIT_LIST_HEAD(&rq->migration_queue);
7764 rq_attach_root(rq, &def_root_domain); 7640 rq_attach_root(rq, &def_root_domain);
7765#endif 7641#endif
7766 init_rq_hrtick(rq); 7642 init_rq_hrtick(rq);
@@ -9054,43 +8930,39 @@ struct cgroup_subsys cpuacct_subsys = {
9054 8930
9055#ifndef CONFIG_SMP 8931#ifndef CONFIG_SMP
9056 8932
9057int rcu_expedited_torture_stats(char *page)
9058{
9059 return 0;
9060}
9061EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
9062
9063void synchronize_sched_expedited(void) 8933void synchronize_sched_expedited(void)
9064{ 8934{
8935 /*
8936 * There must be a full memory barrier on each affected CPU
8937 * between the time that try_stop_cpus() is called and the
8938 * time that it returns.
8939 *
8940 * In the current initial implementation of cpu_stop, the
8941 * above condition is already met when the control reaches
8942 * this point and the following smp_mb() is not strictly
8943 * necessary. Do smp_mb() anyway for documentation and
8944 * robustness against future implementation changes.
8945 */
8946 smp_mb();
9065} 8947}
9066EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 8948EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
9067 8949
9068#else /* #ifndef CONFIG_SMP */ 8950#else /* #ifndef CONFIG_SMP */
9069 8951
9070static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); 8952static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
9071static DEFINE_MUTEX(rcu_sched_expedited_mutex);
9072
9073#define RCU_EXPEDITED_STATE_POST -2
9074#define RCU_EXPEDITED_STATE_IDLE -1
9075 8953
9076static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; 8954static int synchronize_sched_expedited_cpu_stop(void *data)
9077
9078int rcu_expedited_torture_stats(char *page)
9079{ 8955{
9080 int cnt = 0; 8956 static DEFINE_SPINLOCK(done_mask_lock);
9081 int cpu; 8957 struct cpumask *done_mask = data;
9082 8958
9083 cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); 8959 if (done_mask) {
9084 for_each_online_cpu(cpu) { 8960 spin_lock(&done_mask_lock);
9085 cnt += sprintf(&page[cnt], " %d:%d", 8961 cpumask_set_cpu(smp_processor_id(), done_mask);
9086 cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); 8962 spin_unlock(&done_mask_lock);
9087 } 8963 }
9088 cnt += sprintf(&page[cnt], "\n"); 8964 return 0;
9089 return cnt;
9090} 8965}
9091EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
9092
9093static long synchronize_sched_expedited_count;
9094 8966
9095/* 8967/*
9096 * Wait for an rcu-sched grace period to elapse, but use "big hammer" 8968 * Wait for an rcu-sched grace period to elapse, but use "big hammer"
@@ -9104,60 +8976,55 @@ static long synchronize_sched_expedited_count;
9104 */ 8976 */
9105void synchronize_sched_expedited(void) 8977void synchronize_sched_expedited(void)
9106{ 8978{
9107 int cpu; 8979 cpumask_var_t done_mask_var;
9108 unsigned long flags; 8980 struct cpumask *done_mask = NULL;
9109 bool need_full_sync = 0; 8981 int snap, trycount = 0;
9110 struct rq *rq; 8982
9111 struct migration_req *req; 8983 /*
9112 long snap; 8984 * done_mask is used to check that all cpus actually have
9113 int trycount = 0; 8985 * finished running the stopper, which is guaranteed by
8986 * stop_cpus() if it's called with cpu hotplug blocked. Keep
8987 * the paranoia for now but it's best effort if cpumask is off
8988 * stack.
8989 */
8990 if (zalloc_cpumask_var(&done_mask_var, GFP_ATOMIC))
8991 done_mask = done_mask_var;
9114 8992
9115 smp_mb(); /* ensure prior mod happens before capturing snap. */ 8993 smp_mb(); /* ensure prior mod happens before capturing snap. */
9116 snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; 8994 snap = atomic_read(&synchronize_sched_expedited_count) + 1;
9117 get_online_cpus(); 8995 get_online_cpus();
9118 while (!mutex_trylock(&rcu_sched_expedited_mutex)) { 8996 while (try_stop_cpus(cpu_online_mask,
8997 synchronize_sched_expedited_cpu_stop,
8998 done_mask) == -EAGAIN) {
9119 put_online_cpus(); 8999 put_online_cpus();
9120 if (trycount++ < 10) 9000 if (trycount++ < 10)
9121 udelay(trycount * num_online_cpus()); 9001 udelay(trycount * num_online_cpus());
9122 else { 9002 else {
9123 synchronize_sched(); 9003 synchronize_sched();
9124 return; 9004 goto free_out;
9125 } 9005 }
9126 if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { 9006 if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
9127 smp_mb(); /* ensure test happens before caller kfree */ 9007 smp_mb(); /* ensure test happens before caller kfree */
9128 return; 9008 goto free_out;
9129 } 9009 }
9130 get_online_cpus(); 9010 get_online_cpus();
9131 } 9011 }
9132 rcu_expedited_state = RCU_EXPEDITED_STATE_POST; 9012 atomic_inc(&synchronize_sched_expedited_count);
9133 for_each_online_cpu(cpu) { 9013 if (done_mask)
9134 rq = cpu_rq(cpu); 9014 cpumask_xor(done_mask, done_mask, cpu_online_mask);
9135 req = &per_cpu(rcu_migration_req, cpu);
9136 init_completion(&req->done);
9137 req->task = NULL;
9138 req->dest_cpu = RCU_MIGRATION_NEED_QS;
9139 raw_spin_lock_irqsave(&rq->lock, flags);
9140 list_add(&req->list, &rq->migration_queue);
9141 raw_spin_unlock_irqrestore(&rq->lock, flags);
9142 wake_up_process(rq->migration_thread);
9143 }
9144 for_each_online_cpu(cpu) {
9145 rcu_expedited_state = cpu;
9146 req = &per_cpu(rcu_migration_req, cpu);
9147 rq = cpu_rq(cpu);
9148 wait_for_completion(&req->done);
9149 raw_spin_lock_irqsave(&rq->lock, flags);
9150 if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
9151 need_full_sync = 1;
9152 req->dest_cpu = RCU_MIGRATION_IDLE;
9153 raw_spin_unlock_irqrestore(&rq->lock, flags);
9154 }
9155 rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
9156 synchronize_sched_expedited_count++;
9157 mutex_unlock(&rcu_sched_expedited_mutex);
9158 put_online_cpus(); 9015 put_online_cpus();
9159 if (need_full_sync) 9016
9017 /* paranoia - this can't happen */
9018 if (done_mask && cpumask_weight(done_mask)) {
9019 char buf[80];
9020
9021 cpulist_scnprintf(buf, sizeof(buf), done_mask);
9022 WARN_ONCE(1, "synchronize_sched_expedited: cpu online and done masks disagree on %d cpus: %s\n",
9023 cpumask_weight(done_mask), buf);
9160 synchronize_sched(); 9024 synchronize_sched();
9025 }
9026free_out:
9027 free_cpumask_var(done_mask_var);
9161} 9028}
9162EXPORT_SYMBOL_GPL(synchronize_sched_expedited); 9029EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
9163 9030
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d1..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
2798 return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); 2798 return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
2799} 2799}
2800 2800
2801static int active_load_balance_cpu_stop(void *data);
2802
2801/* 2803/*
2802 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2804 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2803 * tasks if there is an imbalance. 2805 * tasks if there is an imbalance.
@@ -2887,8 +2889,9 @@ redo:
2887 if (need_active_balance(sd, sd_idle, idle)) { 2889 if (need_active_balance(sd, sd_idle, idle)) {
2888 raw_spin_lock_irqsave(&busiest->lock, flags); 2890 raw_spin_lock_irqsave(&busiest->lock, flags);
2889 2891
2890 /* don't kick the migration_thread, if the curr 2892 /* don't kick the active_load_balance_cpu_stop,
2891 * task on busiest cpu can't be moved to this_cpu 2893 * if the curr task on busiest cpu can't be
2894 * moved to this_cpu
2892 */ 2895 */
2893 if (!cpumask_test_cpu(this_cpu, 2896 if (!cpumask_test_cpu(this_cpu,
2894 &busiest->curr->cpus_allowed)) { 2897 &busiest->curr->cpus_allowed)) {
@@ -2898,14 +2901,22 @@ redo:
2898 goto out_one_pinned; 2901 goto out_one_pinned;
2899 } 2902 }
2900 2903
2904 /*
2905 * ->active_balance synchronizes accesses to
2906 * ->active_balance_work. Once set, it's cleared
2907 * only after active load balance is finished.
2908 */
2901 if (!busiest->active_balance) { 2909 if (!busiest->active_balance) {
2902 busiest->active_balance = 1; 2910 busiest->active_balance = 1;
2903 busiest->push_cpu = this_cpu; 2911 busiest->push_cpu = this_cpu;
2904 active_balance = 1; 2912 active_balance = 1;
2905 } 2913 }
2906 raw_spin_unlock_irqrestore(&busiest->lock, flags); 2914 raw_spin_unlock_irqrestore(&busiest->lock, flags);
2915
2907 if (active_balance) 2916 if (active_balance)
2908 wake_up_process(busiest->migration_thread); 2917 stop_one_cpu_nowait(cpu_of(busiest),
2918 active_load_balance_cpu_stop, busiest,
2919 &busiest->active_balance_work);
2909 2920
2910 /* 2921 /*
2911 * We've kicked active balancing, reset the failure 2922 * We've kicked active balancing, reset the failure
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3012} 3023}
3013 3024
3014/* 3025/*
3015 * active_load_balance is run by migration threads. It pushes running tasks 3026 * active_load_balance_cpu_stop is run by cpu stopper. It pushes
3016 * off the busiest CPU onto idle CPUs. It requires at least 1 task to be 3027 * running tasks off the busiest CPU onto idle CPUs. It requires at
3017 * running on each physical CPU where possible, and avoids physical / 3028 * least 1 task to be running on each physical CPU where possible, and
3018 * logical imbalances. 3029 * avoids physical / logical imbalances.
3019 *
3020 * Called with busiest_rq locked.
3021 */ 3030 */
3022static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) 3031static int active_load_balance_cpu_stop(void *data)
3023{ 3032{
3033 struct rq *busiest_rq = data;
3034 int busiest_cpu = cpu_of(busiest_rq);
3024 int target_cpu = busiest_rq->push_cpu; 3035 int target_cpu = busiest_rq->push_cpu;
3036 struct rq *target_rq = cpu_rq(target_cpu);
3025 struct sched_domain *sd; 3037 struct sched_domain *sd;
3026 struct rq *target_rq; 3038
3039 raw_spin_lock_irq(&busiest_rq->lock);
3040
3041 /* make sure the requested cpu hasn't gone down in the meantime */
3042 if (unlikely(busiest_cpu != smp_processor_id() ||
3043 !busiest_rq->active_balance))
3044 goto out_unlock;
3027 3045
3028 /* Is there any task to move? */ 3046 /* Is there any task to move? */
3029 if (busiest_rq->nr_running <= 1) 3047 if (busiest_rq->nr_running <= 1)
3030 return; 3048 goto out_unlock;
3031
3032 target_rq = cpu_rq(target_cpu);
3033 3049
3034 /* 3050 /*
3035 * This condition is "impossible", if it occurs 3051 * This condition is "impossible", if it occurs
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3058 schedstat_inc(sd, alb_failed); 3074 schedstat_inc(sd, alb_failed);
3059 } 3075 }
3060 double_unlock_balance(busiest_rq, target_rq); 3076 double_unlock_balance(busiest_rq, target_rq);
3077out_unlock:
3078 busiest_rq->active_balance = 0;
3079 raw_spin_unlock_irq(&busiest_rq->lock);
3080 return 0;
3061} 3081}
3062 3082
3063#ifdef CONFIG_NO_HZ 3083#ifdef CONFIG_NO_HZ
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 884c7a1afeed..5b20141a5ec1 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,7 +301,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
301 case CPU_UP_PREPARE: 301 case CPU_UP_PREPARE:
302 BUG_ON(stopper->thread || stopper->enabled || 302 BUG_ON(stopper->thread || stopper->enabled ||
303 !list_empty(&stopper->works)); 303 !list_empty(&stopper->works));
304 p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d", 304 p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
305 cpu); 305 cpu);
306 if (IS_ERR(p)) 306 if (IS_ERR(p))
307 return NOTIFY_BAD; 307 return NOTIFY_BAD;