diff options
| -rw-r--r-- | Documentation/RCU/torture.txt | 10 | ||||
| -rw-r--r-- | include/linux/rcutiny.h | 2 | ||||
| -rw-r--r-- | include/linux/rcutree.h | 1 | ||||
| -rw-r--r-- | kernel/rcutorture.c | 2 | ||||
| -rw-r--r-- | kernel/sched.c | 315 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 48 | ||||
| -rw-r--r-- | kernel/stop_machine.c | 2 |
7 files changed, 127 insertions, 253 deletions
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 0e50bc2aa1e2..5d9016795fd8 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
| @@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following: | |||
| 182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 | 182 | sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0 |
| 183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 | 183 | sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0 |
| 184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 | 184 | sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0 |
| 185 | state: -1 / 0:0 3:0 4:0 | ||
| 186 | |||
| 187 | As before, the first four lines are similar to those for RCU. | ||
| 188 | The last line shows the task-migration state. The first number is | ||
| 189 | -1 if synchronize_sched_expedited() is idle, -2 if in the process of | ||
| 190 | posting wakeups to the migration kthreads, and N when waiting on CPU N. | ||
| 191 | Each of the colon-separated fields following the "/" is a CPU:state pair. | ||
| 192 | Valid states are "0" for idle, "1" for waiting for quiescent state, | ||
| 193 | "2" for passed through quiescent state, and "3" when a race with a | ||
| 194 | CPU-hotplug event forces use of the synchronize_sched() primitive. | ||
| 195 | 185 | ||
| 196 | 186 | ||
| 197 | USAGE | 187 | USAGE |
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a5195875480a..0006b2df00e1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
| @@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void) | |||
| 60 | return 0; | 60 | return 0; |
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | extern int rcu_expedited_torture_stats(char *page); | ||
| 64 | |||
| 65 | static inline void rcu_force_quiescent_state(void) | 63 | static inline void rcu_force_quiescent_state(void) |
| 66 | { | 64 | { |
| 67 | } | 65 | } |
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 42cc3a04779e..24e467e526b8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
| @@ -35,7 +35,6 @@ struct notifier_block; | |||
| 35 | extern void rcu_sched_qs(int cpu); | 35 | extern void rcu_sched_qs(int cpu); |
| 36 | extern void rcu_bh_qs(int cpu); | 36 | extern void rcu_bh_qs(int cpu); |
| 37 | extern int rcu_needs_cpu(int cpu); | 37 | extern int rcu_needs_cpu(int cpu); |
| 38 | extern int rcu_expedited_torture_stats(char *page); | ||
| 39 | 38 | ||
| 40 | #ifdef CONFIG_TREE_PREEMPT_RCU | 39 | #ifdef CONFIG_TREE_PREEMPT_RCU |
| 41 | 40 | ||
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83ed..2b676f3a0f26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
| @@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = { | |||
| 669 | .sync = synchronize_sched_expedited, | 669 | .sync = synchronize_sched_expedited, |
| 670 | .cb_barrier = NULL, | 670 | .cb_barrier = NULL, |
| 671 | .fqs = rcu_sched_force_quiescent_state, | 671 | .fqs = rcu_sched_force_quiescent_state, |
| 672 | .stats = rcu_expedited_torture_stats, | 672 | .stats = NULL, |
| 673 | .irq_capable = 1, | 673 | .irq_capable = 1, |
| 674 | .name = "sched_expedited" | 674 | .name = "sched_expedited" |
| 675 | }; | 675 | }; |
diff --git a/kernel/sched.c b/kernel/sched.c
index 4956ed092838..f1d577a0a8ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -55,9 +55,9 @@ | |||
| 55 | #include <linux/cpu.h> | 55 | #include <linux/cpu.h> |
| 56 | #include <linux/cpuset.h> | 56 | #include <linux/cpuset.h> |
| 57 | #include <linux/percpu.h> | 57 | #include <linux/percpu.h> |
| 58 | #include <linux/kthread.h> | ||
| 59 | #include <linux/proc_fs.h> | 58 | #include <linux/proc_fs.h> |
| 60 | #include <linux/seq_file.h> | 59 | #include <linux/seq_file.h> |
| 60 | #include <linux/stop_machine.h> | ||
| 61 | #include <linux/sysctl.h> | 61 | #include <linux/sysctl.h> |
| 62 | #include <linux/syscalls.h> | 62 | #include <linux/syscalls.h> |
| 63 | #include <linux/times.h> | 63 | #include <linux/times.h> |
| @@ -539,15 +539,13 @@ struct rq { | |||
| 539 | int post_schedule; | 539 | int post_schedule; |
| 540 | int active_balance; | 540 | int active_balance; |
| 541 | int push_cpu; | 541 | int push_cpu; |
| 542 | struct cpu_stop_work active_balance_work; | ||
| 542 | /* cpu of this runqueue: */ | 543 | /* cpu of this runqueue: */ |
| 543 | int cpu; | 544 | int cpu; |
| 544 | int online; | 545 | int online; |
| 545 | 546 | ||
| 546 | unsigned long avg_load_per_task; | 547 | unsigned long avg_load_per_task; |
| 547 | 548 | ||
| 548 | struct task_struct *migration_thread; | ||
| 549 | struct list_head migration_queue; | ||
| 550 | |||
| 551 | u64 rt_avg; | 549 | u64 rt_avg; |
| 552 | u64 age_stamp; | 550 | u64 age_stamp; |
| 553 | u64 idle_stamp; | 551 | u64 idle_stamp; |
| @@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2037 | __set_task_cpu(p, new_cpu); | 2035 | __set_task_cpu(p, new_cpu); |
| 2038 | } | 2036 | } |
| 2039 | 2037 | ||
| 2040 | struct migration_req { | 2038 | struct migration_arg { |
| 2041 | struct list_head list; | ||
| 2042 | |||
| 2043 | struct task_struct *task; | 2039 | struct task_struct *task; |
| 2044 | int dest_cpu; | 2040 | int dest_cpu; |
| 2045 | |||
| 2046 | struct completion done; | ||
| 2047 | }; | 2041 | }; |
| 2048 | 2042 | ||
| 2043 | static int migration_cpu_stop(void *data); | ||
| 2044 | |||
| 2049 | /* | 2045 | /* |
| 2050 | * The task's runqueue lock must be held. | 2046 | * The task's runqueue lock must be held. |
| 2051 | * Returns true if you have to wait for migration thread. | 2047 | * Returns true if you have to wait for migration thread. |
| 2052 | */ | 2048 | */ |
| 2053 | static int | 2049 | static bool migrate_task(struct task_struct *p, int dest_cpu) |
| 2054 | migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | ||
| 2055 | { | 2050 | { |
| 2056 | struct rq *rq = task_rq(p); | 2051 | struct rq *rq = task_rq(p); |
| 2057 | 2052 | ||
| @@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
| 2059 | * If the task is not on a runqueue (and not running), then | 2054 | * If the task is not on a runqueue (and not running), then |
| 2060 | * the next wake-up will properly place the task. | 2055 | * the next wake-up will properly place the task. |
| 2061 | */ | 2056 | */ |
| 2062 | if (!p->se.on_rq && !task_running(rq, p)) | 2057 | return p->se.on_rq || task_running(rq, p); |
| 2063 | return 0; | ||
| 2064 | |||
| 2065 | init_completion(&req->done); | ||
| 2066 | req->task = p; | ||
| 2067 | req->dest_cpu = dest_cpu; | ||
| 2068 | list_add(&req->list, &rq->migration_queue); | ||
| 2069 | |||
| 2070 | return 1; | ||
| 2071 | } | 2058 | } |
| 2072 | 2059 | ||
| 2073 | /* | 2060 | /* |
| @@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq) | |||
| 3110 | void sched_exec(void) | 3097 | void sched_exec(void) |
| 3111 | { | 3098 | { |
| 3112 | struct task_struct *p = current; | 3099 | struct task_struct *p = current; |
| 3113 | struct migration_req req; | ||
| 3114 | unsigned long flags; | 3100 | unsigned long flags; |
| 3115 | struct rq *rq; | 3101 | struct rq *rq; |
| 3116 | int dest_cpu; | 3102 | int dest_cpu; |
| @@ -3124,17 +3110,11 @@ void sched_exec(void) | |||
| 3124 | * select_task_rq() can race against ->cpus_allowed | 3110 | * select_task_rq() can race against ->cpus_allowed |
| 3125 | */ | 3111 | */ |
| 3126 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | 3112 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && |
| 3127 | likely(cpu_active(dest_cpu)) && | 3113 | likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) { |
| 3128 | migrate_task(p, dest_cpu, &req)) { | 3114 | struct migration_arg arg = { p, dest_cpu }; |
| 3129 | /* Need to wait for migration thread (might exit: take ref). */ | ||
| 3130 | struct task_struct *mt = rq->migration_thread; | ||
| 3131 | 3115 | ||
| 3132 | get_task_struct(mt); | ||
| 3133 | task_rq_unlock(rq, &flags); | 3116 | task_rq_unlock(rq, &flags); |
| 3134 | wake_up_process(mt); | 3117 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
| 3135 | put_task_struct(mt); | ||
| 3136 | wait_for_completion(&req.done); | ||
| 3137 | |||
| 3138 | return; | 3118 | return; |
| 3139 | } | 3119 | } |
| 3140 | unlock: | 3120 | unlock: |
| @@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void) | |||
| 5290 | /* | 5270 | /* |
| 5291 | * This is how migration works: | 5271 | * This is how migration works: |
| 5292 | * | 5272 | * |
| 5293 | * 1) we queue a struct migration_req structure in the source CPU's | 5273 | * 1) we invoke migration_cpu_stop() on the target CPU using |
| 5294 | * runqueue and wake up that CPU's migration thread. | 5274 | * stop_one_cpu(). |
| 5295 | * 2) we down() the locked semaphore => thread blocks. | 5275 | * 2) stopper starts to run (implicitly forcing the migrated thread |
| 5296 | * 3) migration thread wakes up (implicitly it forces the migrated | 5276 | * off the CPU) |
| 5297 | * thread off the CPU) | 5277 | * 3) it checks whether the migrated task is still in the wrong runqueue. |
| 5298 | * 4) it gets the migration request and checks whether the migrated | 5278 | * 4) if it's in the wrong runqueue then the migration thread removes |
| 5299 | * task is still in the wrong runqueue. | ||
| 5300 | * 5) if it's in the wrong runqueue then the migration thread removes | ||
| 5301 | * it and puts it into the right queue. | 5279 | * it and puts it into the right queue. |
| 5302 | * 6) migration thread up()s the semaphore. | 5280 | * 5) stopper completes and stop_one_cpu() returns and the migration |
| 5303 | * 7) we wake up and the migration is done. | 5281 | * is done. |
| 5304 | */ | 5282 | */ |
| 5305 | 5283 | ||
| 5306 | /* | 5284 | /* |
| @@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void) | |||
| 5314 | */ | 5292 | */ |
| 5315 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | 5293 | int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) |
| 5316 | { | 5294 | { |
| 5317 | struct migration_req req; | ||
| 5318 | unsigned long flags; | 5295 | unsigned long flags; |
| 5319 | struct rq *rq; | 5296 | struct rq *rq; |
| 5297 | unsigned int dest_cpu; | ||
| 5320 | int ret = 0; | 5298 | int ret = 0; |
| 5321 | 5299 | ||
| 5322 | /* | 5300 | /* |
| @@ -5354,15 +5332,12 @@ again: | |||
| 5354 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 5332 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
| 5355 | goto out; | 5333 | goto out; |
| 5356 | 5334 | ||
| 5357 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { | 5335 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
| 5336 | if (migrate_task(p, dest_cpu)) { | ||
| 5337 | struct migration_arg arg = { p, dest_cpu }; | ||
| 5358 | /* Need help from migration thread: drop lock and wait. */ | 5338 | /* Need help from migration thread: drop lock and wait. */ |
| 5359 | struct task_struct *mt = rq->migration_thread; | ||
| 5360 | |||
| 5361 | get_task_struct(mt); | ||
| 5362 | task_rq_unlock(rq, &flags); | 5339 | task_rq_unlock(rq, &flags); |
| 5363 | wake_up_process(mt); | 5340 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
| 5364 | put_task_struct(mt); | ||
| 5365 | wait_for_completion(&req.done); | ||
| 5366 | tlb_migrate_finish(p->mm); | 5341 | tlb_migrate_finish(p->mm); |
| 5367 | return 0; | 5342 | return 0; |
| 5368 | } | 5343 | } |
| @@ -5420,70 +5395,22 @@ fail: | |||
| 5420 | return ret; | 5395 | return ret; |
| 5421 | } | 5396 | } |
| 5422 | 5397 | ||
| 5423 | #define RCU_MIGRATION_IDLE 0 | ||
| 5424 | #define RCU_MIGRATION_NEED_QS 1 | ||
| 5425 | #define RCU_MIGRATION_GOT_QS 2 | ||
| 5426 | #define RCU_MIGRATION_MUST_SYNC 3 | ||
| 5427 | |||
| 5428 | /* | 5398 | /* |
| 5429 | * migration_thread - this is a highprio system thread that performs | 5399 | * migration_cpu_stop - this will be executed by a highprio stopper thread |
| 5430 | * thread migration by bumping thread off CPU then 'pushing' onto | 5400 | * and performs thread migration by bumping thread off CPU then |
| 5431 | * another runqueue. | 5401 | * 'pushing' onto another runqueue. |
| 5432 | */ | 5402 | */ |
| 5433 | static int migration_thread(void *data) | 5403 | static int migration_cpu_stop(void *data) |
| 5434 | { | 5404 | { |
| 5435 | int badcpu; | 5405 | struct migration_arg *arg = data; |
| 5436 | int cpu = (long)data; | ||
| 5437 | struct rq *rq; | ||
| 5438 | |||
| 5439 | rq = cpu_rq(cpu); | ||
| 5440 | BUG_ON(rq->migration_thread != current); | ||
| 5441 | |||
| 5442 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 5443 | while (!kthread_should_stop()) { | ||
| 5444 | struct migration_req *req; | ||
| 5445 | struct list_head *head; | ||
| 5446 | |||
| 5447 | raw_spin_lock_irq(&rq->lock); | ||
| 5448 | |||
| 5449 | if (cpu_is_offline(cpu)) { | ||
| 5450 | raw_spin_unlock_irq(&rq->lock); | ||
| 5451 | break; | ||
| 5452 | } | ||
| 5453 | |||
| 5454 | if (rq->active_balance) { | ||
| 5455 | active_load_balance(rq, cpu); | ||
| 5456 | rq->active_balance = 0; | ||
| 5457 | } | ||
| 5458 | |||
| 5459 | head = &rq->migration_queue; | ||
| 5460 | |||
| 5461 | if (list_empty(head)) { | ||
| 5462 | raw_spin_unlock_irq(&rq->lock); | ||
| 5463 | schedule(); | ||
| 5464 | set_current_state(TASK_INTERRUPTIBLE); | ||
| 5465 | continue; | ||
| 5466 | } | ||
| 5467 | req = list_entry(head->next, struct migration_req, list); | ||
| 5468 | list_del_init(head->next); | ||
| 5469 | |||
| 5470 | if (req->task != NULL) { | ||
| 5471 | raw_spin_unlock(&rq->lock); | ||
| 5472 | __migrate_task(req->task, cpu, req->dest_cpu); | ||
| 5473 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | ||
| 5474 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | ||
| 5475 | raw_spin_unlock(&rq->lock); | ||
| 5476 | } else { | ||
| 5477 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | ||
| 5478 | raw_spin_unlock(&rq->lock); | ||
| 5479 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | ||
| 5480 | } | ||
| 5481 | local_irq_enable(); | ||
| 5482 | |||
| 5483 | complete(&req->done); | ||
| 5484 | } | ||
| 5485 | __set_current_state(TASK_RUNNING); | ||
| 5486 | 5406 | ||
| 5407 | /* | ||
| 5408 | * The original target cpu might have gone down and we might | ||
| 5409 | * be on another cpu but it doesn't matter. | ||
| 5410 | */ | ||
| 5411 | local_irq_disable(); | ||
| 5412 | __migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu); | ||
| 5413 | local_irq_enable(); | ||
| 5487 | return 0; | 5414 | return 0; |
| 5488 | } | 5415 | } |
| 5489 | 5416 | ||
| @@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq) | |||
| 5850 | static int __cpuinit | 5777 | static int __cpuinit |
| 5851 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | 5778 | migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 5852 | { | 5779 | { |
| 5853 | struct task_struct *p; | ||
| 5854 | int cpu = (long)hcpu; | 5780 | int cpu = (long)hcpu; |
| 5855 | unsigned long flags; | 5781 | unsigned long flags; |
| 5856 | struct rq *rq; | 5782 | struct rq *rq = cpu_rq(cpu); |
| 5857 | 5783 | ||
| 5858 | switch (action) { | 5784 | switch (action) { |
| 5859 | 5785 | ||
| 5860 | case CPU_UP_PREPARE: | 5786 | case CPU_UP_PREPARE: |
| 5861 | case CPU_UP_PREPARE_FROZEN: | 5787 | case CPU_UP_PREPARE_FROZEN: |
| 5862 | p = kthread_create(migration_thread, hcpu, "migration/%d", cpu); | ||
| 5863 | if (IS_ERR(p)) | ||
| 5864 | return NOTIFY_BAD; | ||
| 5865 | kthread_bind(p, cpu); | ||
| 5866 | /* Must be high prio: stop_machine expects to yield to it. */ | ||
| 5867 | rq = task_rq_lock(p, &flags); | ||
| 5868 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | ||
| 5869 | task_rq_unlock(rq, &flags); | ||
| 5870 | get_task_struct(p); | ||
| 5871 | cpu_rq(cpu)->migration_thread = p; | ||
| 5872 | rq->calc_load_update = calc_load_update; | 5788 | rq->calc_load_update = calc_load_update; |
| 5873 | break; | 5789 | break; |
| 5874 | 5790 | ||
| 5875 | case CPU_ONLINE: | 5791 | case CPU_ONLINE: |
| 5876 | case CPU_ONLINE_FROZEN: | 5792 | case CPU_ONLINE_FROZEN: |
| 5877 | /* Strictly unnecessary, as first user will wake it. */ | ||
| 5878 | wake_up_process(cpu_rq(cpu)->migration_thread); | ||
| 5879 | |||
| 5880 | /* Update our root-domain */ | 5793 | /* Update our root-domain */ |
| 5881 | rq = cpu_rq(cpu); | ||
| 5882 | raw_spin_lock_irqsave(&rq->lock, flags); | 5794 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 5883 | if (rq->rd) { | 5795 | if (rq->rd) { |
| 5884 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5796 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
| @@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5889 | break; | 5801 | break; |
| 5890 | 5802 | ||
| 5891 | #ifdef CONFIG_HOTPLUG_CPU | 5803 | #ifdef CONFIG_HOTPLUG_CPU |
| 5892 | case CPU_UP_CANCELED: | ||
| 5893 | case CPU_UP_CANCELED_FROZEN: | ||
| 5894 | if (!cpu_rq(cpu)->migration_thread) | ||
| 5895 | break; | ||
| 5896 | /* Unbind it from offline cpu so it can run. Fall thru. */ | ||
| 5897 | kthread_bind(cpu_rq(cpu)->migration_thread, | ||
| 5898 | cpumask_any(cpu_online_mask)); | ||
| 5899 | kthread_stop(cpu_rq(cpu)->migration_thread); | ||
| 5900 | put_task_struct(cpu_rq(cpu)->migration_thread); | ||
| 5901 | cpu_rq(cpu)->migration_thread = NULL; | ||
| 5902 | break; | ||
| 5903 | |||
| 5904 | case CPU_DEAD: | 5804 | case CPU_DEAD: |
| 5905 | case CPU_DEAD_FROZEN: | 5805 | case CPU_DEAD_FROZEN: |
| 5906 | migrate_live_tasks(cpu); | 5806 | migrate_live_tasks(cpu); |
| 5907 | rq = cpu_rq(cpu); | ||
| 5908 | kthread_stop(rq->migration_thread); | ||
| 5909 | put_task_struct(rq->migration_thread); | ||
| 5910 | rq->migration_thread = NULL; | ||
| 5911 | /* Idle task back to normal (off runqueue, low prio) */ | 5807 | /* Idle task back to normal (off runqueue, low prio) */ |
| 5912 | raw_spin_lock_irq(&rq->lock); | 5808 | raw_spin_lock_irq(&rq->lock); |
| 5913 | deactivate_task(rq, rq->idle, 0); | 5809 | deactivate_task(rq, rq->idle, 0); |
| @@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 5918 | migrate_nr_uninterruptible(rq); | 5814 | migrate_nr_uninterruptible(rq); |
| 5919 | BUG_ON(rq->nr_running != 0); | 5815 | BUG_ON(rq->nr_running != 0); |
| 5920 | calc_global_load_remove(rq); | 5816 | calc_global_load_remove(rq); |
| 5921 | /* | ||
| 5922 | * No need to migrate the tasks: it was best-effort if | ||
| 5923 | * they didn't take sched_hotcpu_mutex. Just wake up | ||
| 5924 | * the requestors. | ||
| 5925 | */ | ||
| 5926 | raw_spin_lock_irq(&rq->lock); | ||
| 5927 | while (!list_empty(&rq->migration_queue)) { | ||
| 5928 | struct migration_req *req; | ||
| 5929 | |||
| 5930 | req = list_entry(rq->migration_queue.next, | ||
| 5931 | struct migration_req, list); | ||
| 5932 | list_del_init(&req->list); | ||
| 5933 | raw_spin_unlock_irq(&rq->lock); | ||
| 5934 | complete(&req->done); | ||
| 5935 | raw_spin_lock_irq(&rq->lock); | ||
| 5936 | } | ||
| 5937 | raw_spin_unlock_irq(&rq->lock); | ||
| 5938 | break; | 5817 | break; |
| 5939 | 5818 | ||
| 5940 | case CPU_DYING: | 5819 | case CPU_DYING: |
| 5941 | case CPU_DYING_FROZEN: | 5820 | case CPU_DYING_FROZEN: |
| 5942 | /* Update our root-domain */ | 5821 | /* Update our root-domain */ |
| 5943 | rq = cpu_rq(cpu); | ||
| 5944 | raw_spin_lock_irqsave(&rq->lock, flags); | 5822 | raw_spin_lock_irqsave(&rq->lock, flags); |
| 5945 | if (rq->rd) { | 5823 | if (rq->rd) { |
| 5946 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 5824 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
| @@ -7757,10 +7635,8 @@ void __init sched_init(void) | |||
| 7757 | rq->push_cpu = 0; | 7635 | rq->push_cpu = 0; |
| 7758 | rq->cpu = i; | 7636 | rq->cpu = i; |
| 7759 | rq->online = 0; | 7637 | rq->online = 0; |
| 7760 | rq->migration_thread = NULL; | ||
| 7761 | rq->idle_stamp = 0; | 7638 | rq->idle_stamp = 0; |
| 7762 | rq->avg_idle = 2*sysctl_sched_migration_cost; | 7639 | rq->avg_idle = 2*sysctl_sched_migration_cost; |
| 7763 | INIT_LIST_HEAD(&rq->migration_queue); | ||
| 7764 | rq_attach_root(rq, &def_root_domain); | 7640 | rq_attach_root(rq, &def_root_domain); |
| 7765 | #endif | 7641 | #endif |
| 7766 | init_rq_hrtick(rq); | 7642 | init_rq_hrtick(rq); |
| @@ -9054,43 +8930,39 @@ struct cgroup_subsys cpuacct_subsys = { | |||
| 9054 | 8930 | ||
| 9055 | #ifndef CONFIG_SMP | 8931 | #ifndef CONFIG_SMP |
| 9056 | 8932 | ||
| 9057 | int rcu_expedited_torture_stats(char *page) | ||
| 9058 | { | ||
| 9059 | return 0; | ||
| 9060 | } | ||
| 9061 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
| 9062 | |||
| 9063 | void synchronize_sched_expedited(void) | 8933 | void synchronize_sched_expedited(void) |
| 9064 | { | 8934 | { |
| 8935 | /* | ||
| 8936 | * There must be a full memory barrier on each affected CPU | ||
| 8937 | * between the time that try_stop_cpus() is called and the | ||
| 8938 | * time that it returns. | ||
| 8939 | * | ||
| 8940 | * In the current initial implementation of cpu_stop, the | ||
| 8941 | * above condition is already met when the control reaches | ||
| 8942 | * this point and the following smp_mb() is not strictly | ||
| 8943 | * necessary. Do smp_mb() anyway for documentation and | ||
| 8944 | * robustness against future implementation changes. | ||
| 8945 | */ | ||
| 8946 | smp_mb(); | ||
| 9065 | } | 8947 | } |
| 9066 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 8948 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
| 9067 | 8949 | ||
| 9068 | #else /* #ifndef CONFIG_SMP */ | 8950 | #else /* #ifndef CONFIG_SMP */ |
| 9069 | 8951 | ||
| 9070 | static DEFINE_PER_CPU(struct migration_req, rcu_migration_req); | 8952 | static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0); |
| 9071 | static DEFINE_MUTEX(rcu_sched_expedited_mutex); | ||
| 9072 | |||
| 9073 | #define RCU_EXPEDITED_STATE_POST -2 | ||
| 9074 | #define RCU_EXPEDITED_STATE_IDLE -1 | ||
| 9075 | 8953 | ||
| 9076 | static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | 8954 | static int synchronize_sched_expedited_cpu_stop(void *data) |
| 9077 | |||
| 9078 | int rcu_expedited_torture_stats(char *page) | ||
| 9079 | { | 8955 | { |
| 9080 | int cnt = 0; | 8956 | static DEFINE_SPINLOCK(done_mask_lock); |
| 9081 | int cpu; | 8957 | struct cpumask *done_mask = data; |
| 9082 | 8958 | ||
| 9083 | cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state); | 8959 | if (done_mask) { |
| 9084 | for_each_online_cpu(cpu) { | 8960 | spin_lock(&done_mask_lock); |
| 9085 | cnt += sprintf(&page[cnt], " %d:%d", | 8961 | cpumask_set_cpu(smp_processor_id(), done_mask); |
| 9086 | cpu, per_cpu(rcu_migration_req, cpu).dest_cpu); | 8962 | spin_unlock(&done_mask_lock); |
| 9087 | } | 8963 | } |
| 9088 | cnt += sprintf(&page[cnt], "\n"); | 8964 | return 0; |
| 9089 | return cnt; | ||
| 9090 | } | 8965 | } |
| 9091 | EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats); | ||
| 9092 | |||
| 9093 | static long synchronize_sched_expedited_count; | ||
| 9094 | 8966 | ||
| 9095 | /* | 8967 | /* |
| 9096 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" | 8968 | * Wait for an rcu-sched grace period to elapse, but use "big hammer" |
| @@ -9104,60 +8976,55 @@ static long synchronize_sched_expedited_count; | |||
| 9104 | */ | 8976 | */ |
| 9105 | void synchronize_sched_expedited(void) | 8977 | void synchronize_sched_expedited(void) |
| 9106 | { | 8978 | { |
| 9107 | int cpu; | 8979 | cpumask_var_t done_mask_var; |
| 9108 | unsigned long flags; | 8980 | struct cpumask *done_mask = NULL; |
| 9109 | bool need_full_sync = 0; | 8981 | int snap, trycount = 0; |
| 9110 | struct rq *rq; | 8982 | |
| 9111 | struct migration_req *req; | 8983 | /* |
| 9112 | long snap; | 8984 | * done_mask is used to check that all cpus actually have |
| 9113 | int trycount = 0; | 8985 | * finished running the stopper, which is guaranteed by |
| 8986 | * stop_cpus() if it's called with cpu hotplug blocked. Keep | ||
| 8987 | * the paranoia for now but it's best effort if cpumask is off | ||
| 8988 | * stack. | ||
| 8989 | */ | ||
| 8990 | if (zalloc_cpumask_var(&done_mask_var, GFP_ATOMIC)) | ||
| 8991 | done_mask = done_mask_var; | ||
| 9114 | 8992 | ||
| 9115 | smp_mb(); /* ensure prior mod happens before capturing snap. */ | 8993 | smp_mb(); /* ensure prior mod happens before capturing snap. */ |
| 9116 | snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1; | 8994 | snap = atomic_read(&synchronize_sched_expedited_count) + 1; |
| 9117 | get_online_cpus(); | 8995 | get_online_cpus(); |
| 9118 | while (!mutex_trylock(&rcu_sched_expedited_mutex)) { | 8996 | while (try_stop_cpus(cpu_online_mask, |
| 8997 | synchronize_sched_expedited_cpu_stop, | ||
| 8998 | done_mask) == -EAGAIN) { | ||
| 9119 | put_online_cpus(); | 8999 | put_online_cpus(); |
| 9120 | if (trycount++ < 10) | 9000 | if (trycount++ < 10) |
| 9121 | udelay(trycount * num_online_cpus()); | 9001 | udelay(trycount * num_online_cpus()); |
| 9122 | else { | 9002 | else { |
| 9123 | synchronize_sched(); | 9003 | synchronize_sched(); |
| 9124 | return; | 9004 | goto free_out; |
| 9125 | } | 9005 | } |
| 9126 | if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) { | 9006 | if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) { |
| 9127 | smp_mb(); /* ensure test happens before caller kfree */ | 9007 | smp_mb(); /* ensure test happens before caller kfree */ |
| 9128 | return; | 9008 | goto free_out; |
| 9129 | } | 9009 | } |
| 9130 | get_online_cpus(); | 9010 | get_online_cpus(); |
| 9131 | } | 9011 | } |
| 9132 | rcu_expedited_state = RCU_EXPEDITED_STATE_POST; | 9012 | atomic_inc(&synchronize_sched_expedited_count); |
| 9133 | for_each_online_cpu(cpu) { | 9013 | if (done_mask) |
| 9134 | rq = cpu_rq(cpu); | 9014 | cpumask_xor(done_mask, done_mask, cpu_online_mask); |
| 9135 | req = &per_cpu(rcu_migration_req, cpu); | ||
| 9136 | init_completion(&req->done); | ||
| 9137 | req->task = NULL; | ||
| 9138 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | ||
| 9139 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
| 9140 | list_add(&req->list, &rq->migration_queue); | ||
| 9141 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
| 9142 | wake_up_process(rq->migration_thread); | ||
| 9143 | } | ||
| 9144 | for_each_online_cpu(cpu) { | ||
| 9145 | rcu_expedited_state = cpu; | ||
| 9146 | req = &per_cpu(rcu_migration_req, cpu); | ||
| 9147 | rq = cpu_rq(cpu); | ||
| 9148 | wait_for_completion(&req->done); | ||
| 9149 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
| 9150 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | ||
| 9151 | need_full_sync = 1; | ||
| 9152 | req->dest_cpu = RCU_MIGRATION_IDLE; | ||
| 9153 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
| 9154 | } | ||
| 9155 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | ||
| 9156 | synchronize_sched_expedited_count++; | ||
| 9157 | mutex_unlock(&rcu_sched_expedited_mutex); | ||
| 9158 | put_online_cpus(); | 9015 | put_online_cpus(); |
| 9159 | if (need_full_sync) | 9016 | |
| 9017 | /* paranoia - this can't happen */ | ||
| 9018 | if (done_mask && cpumask_weight(done_mask)) { | ||
| 9019 | char buf[80]; | ||
| 9020 | |||
| 9021 | cpulist_scnprintf(buf, sizeof(buf), done_mask); | ||
| 9022 | WARN_ONCE(1, "synchronize_sched_expedited: cpu online and done masks disagree on %d cpus: %s\n", | ||
| 9023 | cpumask_weight(done_mask), buf); | ||
| 9160 | synchronize_sched(); | 9024 | synchronize_sched(); |
| 9025 | } | ||
| 9026 | free_out: | ||
| 9027 | free_cpumask_var(done_mask_var); | ||
| 9161 | } | 9028 | } |
| 9162 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); | 9029 | EXPORT_SYMBOL_GPL(synchronize_sched_expedited); |
| 9163 | 9030 | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d1..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle) | |||
| 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); | 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); |
| 2799 | } | 2799 | } |
| 2800 | 2800 | ||
| 2801 | static int active_load_balance_cpu_stop(void *data); | ||
| 2802 | |||
| 2801 | /* | 2803 | /* |
| 2802 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2804 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
| 2803 | * tasks if there is an imbalance. | 2805 | * tasks if there is an imbalance. |
| @@ -2887,8 +2889,9 @@ redo: | |||
| 2887 | if (need_active_balance(sd, sd_idle, idle)) { | 2889 | if (need_active_balance(sd, sd_idle, idle)) { |
| 2888 | raw_spin_lock_irqsave(&busiest->lock, flags); | 2890 | raw_spin_lock_irqsave(&busiest->lock, flags); |
| 2889 | 2891 | ||
| 2890 | /* don't kick the migration_thread, if the curr | 2892 | /* don't kick the active_load_balance_cpu_stop, |
| 2891 | * task on busiest cpu can't be moved to this_cpu | 2893 | * if the curr task on busiest cpu can't be |
| 2894 | * moved to this_cpu | ||
| 2892 | */ | 2895 | */ |
| 2893 | if (!cpumask_test_cpu(this_cpu, | 2896 | if (!cpumask_test_cpu(this_cpu, |
| 2894 | &busiest->curr->cpus_allowed)) { | 2897 | &busiest->curr->cpus_allowed)) { |
| @@ -2898,14 +2901,22 @@ redo: | |||
| 2898 | goto out_one_pinned; | 2901 | goto out_one_pinned; |
| 2899 | } | 2902 | } |
| 2900 | 2903 | ||
| 2904 | /* | ||
| 2905 | * ->active_balance synchronizes accesses to | ||
| 2906 | * ->active_balance_work. Once set, it's cleared | ||
| 2907 | * only after active load balance is finished. | ||
| 2908 | */ | ||
| 2901 | if (!busiest->active_balance) { | 2909 | if (!busiest->active_balance) { |
| 2902 | busiest->active_balance = 1; | 2910 | busiest->active_balance = 1; |
| 2903 | busiest->push_cpu = this_cpu; | 2911 | busiest->push_cpu = this_cpu; |
| 2904 | active_balance = 1; | 2912 | active_balance = 1; |
| 2905 | } | 2913 | } |
| 2906 | raw_spin_unlock_irqrestore(&busiest->lock, flags); | 2914 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
| 2915 | |||
| 2907 | if (active_balance) | 2916 | if (active_balance) |
| 2908 | wake_up_process(busiest->migration_thread); | 2917 | stop_one_cpu_nowait(cpu_of(busiest), |
| 2918 | active_load_balance_cpu_stop, busiest, | ||
| 2919 | &busiest->active_balance_work); | ||
| 2909 | 2920 | ||
| 2910 | /* | 2921 | /* |
| 2911 | * We've kicked active balancing, reset the failure | 2922 | * We've kicked active balancing, reset the failure |
| @@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
| 3012 | } | 3023 | } |
| 3013 | 3024 | ||
| 3014 | /* | 3025 | /* |
| 3015 | * active_load_balance is run by migration threads. It pushes running tasks | 3026 | * active_load_balance_cpu_stop is run by cpu stopper. It pushes |
| 3016 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 3027 | * running tasks off the busiest CPU onto idle CPUs. It requires at |
| 3017 | * running on each physical CPU where possible, and avoids physical / | 3028 | * least 1 task to be running on each physical CPU where possible, and |
| 3018 | * logical imbalances. | 3029 | * avoids physical / logical imbalances. |
| 3019 | * | ||
| 3020 | * Called with busiest_rq locked. | ||
| 3021 | */ | 3030 | */ |
| 3022 | static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | 3031 | static int active_load_balance_cpu_stop(void *data) |
| 3023 | { | 3032 | { |
| 3033 | struct rq *busiest_rq = data; | ||
| 3034 | int busiest_cpu = cpu_of(busiest_rq); | ||
| 3024 | int target_cpu = busiest_rq->push_cpu; | 3035 | int target_cpu = busiest_rq->push_cpu; |
| 3036 | struct rq *target_rq = cpu_rq(target_cpu); | ||
| 3025 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
| 3026 | struct rq *target_rq; | 3038 | |
| 3039 | raw_spin_lock_irq(&busiest_rq->lock); | ||
| 3040 | |||
| 3041 | /* make sure the requested cpu hasn't gone down in the meantime */ | ||
| 3042 | if (unlikely(busiest_cpu != smp_processor_id() || | ||
| 3043 | !busiest_rq->active_balance)) | ||
| 3044 | goto out_unlock; | ||
| 3027 | 3045 | ||
| 3028 | /* Is there any task to move? */ | 3046 | /* Is there any task to move? */ |
| 3029 | if (busiest_rq->nr_running <= 1) | 3047 | if (busiest_rq->nr_running <= 1) |
| 3030 | return; | 3048 | goto out_unlock; |
| 3031 | |||
| 3032 | target_rq = cpu_rq(target_cpu); | ||
| 3033 | 3049 | ||
| 3034 | /* | 3050 | /* |
| 3035 | * This condition is "impossible", if it occurs | 3051 | * This condition is "impossible", if it occurs |
| @@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
| 3058 | schedstat_inc(sd, alb_failed); | 3074 | schedstat_inc(sd, alb_failed); |
| 3059 | } | 3075 | } |
| 3060 | double_unlock_balance(busiest_rq, target_rq); | 3076 | double_unlock_balance(busiest_rq, target_rq); |
| 3077 | out_unlock: | ||
| 3078 | busiest_rq->active_balance = 0; | ||
| 3079 | raw_spin_unlock_irq(&busiest_rq->lock); | ||
| 3080 | return 0; | ||
| 3061 | } | 3081 | } |
| 3062 | 3082 | ||
| 3063 | #ifdef CONFIG_NO_HZ | 3083 | #ifdef CONFIG_NO_HZ |
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 884c7a1afeed..5b20141a5ec1 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
| @@ -301,7 +301,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb, | |||
| 301 | case CPU_UP_PREPARE: | 301 | case CPU_UP_PREPARE: |
| 302 | BUG_ON(stopper->thread || stopper->enabled || | 302 | BUG_ON(stopper->thread || stopper->enabled || |
| 303 | !list_empty(&stopper->works)); | 303 | !list_empty(&stopper->works)); |
| 304 | p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d", | 304 | p = kthread_create(cpu_stopper_thread, stopper, "migration/%d", |
| 305 | cpu); | 305 | cpu); |
| 306 | if (IS_ERR(p)) | 306 | if (IS_ERR(p)) |
| 307 | return NOTIFY_BAD; | 307 | return NOTIFY_BAD; |
