author		Ingo Molnar <mingo@elte.hu>	2010-01-13 03:58:37 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-01-13 04:08:50 -0500
commit		61405fea92c42d072d9b8bd189689f1502a838af (patch)
tree		013ea3e7ed71f4114004d5852d40b6e89e128f76 /kernel/sched.c
parent		9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b (diff)
parent		1703f2c321a8a531c393e137a82602e16c6061cb (diff)
Merge branch 'perf/urgent' into perf/core
Merge reason: queue up dependent patch, update to -rc4

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	307
1 file changed, 164 insertions(+), 143 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index d6527ac0f6e7..e507af086b42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2002,39 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
-/**
- * kthread_bind - bind a just-created kthread to a cpu.
- * @p: thread created by kthread_create().
- * @cpu: cpu (might not be online, must be possible) for @k to run on.
- *
- * Description: This function is equivalent to set_cpus_allowed(),
- * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create()).
- *
- * Function lives here instead of kthread.c because it messes with
- * scheduler internals which require locking.
- */
-void kthread_bind(struct task_struct *p, unsigned int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
-
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	update_rq_clock(rq);
-	set_task_cpu(p, cpu);
-	p->cpus_allowed = cpumask_of_cpu(cpu);
-	p->rt.nr_cpus_allowed = 1;
-	p->flags |= PF_THREAD_BOUND;
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-EXPORT_SYMBOL(kthread_bind);
-
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -2044,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 {
 	s64 delta;
 
+	if (p->sched_class != &fair_sched_class)
+		return 0;
+
 	/*
 	 * Buddy candidates are cache hot:
 	 */
@@ -2052,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 			 &p->se == cfs_rq_of(&p->se)->last))
 		return 1;
 
-	if (p->sched_class != &fair_sched_class)
-		return 0;
-
 	if (sysctl_sched_migration_cost == -1)
 		return 1;
 	if (sysctl_sched_migration_cost == 0)
@@ -2065,22 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	return delta < (s64)sysctl_sched_migration_cost;
 }
 
-
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
-	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+#ifdef CONFIG_SCHED_DEBUG
+	/*
+	 * We should never call set_task_cpu() on a blocked task,
+	 * ttwu() will sort out the placement.
+	 */
+	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+#endif
 
 	trace_sched_migrate_task(p, new_cpu);
 
-	if (old_cpu != new_cpu) {
+	if (task_cpu(p) != new_cpu) {
 		p->se.nr_migrations++;
-		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
-				1, 1, NULL, 0);
+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-			 new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -2105,13 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 
 	/*
 	 * If the task is not on a runqueue (and not running), then
-	 * it is sufficient to simply update the task's cpu field.
+	 * the next wake-up will properly place the task.
 	 */
-	if (!p->se.on_rq && !task_running(rq, p)) {
-		update_rq_clock(rq);
-		set_task_cpu(p, dest_cpu);
+	if (!p->se.on_rq && !task_running(rq, p))
 		return 0;
-	}
 
 	init_completion(&req->done);
 	req->task = p;
@@ -2317,10 +2282,73 @@ void task_oncpu_function_call(struct task_struct *p,
 }
 
 #ifdef CONFIG_SMP
+static int select_fallback_rq(int cpu, struct task_struct *p)
+{
+	int dest_cpu;
+	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+
+	/* Look for allowed, online CPU in same node. */
+	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+			return dest_cpu;
+
+	/* Any allowed, online CPU? */
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+	if (dest_cpu < nr_cpu_ids)
+		return dest_cpu;
+
+	/* No more Mr. Nice Guy. */
+	if (dest_cpu >= nr_cpu_ids) {
+		rcu_read_lock();
+		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+		rcu_read_unlock();
+		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk(KERN_INFO "process %d (%s) no "
+			       "longer affine to cpu%d\n",
+			       task_pid_nr(p), p->comm, cpu);
+		}
+	}
+
+	return dest_cpu;
+}
+
+/*
+ * Called from:
+ *
+ *  - fork, @p is stable because it isn't on the tasklist yet
+ *
+ *  - exec, @p is unstable, retry loop
+ *
+ *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
+ *    we should be good.
+ */
 static inline
 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-	return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+
+	/*
+	 * In order not to call set_task_cpu() on a blocking task we need
+	 * to rely on ttwu() to place the task on a valid ->cpus_allowed
+	 * cpu.
+	 *
+	 * Since this is common to all placement strategies, this lives here.
+	 *
+	 * [ this allows ->select_task() to simply return task_cpu(p) and
+	 *   not worry about this generic constraint ]
+	 */
+	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+		     !cpu_online(cpu)))
+		cpu = select_fallback_rq(task_cpu(p), p);
+
+	return cpu;
 }
 #endif
 
@@ -2375,6 +2403,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
+
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(rq, p);
+
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2438,8 +2470,8 @@ out_running:
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 
 	if (unlikely(rq->idle_stamp)) {
 		u64 delta = rq->clock - rq->idle_stamp;
@@ -2538,14 +2570,6 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
-
-	/*
-	 * We mark the process as running here, but have not actually
-	 * inserted it onto the runqueue yet. This guarantees that
-	 * nobody will actually run it, and a signal or other external
-	 * event cannot wake it up and insert it on the runqueue either.
-	 */
-	p->state = TASK_RUNNING;
 }
 
 /*
@@ -2556,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	int cpu = get_cpu();
 
 	__sched_fork(p);
+	/*
+	 * We mark the process as waking here. This guarantees that
+	 * nobody will actually run it, and a signal or other external
+	 * event cannot wake it up and insert it on the runqueue either.
+	 */
+	p->state = TASK_WAKING;
 
 	/*
 	 * Revert to default priority/policy on fork if requested.
@@ -2624,14 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	BUG_ON(p->state != TASK_RUNNING);
+	BUG_ON(p->state != TASK_WAKING);
+	p->state = TASK_RUNNING;
 	update_rq_clock(rq);
 	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 #endif
 	task_rq_unlock(rq, &flags);
 }
@@ -3101,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 }
 
 /*
- * If dest_cpu is allowed for this process, migrate the task to it.
- * This is accomplished by forcing the cpu_allowed mask to only
- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
- * the cpu_allowed mask is restored.
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
  */
-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+void sched_exec(void)
 {
+	struct task_struct *p = current;
 	struct migration_req req;
+	int dest_cpu, this_cpu;
 	unsigned long flags;
 	struct rq *rq;
 
+again:
+	this_cpu = get_cpu();
+	dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+	if (dest_cpu == this_cpu) {
+		put_cpu();
+		return;
+	}
+
 	rq = task_rq_lock(p, &flags);
+	put_cpu();
+
+	/*
+	 * select_task_rq() can race against ->cpus_allowed
+	 */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
-	    || unlikely(!cpu_active(dest_cpu)))
-		goto out;
+	    || unlikely(!cpu_active(dest_cpu))) {
+		task_rq_unlock(rq, &flags);
+		goto again;
+	}
 
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
@@ -3130,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
 		return;
 	}
-out:
 	task_rq_unlock(rq, &flags);
 }
 
 /*
- * sched_exec - execve() is a valuable balancing opportunity, because at
- * this point the task has the smallest effective memory and cache footprint.
- */
-void sched_exec(void)
-{
-	int new_cpu, this_cpu = get_cpu();
-	new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
-	put_cpu();
-	if (new_cpu != this_cpu)
-		sched_migrate_task(current, new_cpu);
-}
-
-/*
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
@@ -5911,14 +5943,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  */
 bool try_wait_for_completion(struct completion *x)
 {
+	unsigned long flags;
 	int ret = 1;
 
-	spin_lock_irq(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
 	if (!x->done)
 		ret = 0;
 	else
 		x->done--;
-	spin_unlock_irq(&x->wait.lock);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -5933,12 +5966,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
  */
 bool completion_done(struct completion *x)
 {
+	unsigned long flags;
 	int ret = 1;
 
-	spin_lock_irq(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
 	if (!x->done)
 		ret = 0;
-	spin_unlock_irq(&x->wait.lock);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(completion_done);
@@ -6457,7 +6491,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 		return -EINVAL;
 
 	retval = -ESRCH;
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	if (p) {
 		retval = security_task_getscheduler(p);
@@ -6465,7 +6499,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 			retval = p->policy
 				| (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6483,7 +6517,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	if (!param || pid < 0)
 		return -EINVAL;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	retval = -ESRCH;
 	if (!p)
@@ -6494,7 +6528,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 		goto out_unlock;
 
 	lp.sched_priority = p->rt_priority;
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	/*
 	 * This one might sleep, we cannot do it with a spinlock held ...
@@ -6504,7 +6538,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	return retval;
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6515,22 +6549,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	int retval;
 
 	get_online_cpus();
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
 	if (!p) {
-		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		put_online_cpus();
 		return -ESRCH;
 	}
 
-	/*
-	 * It is not safe to call set_cpus_allowed with the
-	 * tasklist_lock held. We will bump the task_struct's
-	 * usage count and then drop tasklist_lock.
-	 */
+	/* Prevent p going away */
 	get_task_struct(p);
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
 		retval = -ENOMEM;
@@ -6616,7 +6646,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	int retval;
 
 	get_online_cpus();
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 
 	retval = -ESRCH;
 	p = find_process_by_pid(pid);
@@ -6632,7 +6662,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	task_rq_unlock(rq, &flags);
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	put_online_cpus();
 
 	return retval;
@@ -6876,7 +6906,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 		return -EINVAL;
 
 	retval = -ESRCH;
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	if (!p)
 		goto out_unlock;
@@ -6889,13 +6919,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	time_slice = p->sched_class->get_rr_interval(rq, p);
 	task_rq_unlock(rq, &flags);
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
 	return retval;
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6986,6 +7016,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	__sched_fork(idle);
+	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
@@ -7100,7 +7131,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	struct rq *rq;
 	int ret = 0;
 
+	/*
+	 * Since we rely on wake-ups to migrate sleeping tasks, don't change
+	 * the ->cpus_allowed mask from under waking tasks, which would be
+	 * possible when we change rq->lock in ttwu(), so synchronize against
+	 * TASK_WAKING to avoid that.
+	 */
+again:
+	while (p->state == TASK_WAKING)
+		cpu_relax();
+
 	rq = task_rq_lock(p, &flags);
+
+	if (p->state == TASK_WAKING) {
+		task_rq_unlock(rq, &flags);
+		goto again;
+	}
+
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
@@ -7156,7 +7203,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 {
 	struct rq *rq_dest, *rq_src;
-	int ret = 0, on_rq;
+	int ret = 0;
 
 	if (unlikely(!cpu_active(dest_cpu)))
 		return ret;
@@ -7172,12 +7219,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 		goto fail;
 
-	on_rq = p->se.on_rq;
-	if (on_rq)
+	/*
+	 * If we're not on a rq, the next wake-up will ensure we're
+	 * placed properly.
+	 */
+	if (p->se.on_rq) {
 		deactivate_task(rq_src, p, 0);
-
-	set_task_cpu(p, dest_cpu);
-	if (on_rq) {
+		set_task_cpu(p, dest_cpu);
 		activate_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p, 0);
 	}
@@ -7273,37 +7321,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	int dest_cpu;
-	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
 
 again:
-	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
-		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
-			goto move;
-
-	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
-	if (dest_cpu < nr_cpu_ids)
-		goto move;
-
-	/* No more Mr. Nice Guy. */
-	if (dest_cpu >= nr_cpu_ids) {
-		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
-
-		/*
-		 * Don't tell them about moving exiting tasks or
-		 * kernel threads (both mm NULL), since they never
-		 * leave kernel.
-		 */
-		if (p->mm && printk_ratelimit()) {
-			printk(KERN_INFO "process %d (%s) no "
-			       "longer affine to cpu%d\n",
-			       task_pid_nr(p), p->comm, dead_cpu);
-		}
-	}
+	dest_cpu = select_fallback_rq(dead_cpu, p);
 
-move:
 	/* It can have affinity changed while we were choosing. */
 	if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
 		goto again;
@@ -9668,7 +9689,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-	int nested = preempt_count() & ~PREEMPT_ACTIVE;
+	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
 	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 }
@@ -10083,7 +10104,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))