author    Ingo Molnar <mingo@elte.hu>    2010-01-13 03:58:37 -0500
committer Ingo Molnar <mingo@elte.hu>    2010-01-13 04:08:50 -0500
commit    61405fea92c42d072d9b8bd189689f1502a838af (patch)
tree      013ea3e7ed71f4114004d5852d40b6e89e128f76 /kernel/sched.c
parent    9c443dfdd31eddea6cbe6ee0ca469fbcc4e1dc3b (diff)
parent    1703f2c321a8a531c393e137a82602e16c6061cb (diff)
Merge branch 'perf/urgent' into perf/core
Merge reason: queue up dependent patch, update to -rc4
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c  307
1 files changed, 164 insertions, 143 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d6527ac0f6e7..e507af086b42 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2002,39 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 		p->sched_class->prio_changed(rq, p, oldprio, running);
 }
 
-/**
- * kthread_bind - bind a just-created kthread to a cpu.
- * @p: thread created by kthread_create().
- * @cpu: cpu (might not be online, must be possible) for @k to run on.
- *
- * Description: This function is equivalent to set_cpus_allowed(),
- * except that @cpu doesn't need to be online, and the thread must be
- * stopped (i.e., just returned from kthread_create()).
- *
- * Function lives here instead of kthread.c because it messes with
- * scheduler internals which require locking.
- */
-void kthread_bind(struct task_struct *p, unsigned int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
-
-	/* Must have done schedule() in kthread() before we set_task_cpu */
-	if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
-		WARN_ON(1);
-		return;
-	}
-
-	raw_spin_lock_irqsave(&rq->lock, flags);
-	update_rq_clock(rq);
-	set_task_cpu(p, cpu);
-	p->cpus_allowed = cpumask_of_cpu(cpu);
-	p->rt.nr_cpus_allowed = 1;
-	p->flags |= PF_THREAD_BOUND;
-	raw_spin_unlock_irqrestore(&rq->lock, flags);
-}
-EXPORT_SYMBOL(kthread_bind);
-
 #ifdef CONFIG_SMP
 /*
  * Is this task likely cache-hot:
@@ -2044,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 {
 	s64 delta;
 
+	if (p->sched_class != &fair_sched_class)
+		return 0;
+
 	/*
 	 * Buddy candidates are cache hot:
 	 */
@@ -2052,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 			 &p->se == cfs_rq_of(&p->se)->last))
 		return 1;
 
-	if (p->sched_class != &fair_sched_class)
-		return 0;
-
 	if (sysctl_sched_migration_cost == -1)
 		return 1;
 	if (sysctl_sched_migration_cost == 0)
@@ -2065,22 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 	return delta < (s64)sysctl_sched_migration_cost;
 }
 
-
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
-	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+#ifdef CONFIG_SCHED_DEBUG
+	/*
+	 * We should never call set_task_cpu() on a blocked task,
+	 * ttwu() will sort out the placement.
+	 */
+	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+#endif
 
 	trace_sched_migrate_task(p, new_cpu);
 
-	if (old_cpu != new_cpu) {
+	if (task_cpu(p) != new_cpu) {
 		p->se.nr_migrations++;
-		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
-				1, 1, NULL, 0);
+		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-					 new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -2105,13 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 
 	/*
 	 * If the task is not on a runqueue (and not running), then
-	 * it is sufficient to simply update the task's cpu field.
+	 * the next wake-up will properly place the task.
 	 */
-	if (!p->se.on_rq && !task_running(rq, p)) {
-		update_rq_clock(rq);
-		set_task_cpu(p, dest_cpu);
+	if (!p->se.on_rq && !task_running(rq, p))
 		return 0;
-	}
 
 	init_completion(&req->done);
 	req->task = p;
@@ -2317,10 +2282,73 @@ void task_oncpu_function_call(struct task_struct *p,
 }
 
 #ifdef CONFIG_SMP
+static int select_fallback_rq(int cpu, struct task_struct *p)
+{
+	int dest_cpu;
+	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+
+	/* Look for allowed, online CPU in same node. */
+	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+			return dest_cpu;
+
+	/* Any allowed, online CPU? */
+	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+	if (dest_cpu < nr_cpu_ids)
+		return dest_cpu;
+
+	/* No more Mr. Nice Guy. */
+	if (dest_cpu >= nr_cpu_ids) {
+		rcu_read_lock();
+		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+		rcu_read_unlock();
+		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+
+		/*
+		 * Don't tell them about moving exiting tasks or
+		 * kernel threads (both mm NULL), since they never
+		 * leave kernel.
+		 */
+		if (p->mm && printk_ratelimit()) {
+			printk(KERN_INFO "process %d (%s) no "
+					"longer affine to cpu%d\n",
+					task_pid_nr(p), p->comm, cpu);
+		}
+	}
+
+	return dest_cpu;
+}
+
+/*
+ * Called from:
+ *
+ *  - fork, @p is stable because it isn't on the tasklist yet
+ *
+ *  - exec, @p is unstable, retry loop
+ *
+ *  - wake-up, we serialize ->cpus_allowed against TASK_WAKING so
+ *    we should be good.
+ */
 static inline
 int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-	return p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+
+	/*
+	 * In order not to call set_task_cpu() on a blocking task we need
+	 * to rely on ttwu() to place the task on a valid ->cpus_allowed
+	 * cpu.
+	 *
+	 * Since this is common to all placement strategies, this lives here.
+	 *
+	 * [ this allows ->select_task() to simply return task_cpu(p) and
+	 *   not worry about this generic constraint ]
+	 */
+	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+		     !cpu_online(cpu)))
+		cpu = select_fallback_rq(task_cpu(p), p);
+
+	return cpu;
 }
 #endif
 
@@ -2375,6 +2403,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 	p->state = TASK_WAKING;
+
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(rq, p);
+
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
@@ -2438,8 +2470,8 @@ out_running:
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 
 	if (unlikely(rq->idle_stamp)) {
 		u64 delta = rq->clock - rq->idle_stamp;
@@ -2538,14 +2570,6 @@ static void __sched_fork(struct task_struct *p)
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
 #endif
-
-	/*
-	 * We mark the process as running here, but have not actually
-	 * inserted it onto the runqueue yet. This guarantees that
-	 * nobody will actually run it, and a signal or other external
-	 * event cannot wake it up and insert it on the runqueue either.
-	 */
-	p->state = TASK_RUNNING;
 }
 
 /*
@@ -2556,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	int cpu = get_cpu();
 
 	__sched_fork(p);
+	/*
+	 * We mark the process as waking here. This guarantees that
+	 * nobody will actually run it, and a signal or other external
+	 * event cannot wake it up and insert it on the runqueue either.
+	 */
+	p->state = TASK_WAKING;
 
 	/*
 	 * Revert to default priority/policy on fork if requested.
@@ -2624,14 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	struct rq *rq;
 
 	rq = task_rq_lock(p, &flags);
-	BUG_ON(p->state != TASK_RUNNING);
+	BUG_ON(p->state != TASK_WAKING);
+	p->state = TASK_RUNNING;
 	update_rq_clock(rq);
 	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
-	if (p->sched_class->task_wake_up)
-		p->sched_class->task_wake_up(rq, p);
+	if (p->sched_class->task_woken)
+		p->sched_class->task_woken(rq, p);
 #endif
 	task_rq_unlock(rq, &flags);
 }
@@ -3101,21 +3132,36 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
 }
 
 /*
- * If dest_cpu is allowed for this process, migrate the task to it.
- * This is accomplished by forcing the cpu_allowed mask to only
- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
- * the cpu_allowed mask is restored.
+ * sched_exec - execve() is a valuable balancing opportunity, because at
+ * this point the task has the smallest effective memory and cache footprint.
  */
-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+void sched_exec(void)
 {
+	struct task_struct *p = current;
 	struct migration_req req;
+	int dest_cpu, this_cpu;
 	unsigned long flags;
 	struct rq *rq;
 
+again:
+	this_cpu = get_cpu();
+	dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0);
+	if (dest_cpu == this_cpu) {
+		put_cpu();
+		return;
+	}
+
 	rq = task_rq_lock(p, &flags);
+	put_cpu();
+
+	/*
+	 * select_task_rq() can race against ->cpus_allowed
+	 */
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
-	    || unlikely(!cpu_active(dest_cpu)))
-		goto out;
+	    || unlikely(!cpu_active(dest_cpu))) {
+		task_rq_unlock(rq, &flags);
+		goto again;
+	}
 
 	/* force the process onto the specified CPU */
 	if (migrate_task(p, dest_cpu, &req)) {
@@ -3130,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
 		return;
 	}
-out:
 	task_rq_unlock(rq, &flags);
 }
 
 /*
- * sched_exec - execve() is a valuable balancing opportunity, because at
- * this point the task has the smallest effective memory and cache footprint.
- */
-void sched_exec(void)
-{
-	int new_cpu, this_cpu = get_cpu();
-	new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0);
-	put_cpu();
-	if (new_cpu != this_cpu)
-		sched_migrate_task(current, new_cpu);
-}
-
-/*
  * pull_task - move a task from a remote runqueue to the local runqueue.
  * Both runqueues must be locked.
  */
@@ -5911,14 +5943,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
  */
 bool try_wait_for_completion(struct completion *x)
 {
+	unsigned long flags;
 	int ret = 1;
 
-	spin_lock_irq(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
 	if (!x->done)
 		ret = 0;
 	else
 		x->done--;
-	spin_unlock_irq(&x->wait.lock);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(try_wait_for_completion);
@@ -5933,12 +5966,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
  */
 bool completion_done(struct completion *x)
 {
+	unsigned long flags;
 	int ret = 1;
 
-	spin_lock_irq(&x->wait.lock);
+	spin_lock_irqsave(&x->wait.lock, flags);
 	if (!x->done)
 		ret = 0;
-	spin_unlock_irq(&x->wait.lock);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
 	return ret;
 }
 EXPORT_SYMBOL(completion_done);
@@ -6457,7 +6491,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 		return -EINVAL;
 
 	retval = -ESRCH;
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	if (p) {
 		retval = security_task_getscheduler(p);
@@ -6465,7 +6499,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
 			retval = p->policy
 				| (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
 	}
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6483,7 +6517,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	if (!param || pid < 0)
 		return -EINVAL;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	retval = -ESRCH;
 	if (!p)
@@ -6494,7 +6528,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 		goto out_unlock;
 
 	lp.sched_priority = p->rt_priority;
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	/*
 	 * This one might sleep, we cannot do it with a spinlock held ...
@@ -6504,7 +6538,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
 	return retval;
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6515,22 +6549,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	int retval;
 
 	get_online_cpus();
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
 	if (!p) {
-		read_unlock(&tasklist_lock);
+		rcu_read_unlock();
 		put_online_cpus();
 		return -ESRCH;
 	}
 
-	/*
-	 * It is not safe to call set_cpus_allowed with the
-	 * tasklist_lock held. We will bump the task_struct's
-	 * usage count and then drop tasklist_lock.
-	 */
+	/* Prevent p going away */
 	get_task_struct(p);
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 
 	if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
 		retval = -ENOMEM;
@@ -6616,7 +6646,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	int retval;
 
 	get_online_cpus();
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 
 	retval = -ESRCH;
 	p = find_process_by_pid(pid);
@@ -6632,7 +6662,7 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	task_rq_unlock(rq, &flags);
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	put_online_cpus();
 
 	return retval;
@@ -6876,7 +6906,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 		return -EINVAL;
 
 	retval = -ESRCH;
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	p = find_process_by_pid(pid);
 	if (!p)
 		goto out_unlock;
@@ -6889,13 +6919,13 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 	time_slice = p->sched_class->get_rr_interval(rq, p);
 	task_rq_unlock(rq, &flags);
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
 	retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
 	return retval;
 
 out_unlock:
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return retval;
 }
 
@@ -6986,6 +7016,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	raw_spin_lock_irqsave(&rq->lock, flags);
 
 	__sched_fork(idle);
+	idle->state = TASK_RUNNING;
 	idle->se.exec_start = sched_clock();
 
 	cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
@@ -7100,7 +7131,23 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	struct rq *rq;
 	int ret = 0;
 
+	/*
+	 * Since we rely on wake-ups to migrate sleeping tasks, don't change
+	 * the ->cpus_allowed mask from under waking tasks, which would be
+	 * possible when we change rq->lock in ttwu(), so synchronize against
+	 * TASK_WAKING to avoid that.
+	 */
+again:
+	while (p->state == TASK_WAKING)
+		cpu_relax();
+
 	rq = task_rq_lock(p, &flags);
+
+	if (p->state == TASK_WAKING) {
+		task_rq_unlock(rq, &flags);
+		goto again;
+	}
+
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
@@ -7156,7 +7203,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
 static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 {
 	struct rq *rq_dest, *rq_src;
-	int ret = 0, on_rq;
+	int ret = 0;
 
 	if (unlikely(!cpu_active(dest_cpu)))
 		return ret;
@@ -7172,12 +7219,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
 		goto fail;
 
-	on_rq = p->se.on_rq;
-	if (on_rq)
+	/*
+	 * If we're not on a rq, the next wake-up will ensure we're
+	 * placed properly.
+	 */
+	if (p->se.on_rq) {
 		deactivate_task(rq_src, p, 0);
-
-	set_task_cpu(p, dest_cpu);
-	if (on_rq) {
+		set_task_cpu(p, dest_cpu);
 		activate_task(rq_dest, p, 0);
 		check_preempt_curr(rq_dest, p, 0);
 	}
@@ -7273,37 +7321,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
 static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	int dest_cpu;
-	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
 
 again:
-	/* Look for allowed, online CPU in same node. */
-	for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
-		if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
-			goto move;
-
-	/* Any allowed, online CPU? */
-	dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
-	if (dest_cpu < nr_cpu_ids)
-		goto move;
-
-	/* No more Mr. Nice Guy. */
-	if (dest_cpu >= nr_cpu_ids) {
-		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
-
-		/*
-		 * Don't tell them about moving exiting tasks or
-		 * kernel threads (both mm NULL), since they never
-		 * leave kernel.
-		 */
-		if (p->mm && printk_ratelimit()) {
-			printk(KERN_INFO "process %d (%s) no "
-				"longer affine to cpu%d\n",
-				task_pid_nr(p), p->comm, dead_cpu);
-		}
-	}
+	dest_cpu = select_fallback_rq(dead_cpu, p);
 
-move:
 	/* It can have affinity changed while we were choosing. */
 	if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
 		goto again;
@@ -9668,7 +9689,7 @@ void __init sched_init(void)
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
 static inline int preempt_count_equals(int preempt_offset)
 {
-	int nested = preempt_count() & ~PREEMPT_ACTIVE;
+	int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth();
 
 	return (nested == PREEMPT_INATOMIC_BASE + preempt_offset);
 }
@@ -10083,7 +10104,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))