aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched.c
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2011-05-20 16:10:22 -0400
committer: David S. Miller <davem@davemloft.net> 2011-05-20 16:10:22 -0400
commit: 90d3ac15e5c637d45849e83c828ed78c62886737 (patch)
tree: c5568365f32386559d2710e8981ed41e5fe0eb12 /kernel/sched.c
parent: 9fafbd806198eb690c9a9f9fe35a879db93a1b8d (diff)
parent: 317f394160e9beb97d19a84c39b7e5eb3d7815a8 (diff)
Merge commit '317f394160e9beb97d19a84c39b7e5eb3d7815a8'
Conflicts:
	arch/sparc/kernel/smp_32.c

With merge conflict help from Daniel Hellstrom.

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- kernel/sched.c 627
1 files changed, 349 insertions, 278 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 312f8b95c2d4..8c9d804dc07d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq {
312 312
313 u64 exec_clock; 313 u64 exec_clock;
314 u64 min_vruntime; 314 u64 min_vruntime;
315#ifndef CONFIG_64BIT
316 u64 min_vruntime_copy;
317#endif
315 318
316 struct rb_root tasks_timeline; 319 struct rb_root tasks_timeline;
317 struct rb_node *rb_leftmost; 320 struct rb_node *rb_leftmost;
@@ -553,6 +556,10 @@ struct rq {
553 unsigned int ttwu_count; 556 unsigned int ttwu_count;
554 unsigned int ttwu_local; 557 unsigned int ttwu_local;
555#endif 558#endif
559
560#ifdef CONFIG_SMP
561 struct task_struct *wake_list;
562#endif
556}; 563};
557 564
558static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 565static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -596,7 +603,7 @@ static inline int cpu_of(struct rq *rq)
596 * Return the group to which this tasks belongs. 603 * Return the group to which this tasks belongs.
597 * 604 *
598 * We use task_subsys_state_check() and extend the RCU verification 605 * We use task_subsys_state_check() and extend the RCU verification
599 * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() 606 * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
600 * holds that lock for each task it moves into the cgroup. Therefore 607 * holds that lock for each task it moves into the cgroup. Therefore
601 * by holding that lock, we pin the task to the current cgroup. 608 * by holding that lock, we pin the task to the current cgroup.
602 */ 609 */
@@ -606,7 +613,7 @@ static inline struct task_group *task_group(struct task_struct *p)
606 struct cgroup_subsys_state *css; 613 struct cgroup_subsys_state *css;
607 614
608 css = task_subsys_state_check(p, cpu_cgroup_subsys_id, 615 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
609 lockdep_is_held(&task_rq(p)->lock)); 616 lockdep_is_held(&p->pi_lock));
610 tg = container_of(css, struct task_group, css); 617 tg = container_of(css, struct task_group, css);
611 618
612 return autogroup_task_group(p, tg); 619 return autogroup_task_group(p, tg);
@@ -838,18 +845,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
838 return rq->curr == p; 845 return rq->curr == p;
839} 846}
840 847
841#ifndef __ARCH_WANT_UNLOCKED_CTXSW
842static inline int task_running(struct rq *rq, struct task_struct *p) 848static inline int task_running(struct rq *rq, struct task_struct *p)
843{ 849{
850#ifdef CONFIG_SMP
851 return p->on_cpu;
852#else
844 return task_current(rq, p); 853 return task_current(rq, p);
854#endif
845} 855}
846 856
857#ifndef __ARCH_WANT_UNLOCKED_CTXSW
847static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 858static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
848{ 859{
860#ifdef CONFIG_SMP
861 /*
862 * We can optimise this out completely for !SMP, because the
863 * SMP rebalancing from interrupt is the only thing that cares
864 * here.
865 */
866 next->on_cpu = 1;
867#endif
849} 868}
850 869
851static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) 870static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
852{ 871{
872#ifdef CONFIG_SMP
873 /*
874 * After ->on_cpu is cleared, the task can be moved to a different CPU.
875 * We must ensure this doesn't happen until the switch is completely
876 * finished.
877 */
878 smp_wmb();
879 prev->on_cpu = 0;
880#endif
853#ifdef CONFIG_DEBUG_SPINLOCK 881#ifdef CONFIG_DEBUG_SPINLOCK
854 /* this is a valid case when another task releases the spinlock */ 882 /* this is a valid case when another task releases the spinlock */
855 rq->lock.owner = current; 883 rq->lock.owner = current;
@@ -865,15 +893,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
865} 893}
866 894
867#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 895#else /* __ARCH_WANT_UNLOCKED_CTXSW */
868static inline int task_running(struct rq *rq, struct task_struct *p)
869{
870#ifdef CONFIG_SMP
871 return p->oncpu;
872#else
873 return task_current(rq, p);
874#endif
875}
876
877static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 896static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
878{ 897{
879#ifdef CONFIG_SMP 898#ifdef CONFIG_SMP
@@ -882,7 +901,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
882 * SMP rebalancing from interrupt is the only thing that cares 901 * SMP rebalancing from interrupt is the only thing that cares
883 * here. 902 * here.
884 */ 903 */
885 next->oncpu = 1; 904 next->on_cpu = 1;
886#endif 905#endif
887#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 906#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
888 raw_spin_unlock_irq(&rq->lock); 907 raw_spin_unlock_irq(&rq->lock);
@@ -895,12 +914,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
895{ 914{
896#ifdef CONFIG_SMP 915#ifdef CONFIG_SMP
897 /* 916 /*
898 * After ->oncpu is cleared, the task can be moved to a different CPU. 917 * After ->on_cpu is cleared, the task can be moved to a different CPU.
899 * We must ensure this doesn't happen until the switch is completely 918 * We must ensure this doesn't happen until the switch is completely
900 * finished. 919 * finished.
901 */ 920 */
902 smp_wmb(); 921 smp_wmb();
903 prev->oncpu = 0; 922 prev->on_cpu = 0;
904#endif 923#endif
905#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW 924#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
906 local_irq_enable(); 925 local_irq_enable();
@@ -909,23 +928,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
909#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ 928#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
910 929
911/* 930/*
912 * Check whether the task is waking, we use this to synchronize ->cpus_allowed 931 * __task_rq_lock - lock the rq @p resides on.
913 * against ttwu().
914 */
915static inline int task_is_waking(struct task_struct *p)
916{
917 return unlikely(p->state == TASK_WAKING);
918}
919
920/*
921 * __task_rq_lock - lock the runqueue a given task resides on.
922 * Must be called interrupts disabled.
923 */ 932 */
924static inline struct rq *__task_rq_lock(struct task_struct *p) 933static inline struct rq *__task_rq_lock(struct task_struct *p)
925 __acquires(rq->lock) 934 __acquires(rq->lock)
926{ 935{
927 struct rq *rq; 936 struct rq *rq;
928 937
938 lockdep_assert_held(&p->pi_lock);
939
929 for (;;) { 940 for (;;) {
930 rq = task_rq(p); 941 rq = task_rq(p);
931 raw_spin_lock(&rq->lock); 942 raw_spin_lock(&rq->lock);
@@ -936,22 +947,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
936} 947}
937 948
938/* 949/*
939 * task_rq_lock - lock the runqueue a given task resides on and disable 950 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
940 * interrupts. Note the ordering: we can safely lookup the task_rq without
941 * explicitly disabling preemption.
942 */ 951 */
943static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) 952static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
953 __acquires(p->pi_lock)
944 __acquires(rq->lock) 954 __acquires(rq->lock)
945{ 955{
946 struct rq *rq; 956 struct rq *rq;
947 957
948 for (;;) { 958 for (;;) {
949 local_irq_save(*flags); 959 raw_spin_lock_irqsave(&p->pi_lock, *flags);
950 rq = task_rq(p); 960 rq = task_rq(p);
951 raw_spin_lock(&rq->lock); 961 raw_spin_lock(&rq->lock);
952 if (likely(rq == task_rq(p))) 962 if (likely(rq == task_rq(p)))
953 return rq; 963 return rq;
954 raw_spin_unlock_irqrestore(&rq->lock, *flags); 964 raw_spin_unlock(&rq->lock);
965 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
955 } 966 }
956} 967}
957 968
@@ -961,10 +972,13 @@ static void __task_rq_unlock(struct rq *rq)
961 raw_spin_unlock(&rq->lock); 972 raw_spin_unlock(&rq->lock);
962} 973}
963 974
964static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) 975static inline void
976task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
965 __releases(rq->lock) 977 __releases(rq->lock)
978 __releases(p->pi_lock)
966{ 979{
967 raw_spin_unlock_irqrestore(&rq->lock, *flags); 980 raw_spin_unlock(&rq->lock);
981 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
968} 982}
969 983
970/* 984/*
@@ -1773,7 +1787,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
1773 update_rq_clock(rq); 1787 update_rq_clock(rq);
1774 sched_info_queued(p); 1788 sched_info_queued(p);
1775 p->sched_class->enqueue_task(rq, p, flags); 1789 p->sched_class->enqueue_task(rq, p, flags);
1776 p->se.on_rq = 1;
1777} 1790}
1778 1791
1779static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) 1792static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1781,7 +1794,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
1781 update_rq_clock(rq); 1794 update_rq_clock(rq);
1782 sched_info_dequeued(p); 1795 sched_info_dequeued(p);
1783 p->sched_class->dequeue_task(rq, p, flags); 1796 p->sched_class->dequeue_task(rq, p, flags);
1784 p->se.on_rq = 0;
1785} 1797}
1786 1798
1787/* 1799/*
@@ -2116,7 +2128,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2116 * A queue event has occurred, and we're going to schedule. In 2128 * A queue event has occurred, and we're going to schedule. In
2117 * this case, we can save a useless back to back clock update. 2129 * this case, we can save a useless back to back clock update.
2118 */ 2130 */
2119 if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) 2131 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
2120 rq->skip_clock_update = 1; 2132 rq->skip_clock_update = 1;
2121} 2133}
2122 2134
@@ -2162,6 +2174,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2162 */ 2174 */
2163 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && 2175 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2164 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); 2176 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2177
2178#ifdef CONFIG_LOCKDEP
2179 WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
2180 lockdep_is_held(&task_rq(p)->lock)));
2181#endif
2165#endif 2182#endif
2166 2183
2167 trace_sched_migrate_task(p, new_cpu); 2184 trace_sched_migrate_task(p, new_cpu);
@@ -2185,13 +2202,15 @@ static int migration_cpu_stop(void *data);
2185 * The task's runqueue lock must be held. 2202 * The task's runqueue lock must be held.
2186 * Returns true if you have to wait for migration thread. 2203 * Returns true if you have to wait for migration thread.
2187 */ 2204 */
2188static bool migrate_task(struct task_struct *p, struct rq *rq) 2205static bool need_migrate_task(struct task_struct *p)
2189{ 2206{
2190 /* 2207 /*
2191 * If the task is not on a runqueue (and not running), then 2208 * If the task is not on a runqueue (and not running), then
2192 * the next wake-up will properly place the task. 2209 * the next wake-up will properly place the task.
2193 */ 2210 */
2194 return p->se.on_rq || task_running(rq, p); 2211 bool running = p->on_rq || p->on_cpu;
2212 smp_rmb(); /* finish_lock_switch() */
2213 return running;
2195} 2214}
2196 2215
2197/* 2216/*
@@ -2251,11 +2270,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
2251 rq = task_rq_lock(p, &flags); 2270 rq = task_rq_lock(p, &flags);
2252 trace_sched_wait_task(p); 2271 trace_sched_wait_task(p);
2253 running = task_running(rq, p); 2272 running = task_running(rq, p);
2254 on_rq = p->se.on_rq; 2273 on_rq = p->on_rq;
2255 ncsw = 0; 2274 ncsw = 0;
2256 if (!match_state || p->state == match_state) 2275 if (!match_state || p->state == match_state)
2257 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 2276 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
2258 task_rq_unlock(rq, &flags); 2277 task_rq_unlock(rq, p, &flags);
2259 2278
2260 /* 2279 /*
2261 * If it changed from the expected state, bail out now. 2280 * If it changed from the expected state, bail out now.
@@ -2330,7 +2349,7 @@ EXPORT_SYMBOL_GPL(kick_process);
2330 2349
2331#ifdef CONFIG_SMP 2350#ifdef CONFIG_SMP
2332/* 2351/*
2333 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. 2352 * ->cpus_allowed is protected by both rq->lock and p->pi_lock
2334 */ 2353 */
2335static int select_fallback_rq(int cpu, struct task_struct *p) 2354static int select_fallback_rq(int cpu, struct task_struct *p)
2336{ 2355{
@@ -2363,12 +2382,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2363} 2382}
2364 2383
2365/* 2384/*
2366 * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. 2385 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
2367 */ 2386 */
2368static inline 2387static inline
2369int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) 2388int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2370{ 2389{
2371 int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); 2390 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2372 2391
2373 /* 2392 /*
2374 * In order not to call set_task_cpu() on a blocking task we need 2393 * In order not to call set_task_cpu() on a blocking task we need
@@ -2394,27 +2413,60 @@ static void update_avg(u64 *avg, u64 sample)
2394} 2413}
2395#endif 2414#endif
2396 2415
2397static inline void ttwu_activate(struct task_struct *p, struct rq *rq, 2416static void
2398 bool is_sync, bool is_migrate, bool is_local, 2417ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
2399 unsigned long en_flags)
2400{ 2418{
2419#ifdef CONFIG_SCHEDSTATS
2420 struct rq *rq = this_rq();
2421
2422#ifdef CONFIG_SMP
2423 int this_cpu = smp_processor_id();
2424
2425 if (cpu == this_cpu) {
2426 schedstat_inc(rq, ttwu_local);
2427 schedstat_inc(p, se.statistics.nr_wakeups_local);
2428 } else {
2429 struct sched_domain *sd;
2430
2431 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2432 for_each_domain(this_cpu, sd) {
2433 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2434 schedstat_inc(sd, ttwu_wake_remote);
2435 break;
2436 }
2437 }
2438 }
2439#endif /* CONFIG_SMP */
2440
2441 schedstat_inc(rq, ttwu_count);
2401 schedstat_inc(p, se.statistics.nr_wakeups); 2442 schedstat_inc(p, se.statistics.nr_wakeups);
2402 if (is_sync) 2443
2444 if (wake_flags & WF_SYNC)
2403 schedstat_inc(p, se.statistics.nr_wakeups_sync); 2445 schedstat_inc(p, se.statistics.nr_wakeups_sync);
2404 if (is_migrate) 2446
2447 if (cpu != task_cpu(p))
2405 schedstat_inc(p, se.statistics.nr_wakeups_migrate); 2448 schedstat_inc(p, se.statistics.nr_wakeups_migrate);
2406 if (is_local)
2407 schedstat_inc(p, se.statistics.nr_wakeups_local);
2408 else
2409 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2410 2449
2450#endif /* CONFIG_SCHEDSTATS */
2451}
2452
2453static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
2454{
2411 activate_task(rq, p, en_flags); 2455 activate_task(rq, p, en_flags);
2456 p->on_rq = 1;
2457
2458 /* if a worker is waking up, notify workqueue */
2459 if (p->flags & PF_WQ_WORKER)
2460 wq_worker_waking_up(p, cpu_of(rq));
2412} 2461}
2413 2462
2414static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, 2463/*
2415 int wake_flags, bool success) 2464 * Mark the task runnable and perform wakeup-preemption.
2465 */
2466static void
2467ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
2416{ 2468{
2417 trace_sched_wakeup(p, success); 2469 trace_sched_wakeup(p, true);
2418 check_preempt_curr(rq, p, wake_flags); 2470 check_preempt_curr(rq, p, wake_flags);
2419 2471
2420 p->state = TASK_RUNNING; 2472 p->state = TASK_RUNNING;
@@ -2433,9 +2485,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
2433 rq->idle_stamp = 0; 2485 rq->idle_stamp = 0;
2434 } 2486 }
2435#endif 2487#endif
2436 /* if a worker is waking up, notify workqueue */ 2488}
2437 if ((p->flags & PF_WQ_WORKER) && success) 2489
2438 wq_worker_waking_up(p, cpu_of(rq)); 2490static void
2491ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
2492{
2493#ifdef CONFIG_SMP
2494 if (p->sched_contributes_to_load)
2495 rq->nr_uninterruptible--;
2496#endif
2497
2498 ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
2499 ttwu_do_wakeup(rq, p, wake_flags);
2500}
2501
2502/*
2503 * Called in case the task @p isn't fully descheduled from its runqueue,
2504 * in this case we must do a remote wakeup. Its a 'light' wakeup though,
2505 * since all we need to do is flip p->state to TASK_RUNNING, since
2506 * the task is still ->on_rq.
2507 */
2508static int ttwu_remote(struct task_struct *p, int wake_flags)
2509{
2510 struct rq *rq;
2511 int ret = 0;
2512
2513 rq = __task_rq_lock(p);
2514 if (p->on_rq) {
2515 ttwu_do_wakeup(rq, p, wake_flags);
2516 ret = 1;
2517 }
2518 __task_rq_unlock(rq);
2519
2520 return ret;
2521}
2522
2523#ifdef CONFIG_SMP
2524static void sched_ttwu_pending(void)
2525{
2526 struct rq *rq = this_rq();
2527 struct task_struct *list = xchg(&rq->wake_list, NULL);
2528
2529 if (!list)
2530 return;
2531
2532 raw_spin_lock(&rq->lock);
2533
2534 while (list) {
2535 struct task_struct *p = list;
2536 list = list->wake_entry;
2537 ttwu_do_activate(rq, p, 0);
2538 }
2539
2540 raw_spin_unlock(&rq->lock);
2541}
2542
2543void scheduler_ipi(void)
2544{
2545 sched_ttwu_pending();
2546}
2547
2548static void ttwu_queue_remote(struct task_struct *p, int cpu)
2549{
2550 struct rq *rq = cpu_rq(cpu);
2551 struct task_struct *next = rq->wake_list;
2552
2553 for (;;) {
2554 struct task_struct *old = next;
2555
2556 p->wake_entry = next;
2557 next = cmpxchg(&rq->wake_list, old, p);
2558 if (next == old)
2559 break;
2560 }
2561
2562 if (!next)
2563 smp_send_reschedule(cpu);
2564}
2565#endif
2566
2567static void ttwu_queue(struct task_struct *p, int cpu)
2568{
2569 struct rq *rq = cpu_rq(cpu);
2570
2571#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
2572 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2573 ttwu_queue_remote(p, cpu);
2574 return;
2575 }
2576#endif
2577
2578 raw_spin_lock(&rq->lock);
2579 ttwu_do_activate(rq, p, 0);
2580 raw_spin_unlock(&rq->lock);
2439} 2581}
2440 2582
2441/** 2583/**
@@ -2453,92 +2595,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
2453 * Returns %true if @p was woken up, %false if it was already running 2595 * Returns %true if @p was woken up, %false if it was already running
2454 * or @state didn't match @p's state. 2596 * or @state didn't match @p's state.
2455 */ 2597 */
2456static int try_to_wake_up(struct task_struct *p, unsigned int state, 2598static int
2457 int wake_flags) 2599try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2458{ 2600{
2459 int cpu, orig_cpu, this_cpu, success = 0;
2460 unsigned long flags; 2601 unsigned long flags;
2461 unsigned long en_flags = ENQUEUE_WAKEUP; 2602 int cpu, success = 0;
2462 struct rq *rq;
2463
2464 this_cpu = get_cpu();
2465 2603
2466 smp_wmb(); 2604 smp_wmb();
2467 rq = task_rq_lock(p, &flags); 2605 raw_spin_lock_irqsave(&p->pi_lock, flags);
2468 if (!(p->state & state)) 2606 if (!(p->state & state))
2469 goto out; 2607 goto out;
2470 2608
2471 if (p->se.on_rq) 2609 success = 1; /* we're going to change ->state */
2472 goto out_running;
2473
2474 cpu = task_cpu(p); 2610 cpu = task_cpu(p);
2475 orig_cpu = cpu;
2476 2611
2477#ifdef CONFIG_SMP 2612 if (p->on_rq && ttwu_remote(p, wake_flags))
2478 if (unlikely(task_running(rq, p))) 2613 goto stat;
2479 goto out_activate;
2480 2614
2615#ifdef CONFIG_SMP
2481 /* 2616 /*
2482 * In order to handle concurrent wakeups and release the rq->lock 2617 * If the owning (remote) cpu is still in the middle of schedule() with
2483 * we put the task in TASK_WAKING state. 2618 * this task as prev, wait until its done referencing the task.
2484 *
2485 * First fix up the nr_uninterruptible count:
2486 */ 2619 */
2487 if (task_contributes_to_load(p)) { 2620 while (p->on_cpu) {
2488 if (likely(cpu_online(orig_cpu))) 2621#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2489 rq->nr_uninterruptible--; 2622 /*
2490 else 2623 * If called from interrupt context we could have landed in the
2491 this_rq()->nr_uninterruptible--; 2624 * middle of schedule(), in this case we should take care not
2492 } 2625 * to spin on ->on_cpu if p is current, since that would
2493 p->state = TASK_WAKING; 2626 * deadlock.
2494 2627 */
2495 if (p->sched_class->task_waking) { 2628 if (p == current) {
2496 p->sched_class->task_waking(rq, p); 2629 ttwu_queue(p, cpu);
2497 en_flags |= ENQUEUE_WAKING; 2630 goto stat;
2631 }
2632#endif
2633 cpu_relax();
2498 } 2634 }
2499
2500 cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
2501 if (cpu != orig_cpu)
2502 set_task_cpu(p, cpu);
2503 __task_rq_unlock(rq);
2504
2505 rq = cpu_rq(cpu);
2506 raw_spin_lock(&rq->lock);
2507
2508 /* 2635 /*
2509 * We migrated the task without holding either rq->lock, however 2636 * Pairs with the smp_wmb() in finish_lock_switch().
2510 * since the task is not on the task list itself, nobody else
2511 * will try and migrate the task, hence the rq should match the
2512 * cpu we just moved it to.
2513 */ 2637 */
2514 WARN_ON(task_cpu(p) != cpu); 2638 smp_rmb();
2515 WARN_ON(p->state != TASK_WAKING);
2516 2639
2517#ifdef CONFIG_SCHEDSTATS 2640 p->sched_contributes_to_load = !!task_contributes_to_load(p);
2518 schedstat_inc(rq, ttwu_count); 2641 p->state = TASK_WAKING;
2519 if (cpu == this_cpu) 2642
2520 schedstat_inc(rq, ttwu_local); 2643 if (p->sched_class->task_waking)
2521 else { 2644 p->sched_class->task_waking(p);
2522 struct sched_domain *sd;
2523 for_each_domain(this_cpu, sd) {
2524 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2525 schedstat_inc(sd, ttwu_wake_remote);
2526 break;
2527 }
2528 }
2529 }
2530#endif /* CONFIG_SCHEDSTATS */
2531 2645
2532out_activate: 2646 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2647 if (task_cpu(p) != cpu)
2648 set_task_cpu(p, cpu);
2533#endif /* CONFIG_SMP */ 2649#endif /* CONFIG_SMP */
2534 ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, 2650
2535 cpu == this_cpu, en_flags); 2651 ttwu_queue(p, cpu);
2536 success = 1; 2652stat:
2537out_running: 2653 ttwu_stat(p, cpu, wake_flags);
2538 ttwu_post_activation(p, rq, wake_flags, success);
2539out: 2654out:
2540 task_rq_unlock(rq, &flags); 2655 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2541 put_cpu();
2542 2656
2543 return success; 2657 return success;
2544} 2658}
@@ -2547,31 +2661,34 @@ out:
2547 * try_to_wake_up_local - try to wake up a local task with rq lock held 2661 * try_to_wake_up_local - try to wake up a local task with rq lock held
2548 * @p: the thread to be awakened 2662 * @p: the thread to be awakened
2549 * 2663 *
2550 * Put @p on the run-queue if it's not already there. The caller must 2664 * Put @p on the run-queue if it's not already there. The caller must
2551 * ensure that this_rq() is locked, @p is bound to this_rq() and not 2665 * ensure that this_rq() is locked, @p is bound to this_rq() and not
2552 * the current task. this_rq() stays locked over invocation. 2666 * the current task.
2553 */ 2667 */
2554static void try_to_wake_up_local(struct task_struct *p) 2668static void try_to_wake_up_local(struct task_struct *p)
2555{ 2669{
2556 struct rq *rq = task_rq(p); 2670 struct rq *rq = task_rq(p);
2557 bool success = false;
2558 2671
2559 BUG_ON(rq != this_rq()); 2672 BUG_ON(rq != this_rq());
2560 BUG_ON(p == current); 2673 BUG_ON(p == current);
2561 lockdep_assert_held(&rq->lock); 2674 lockdep_assert_held(&rq->lock);
2562 2675
2676 if (!raw_spin_trylock(&p->pi_lock)) {
2677 raw_spin_unlock(&rq->lock);
2678 raw_spin_lock(&p->pi_lock);
2679 raw_spin_lock(&rq->lock);
2680 }
2681
2563 if (!(p->state & TASK_NORMAL)) 2682 if (!(p->state & TASK_NORMAL))
2564 return; 2683 goto out;
2565 2684
2566 if (!p->se.on_rq) { 2685 if (!p->on_rq)
2567 if (likely(!task_running(rq, p))) { 2686 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
2568 schedstat_inc(rq, ttwu_count); 2687
2569 schedstat_inc(rq, ttwu_local); 2688 ttwu_do_wakeup(rq, p, 0);
2570 } 2689 ttwu_stat(p, smp_processor_id(), 0);
2571 ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); 2690out:
2572 success = true; 2691 raw_spin_unlock(&p->pi_lock);
2573 }
2574 ttwu_post_activation(p, rq, 0, success);
2575} 2692}
2576 2693
2577/** 2694/**
@@ -2604,19 +2721,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
2604 */ 2721 */
2605static void __sched_fork(struct task_struct *p) 2722static void __sched_fork(struct task_struct *p)
2606{ 2723{
2724 p->on_rq = 0;
2725
2726 p->se.on_rq = 0;
2607 p->se.exec_start = 0; 2727 p->se.exec_start = 0;
2608 p->se.sum_exec_runtime = 0; 2728 p->se.sum_exec_runtime = 0;
2609 p->se.prev_sum_exec_runtime = 0; 2729 p->se.prev_sum_exec_runtime = 0;
2610 p->se.nr_migrations = 0; 2730 p->se.nr_migrations = 0;
2611 p->se.vruntime = 0; 2731 p->se.vruntime = 0;
2732 INIT_LIST_HEAD(&p->se.group_node);
2612 2733
2613#ifdef CONFIG_SCHEDSTATS 2734#ifdef CONFIG_SCHEDSTATS
2614 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 2735 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
2615#endif 2736#endif
2616 2737
2617 INIT_LIST_HEAD(&p->rt.run_list); 2738 INIT_LIST_HEAD(&p->rt.run_list);
2618 p->se.on_rq = 0;
2619 INIT_LIST_HEAD(&p->se.group_node);
2620 2739
2621#ifdef CONFIG_PREEMPT_NOTIFIERS 2740#ifdef CONFIG_PREEMPT_NOTIFIERS
2622 INIT_HLIST_HEAD(&p->preempt_notifiers); 2741 INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2628,6 +2747,7 @@ static void __sched_fork(struct task_struct *p)
2628 */ 2747 */
2629void sched_fork(struct task_struct *p, int clone_flags) 2748void sched_fork(struct task_struct *p, int clone_flags)
2630{ 2749{
2750 unsigned long flags;
2631 int cpu = get_cpu(); 2751 int cpu = get_cpu();
2632 2752
2633 __sched_fork(p); 2753 __sched_fork(p);
@@ -2678,16 +2798,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
2678 * 2798 *
2679 * Silence PROVE_RCU. 2799 * Silence PROVE_RCU.
2680 */ 2800 */
2681 rcu_read_lock(); 2801 raw_spin_lock_irqsave(&p->pi_lock, flags);
2682 set_task_cpu(p, cpu); 2802 set_task_cpu(p, cpu);
2683 rcu_read_unlock(); 2803 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2684 2804
2685#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2805#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2686 if (likely(sched_info_on())) 2806 if (likely(sched_info_on()))
2687 memset(&p->sched_info, 0, sizeof(p->sched_info)); 2807 memset(&p->sched_info, 0, sizeof(p->sched_info));
2688#endif 2808#endif
2689#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 2809#if defined(CONFIG_SMP)
2690 p->oncpu = 0; 2810 p->on_cpu = 0;
2691#endif 2811#endif
2692#ifdef CONFIG_PREEMPT 2812#ifdef CONFIG_PREEMPT
2693 /* Want to start with kernel preemption disabled. */ 2813 /* Want to start with kernel preemption disabled. */
@@ -2711,37 +2831,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2711{ 2831{
2712 unsigned long flags; 2832 unsigned long flags;
2713 struct rq *rq; 2833 struct rq *rq;
2714 int cpu __maybe_unused = get_cpu();
2715 2834
2835 raw_spin_lock_irqsave(&p->pi_lock, flags);
2716#ifdef CONFIG_SMP 2836#ifdef CONFIG_SMP
2717 rq = task_rq_lock(p, &flags);
2718 p->state = TASK_WAKING;
2719
2720 /* 2837 /*
2721 * Fork balancing, do it here and not earlier because: 2838 * Fork balancing, do it here and not earlier because:
2722 * - cpus_allowed can change in the fork path 2839 * - cpus_allowed can change in the fork path
2723 * - any previously selected cpu might disappear through hotplug 2840 * - any previously selected cpu might disappear through hotplug
2724 *
2725 * We set TASK_WAKING so that select_task_rq() can drop rq->lock
2726 * without people poking at ->cpus_allowed.
2727 */ 2841 */
2728 cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); 2842 set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
2729 set_task_cpu(p, cpu);
2730
2731 p->state = TASK_RUNNING;
2732 task_rq_unlock(rq, &flags);
2733#endif 2843#endif
2734 2844
2735 rq = task_rq_lock(p, &flags); 2845 rq = __task_rq_lock(p);
2736 activate_task(rq, p, 0); 2846 activate_task(rq, p, 0);
2737 trace_sched_wakeup_new(p, 1); 2847 p->on_rq = 1;
2848 trace_sched_wakeup_new(p, true);
2738 check_preempt_curr(rq, p, WF_FORK); 2849 check_preempt_curr(rq, p, WF_FORK);
2739#ifdef CONFIG_SMP 2850#ifdef CONFIG_SMP
2740 if (p->sched_class->task_woken) 2851 if (p->sched_class->task_woken)
2741 p->sched_class->task_woken(rq, p); 2852 p->sched_class->task_woken(rq, p);
2742#endif 2853#endif
2743 task_rq_unlock(rq, &flags); 2854 task_rq_unlock(rq, p, &flags);
2744 put_cpu();
2745} 2855}
2746 2856
2747#ifdef CONFIG_PREEMPT_NOTIFIERS 2857#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3450,27 +3560,22 @@ void sched_exec(void)
3450{ 3560{
3451 struct task_struct *p = current; 3561 struct task_struct *p = current;
3452 unsigned long flags; 3562 unsigned long flags;
3453 struct rq *rq;
3454 int dest_cpu; 3563 int dest_cpu;
3455 3564
3456 rq = task_rq_lock(p, &flags); 3565 raw_spin_lock_irqsave(&p->pi_lock, flags);
3457 dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); 3566 dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
3458 if (dest_cpu == smp_processor_id()) 3567 if (dest_cpu == smp_processor_id())
3459 goto unlock; 3568 goto unlock;
3460 3569
3461 /* 3570 if (likely(cpu_active(dest_cpu))) {
3462 * select_task_rq() can race against ->cpus_allowed
3463 */
3464 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
3465 likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
3466 struct migration_arg arg = { p, dest_cpu }; 3571 struct migration_arg arg = { p, dest_cpu };
3467 3572
3468 task_rq_unlock(rq, &flags); 3573 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3469 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); 3574 stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
3470 return; 3575 return;
3471 } 3576 }
3472unlock: 3577unlock:
3473 task_rq_unlock(rq, &flags); 3578 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3474} 3579}
3475 3580
3476#endif 3581#endif
@@ -3507,7 +3612,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
3507 3612
3508 rq = task_rq_lock(p, &flags); 3613 rq = task_rq_lock(p, &flags);
3509 ns = do_task_delta_exec(p, rq); 3614 ns = do_task_delta_exec(p, rq);
3510 task_rq_unlock(rq, &flags); 3615 task_rq_unlock(rq, p, &flags);
3511 3616
3512 return ns; 3617 return ns;
3513} 3618}
@@ -3525,7 +3630,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
3525 3630
3526 rq = task_rq_lock(p, &flags); 3631 rq = task_rq_lock(p, &flags);
3527 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); 3632 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
3528 task_rq_unlock(rq, &flags); 3633 task_rq_unlock(rq, p, &flags);
3529 3634
3530 return ns; 3635 return ns;
3531} 3636}
@@ -3549,7 +3654,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
3549 rq = task_rq_lock(p, &flags); 3654 rq = task_rq_lock(p, &flags);
3550 thread_group_cputime(p, &totals); 3655 thread_group_cputime(p, &totals);
3551 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); 3656 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
3552 task_rq_unlock(rq, &flags); 3657 task_rq_unlock(rq, p, &flags);
3553 3658
3554 return ns; 3659 return ns;
3555} 3660}
@@ -4035,7 +4140,7 @@ static inline void schedule_debug(struct task_struct *prev)
4035 4140
4036static void put_prev_task(struct rq *rq, struct task_struct *prev) 4141static void put_prev_task(struct rq *rq, struct task_struct *prev)
4037{ 4142{
4038 if (prev->se.on_rq) 4143 if (prev->on_rq)
4039 update_rq_clock(rq); 4144 update_rq_clock(rq);
4040 prev->sched_class->put_prev_task(rq, prev); 4145 prev->sched_class->put_prev_task(rq, prev);
4041} 4146}
@@ -4097,11 +4202,13 @@ need_resched:
4097 if (unlikely(signal_pending_state(prev->state, prev))) { 4202 if (unlikely(signal_pending_state(prev->state, prev))) {
4098 prev->state = TASK_RUNNING; 4203 prev->state = TASK_RUNNING;
4099 } else { 4204 } else {
4205 deactivate_task(rq, prev, DEQUEUE_SLEEP);
4206 prev->on_rq = 0;
4207
4100 /* 4208 /*
4101 * If a worker is going to sleep, notify and 4209 * If a worker went to sleep, notify and ask workqueue
4102 * ask workqueue whether it wants to wake up a 4210 * whether it wants to wake up a task to maintain
4103 * task to maintain concurrency. If so, wake 4211 * concurrency.
4104 * up the task.
4105 */ 4212 */
4106 if (prev->flags & PF_WQ_WORKER) { 4213 if (prev->flags & PF_WQ_WORKER) {
4107 struct task_struct *to_wakeup; 4214 struct task_struct *to_wakeup;
@@ -4110,11 +4217,10 @@ need_resched:
4110 if (to_wakeup) 4217 if (to_wakeup)
4111 try_to_wake_up_local(to_wakeup); 4218 try_to_wake_up_local(to_wakeup);
4112 } 4219 }
4113 deactivate_task(rq, prev, DEQUEUE_SLEEP);
4114 4220
4115 /* 4221 /*
4116 * If we are going to sleep and we have plugged IO queued, make 4222 * If we are going to sleep and we have plugged IO
4117 * sure to submit it to avoid deadlocks. 4223 * queued, make sure to submit it to avoid deadlocks.
4118 */ 4224 */
4119 if (blk_needs_flush_plug(prev)) { 4225 if (blk_needs_flush_plug(prev)) {
4120 raw_spin_unlock(&rq->lock); 4226 raw_spin_unlock(&rq->lock);
@@ -4161,70 +4267,53 @@ need_resched:
4161EXPORT_SYMBOL(schedule); 4267EXPORT_SYMBOL(schedule);
4162 4268
4163#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 4269#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
4164/*
4165 * Look out! "owner" is an entirely speculative pointer
4166 * access and not reliable.
4167 */
4168int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4169{
4170 unsigned int cpu;
4171 struct rq *rq;
4172 4270
4173 if (!sched_feat(OWNER_SPIN)) 4271static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
4174 return 0; 4272{
4273 bool ret = false;
4175 4274
4176#ifdef CONFIG_DEBUG_PAGEALLOC 4275 rcu_read_lock();
4177 /* 4276 if (lock->owner != owner)
4178 * Need to access the cpu field knowing that 4277 goto fail;
4179 * DEBUG_PAGEALLOC could have unmapped it if
4180 * the mutex owner just released it and exited.
4181 */
4182 if (probe_kernel_address(&owner->cpu, cpu))
4183 return 0;
4184#else
4185 cpu = owner->cpu;
4186#endif
4187 4278
4188 /* 4279 /*
4189 * Even if the access succeeded (likely case), 4280 * Ensure we emit the owner->on_cpu dereference _after_ checking
4190 * the cpu field may no longer be valid. 4281 * that lock->owner still matches owner. If that check fails, owner
4282 * might point to free()d memory; if it still matches, the
4283 * rcu_read_lock() ensures the memory stays valid.
4191 */ 4284 */
4192 if (cpu >= nr_cpumask_bits) 4285 barrier();
4193 return 0;
4194 4286
4195 /* 4287 ret = owner->on_cpu;
4196 * We need to validate that we can do a 4288fail:
4197 * get_cpu() and that we have the percpu area. 4289 rcu_read_unlock();
4198 */
4199 if (!cpu_online(cpu))
4200 return 0;
4201 4290
4202 rq = cpu_rq(cpu); 4291 return ret;
4292}
4203 4293
4204 for (;;) { 4294/*
4205 /* 4295 * Look out! "owner" is an entirely speculative pointer
4206 * Owner changed, break to re-assess state. 4296 * access and not reliable.
4207 */ 4297 */
4208 if (lock->owner != owner) { 4298int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
4209 /* 4299{
4210 * If the lock has switched to a different owner, 4300 if (!sched_feat(OWNER_SPIN))
4211 * we likely have heavy contention. Return 0 to quit 4301 return 0;
4212 * optimistic spinning and not contend further:
4213 */
4214 if (lock->owner)
4215 return 0;
4216 break;
4217 }
4218 4302
4219 /* 4303 while (owner_running(lock, owner)) {
4220 * Is that owner really running on that cpu? 4304 if (need_resched())
4221 */
4222 if (task_thread_info(rq->curr) != owner || need_resched())
4223 return 0; 4305 return 0;
4224 4306
4225 arch_mutex_cpu_relax(); 4307 arch_mutex_cpu_relax();
4226 } 4308 }
4227 4309
4310 /*
4311 * If the owner changed to another task there is likely
4312 * heavy contention, stop spinning.
4313 */
4314 if (lock->owner)
4315 return 0;
4316
4228 return 1; 4317 return 1;
4229} 4318}
4230#endif 4319#endif
@@ -4684,19 +4773,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
4684 */ 4773 */
4685void rt_mutex_setprio(struct task_struct *p, int prio) 4774void rt_mutex_setprio(struct task_struct *p, int prio)
4686{ 4775{
4687 unsigned long flags;
4688 int oldprio, on_rq, running; 4776 int oldprio, on_rq, running;
4689 struct rq *rq; 4777 struct rq *rq;
4690 const struct sched_class *prev_class; 4778 const struct sched_class *prev_class;
4691 4779
4692 BUG_ON(prio < 0 || prio > MAX_PRIO); 4780 BUG_ON(prio < 0 || prio > MAX_PRIO);
4693 4781
4694 rq = task_rq_lock(p, &flags); 4782 rq = __task_rq_lock(p);
4695 4783
4696 trace_sched_pi_setprio(p, prio); 4784 trace_sched_pi_setprio(p, prio);
4697 oldprio = p->prio; 4785 oldprio = p->prio;
4698 prev_class = p->sched_class; 4786 prev_class = p->sched_class;
4699 on_rq = p->se.on_rq; 4787 on_rq = p->on_rq;
4700 running = task_current(rq, p); 4788 running = task_current(rq, p);
4701 if (on_rq) 4789 if (on_rq)
4702 dequeue_task(rq, p, 0); 4790 dequeue_task(rq, p, 0);
@@ -4716,7 +4804,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4716 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 4804 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4717 4805
4718 check_class_changed(rq, p, prev_class, oldprio); 4806 check_class_changed(rq, p, prev_class, oldprio);
4719 task_rq_unlock(rq, &flags); 4807 __task_rq_unlock(rq);
4720} 4808}
4721 4809
4722#endif 4810#endif
@@ -4744,7 +4832,7 @@ void set_user_nice(struct task_struct *p, long nice)
4744 p->static_prio = NICE_TO_PRIO(nice); 4832 p->static_prio = NICE_TO_PRIO(nice);
4745 goto out_unlock; 4833 goto out_unlock;
4746 } 4834 }
4747 on_rq = p->se.on_rq; 4835 on_rq = p->on_rq;
4748 if (on_rq) 4836 if (on_rq)
4749 dequeue_task(rq, p, 0); 4837 dequeue_task(rq, p, 0);
4750 4838
@@ -4764,7 +4852,7 @@ void set_user_nice(struct task_struct *p, long nice)
4764 resched_task(rq->curr); 4852 resched_task(rq->curr);
4765 } 4853 }
4766out_unlock: 4854out_unlock:
4767 task_rq_unlock(rq, &flags); 4855 task_rq_unlock(rq, p, &flags);
4768} 4856}
4769EXPORT_SYMBOL(set_user_nice); 4857EXPORT_SYMBOL(set_user_nice);
4770 4858
@@ -4878,8 +4966,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
4878static void 4966static void
4879__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) 4967__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
4880{ 4968{
4881 BUG_ON(p->se.on_rq);
4882
4883 p->policy = policy; 4969 p->policy = policy;
4884 p->rt_priority = prio; 4970 p->rt_priority = prio;
4885 p->normal_prio = normal_prio(p); 4971 p->normal_prio = normal_prio(p);
@@ -4994,20 +5080,17 @@ recheck:
4994 /* 5080 /*
4995 * make sure no PI-waiters arrive (or leave) while we are 5081 * make sure no PI-waiters arrive (or leave) while we are
4996 * changing the priority of the task: 5082 * changing the priority of the task:
4997 */ 5083 *
4998 raw_spin_lock_irqsave(&p->pi_lock, flags);
4999 /*
5000 * To be able to change p->policy safely, the appropriate 5084 * To be able to change p->policy safely, the appropriate
5001 * runqueue lock must be held. 5085 * runqueue lock must be held.
5002 */ 5086 */
5003 rq = __task_rq_lock(p); 5087 rq = task_rq_lock(p, &flags);
5004 5088
5005 /* 5089 /*
5006 * Changing the policy of the stop threads is a very bad idea 5090 * Changing the policy of the stop threads is a very bad idea
5007 */ 5091 */
5008 if (p == rq->stop) { 5092 if (p == rq->stop) {
5009 __task_rq_unlock(rq); 5093 task_rq_unlock(rq, p, &flags);
5010 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5011 return -EINVAL; 5094 return -EINVAL;
5012 } 5095 }
5013 5096
@@ -5031,8 +5114,7 @@ recheck:
5031 if (rt_bandwidth_enabled() && rt_policy(policy) && 5114 if (rt_bandwidth_enabled() && rt_policy(policy) &&
5032 task_group(p)->rt_bandwidth.rt_runtime == 0 && 5115 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
5033 !task_group_is_autogroup(task_group(p))) { 5116 !task_group_is_autogroup(task_group(p))) {
5034 __task_rq_unlock(rq); 5117 task_rq_unlock(rq, p, &flags);
5035 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5036 return -EPERM; 5118 return -EPERM;
5037 } 5119 }
5038 } 5120 }
@@ -5041,11 +5123,10 @@ recheck:
5041 /* recheck policy now with rq lock held */ 5123 /* recheck policy now with rq lock held */
5042 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 5124 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
5043 policy = oldpolicy = -1; 5125 policy = oldpolicy = -1;
5044 __task_rq_unlock(rq); 5126 task_rq_unlock(rq, p, &flags);
5045 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5046 goto recheck; 5127 goto recheck;
5047 } 5128 }
5048 on_rq = p->se.on_rq; 5129 on_rq = p->on_rq;
5049 running = task_current(rq, p); 5130 running = task_current(rq, p);
5050 if (on_rq) 5131 if (on_rq)
5051 deactivate_task(rq, p, 0); 5132 deactivate_task(rq, p, 0);
@@ -5064,8 +5145,7 @@ recheck:
5064 activate_task(rq, p, 0); 5145 activate_task(rq, p, 0);
5065 5146
5066 check_class_changed(rq, p, prev_class, oldprio); 5147 check_class_changed(rq, p, prev_class, oldprio);
5067 __task_rq_unlock(rq); 5148 task_rq_unlock(rq, p, &flags);
5068 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5069 5149
5070 rt_mutex_adjust_pi(p); 5150 rt_mutex_adjust_pi(p);
5071 5151
@@ -5316,7 +5396,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
5316{ 5396{
5317 struct task_struct *p; 5397 struct task_struct *p;
5318 unsigned long flags; 5398 unsigned long flags;
5319 struct rq *rq;
5320 int retval; 5399 int retval;
5321 5400
5322 get_online_cpus(); 5401 get_online_cpus();
@@ -5331,9 +5410,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
5331 if (retval) 5410 if (retval)
5332 goto out_unlock; 5411 goto out_unlock;
5333 5412
5334 rq = task_rq_lock(p, &flags); 5413 raw_spin_lock_irqsave(&p->pi_lock, flags);
5335 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 5414 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5336 task_rq_unlock(rq, &flags); 5415 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5337 5416
5338out_unlock: 5417out_unlock:
5339 rcu_read_unlock(); 5418 rcu_read_unlock();
@@ -5658,7 +5737,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5658 5737
5659 rq = task_rq_lock(p, &flags); 5738 rq = task_rq_lock(p, &flags);
5660 time_slice = p->sched_class->get_rr_interval(rq, p); 5739 time_slice = p->sched_class->get_rr_interval(rq, p);
5661 task_rq_unlock(rq, &flags); 5740 task_rq_unlock(rq, p, &flags);
5662 5741
5663 rcu_read_unlock(); 5742 rcu_read_unlock();
5664 jiffies_to_timespec(time_slice, &t); 5743 jiffies_to_timespec(time_slice, &t);
@@ -5776,8 +5855,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5776 rcu_read_unlock(); 5855 rcu_read_unlock();
5777 5856
5778 rq->curr = rq->idle = idle; 5857 rq->curr = rq->idle = idle;
5779#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 5858#if defined(CONFIG_SMP)
5780 idle->oncpu = 1; 5859 idle->on_cpu = 1;
5781#endif 5860#endif
5782 raw_spin_unlock_irqrestore(&rq->lock, flags); 5861 raw_spin_unlock_irqrestore(&rq->lock, flags);
5783 5862
@@ -5881,18 +5960,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5881 unsigned int dest_cpu; 5960 unsigned int dest_cpu;
5882 int ret = 0; 5961 int ret = 0;
5883 5962
5884 /*
5885 * Serialize against TASK_WAKING so that ttwu() and wunt() can
5886 * drop the rq->lock and still rely on ->cpus_allowed.
5887 */
5888again:
5889 while (task_is_waking(p))
5890 cpu_relax();
5891 rq = task_rq_lock(p, &flags); 5963 rq = task_rq_lock(p, &flags);
5892 if (task_is_waking(p)) {
5893 task_rq_unlock(rq, &flags);
5894 goto again;
5895 }
5896 5964
5897 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 5965 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
5898 ret = -EINVAL; 5966 ret = -EINVAL;
@@ -5917,16 +5985,16 @@ again:
5917 goto out; 5985 goto out;
5918 5986
5919 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); 5987 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
5920 if (migrate_task(p, rq)) { 5988 if (need_migrate_task(p)) {
5921 struct migration_arg arg = { p, dest_cpu }; 5989 struct migration_arg arg = { p, dest_cpu };
5922 /* Need help from migration thread: drop lock and wait. */ 5990 /* Need help from migration thread: drop lock and wait. */
5923 task_rq_unlock(rq, &flags); 5991 task_rq_unlock(rq, p, &flags);
5924 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); 5992 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
5925 tlb_migrate_finish(p->mm); 5993 tlb_migrate_finish(p->mm);
5926 return 0; 5994 return 0;
5927 } 5995 }
5928out: 5996out:
5929 task_rq_unlock(rq, &flags); 5997 task_rq_unlock(rq, p, &flags);
5930 5998
5931 return ret; 5999 return ret;
5932} 6000}
@@ -5954,6 +6022,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5954 rq_src = cpu_rq(src_cpu); 6022 rq_src = cpu_rq(src_cpu);
5955 rq_dest = cpu_rq(dest_cpu); 6023 rq_dest = cpu_rq(dest_cpu);
5956 6024
6025 raw_spin_lock(&p->pi_lock);
5957 double_rq_lock(rq_src, rq_dest); 6026 double_rq_lock(rq_src, rq_dest);
5958 /* Already moved. */ 6027 /* Already moved. */
5959 if (task_cpu(p) != src_cpu) 6028 if (task_cpu(p) != src_cpu)
@@ -5966,7 +6035,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5966 * If we're not on a rq, the next wake-up will ensure we're 6035 * If we're not on a rq, the next wake-up will ensure we're
5967 * placed properly. 6036 * placed properly.
5968 */ 6037 */
5969 if (p->se.on_rq) { 6038 if (p->on_rq) {
5970 deactivate_task(rq_src, p, 0); 6039 deactivate_task(rq_src, p, 0);
5971 set_task_cpu(p, dest_cpu); 6040 set_task_cpu(p, dest_cpu);
5972 activate_task(rq_dest, p, 0); 6041 activate_task(rq_dest, p, 0);
@@ -5976,6 +6045,7 @@ done:
5976 ret = 1; 6045 ret = 1;
5977fail: 6046fail:
5978 double_rq_unlock(rq_src, rq_dest); 6047 double_rq_unlock(rq_src, rq_dest);
6048 raw_spin_unlock(&p->pi_lock);
5979 return ret; 6049 return ret;
5980} 6050}
5981 6051
@@ -6316,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6316 6386
6317#ifdef CONFIG_HOTPLUG_CPU 6387#ifdef CONFIG_HOTPLUG_CPU
6318 case CPU_DYING: 6388 case CPU_DYING:
6389 sched_ttwu_pending();
6319 /* Update our root-domain */ 6390 /* Update our root-domain */
6320 raw_spin_lock_irqsave(&rq->lock, flags); 6391 raw_spin_lock_irqsave(&rq->lock, flags);
6321 if (rq->rd) { 6392 if (rq->rd) {
@@ -8340,7 +8411,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
8340 int old_prio = p->prio; 8411 int old_prio = p->prio;
8341 int on_rq; 8412 int on_rq;
8342 8413
8343 on_rq = p->se.on_rq; 8414 on_rq = p->on_rq;
8344 if (on_rq) 8415 if (on_rq)
8345 deactivate_task(rq, p, 0); 8416 deactivate_task(rq, p, 0);
8346 __setscheduler(rq, p, SCHED_NORMAL, 0); 8417 __setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8683,7 +8754,7 @@ void sched_move_task(struct task_struct *tsk)
8683 rq = task_rq_lock(tsk, &flags); 8754 rq = task_rq_lock(tsk, &flags);
8684 8755
8685 running = task_current(rq, tsk); 8756 running = task_current(rq, tsk);
8686 on_rq = tsk->se.on_rq; 8757 on_rq = tsk->on_rq;
8687 8758
8688 if (on_rq) 8759 if (on_rq)
8689 dequeue_task(rq, tsk, 0); 8760 dequeue_task(rq, tsk, 0);
@@ -8702,7 +8773,7 @@ void sched_move_task(struct task_struct *tsk)
8702 if (on_rq) 8773 if (on_rq)
8703 enqueue_task(rq, tsk, 0); 8774 enqueue_task(rq, tsk, 0);
8704 8775
8705 task_rq_unlock(rq, &flags); 8776 task_rq_unlock(rq, tsk, &flags);
8706} 8777}
8707#endif /* CONFIG_CGROUP_SCHED */ 8778#endif /* CONFIG_CGROUP_SCHED */
8708 8779