author	Ingo Molnar <mingo@elte.hu>	2011-04-18 08:53:18 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-04-18 08:53:33 -0400
commit	6ddafdaab3f809b110ada253d2f2d4910ebd3ac5 (patch)
tree	366bb7513511a05b6e11ab89bfe3b2dbd1d62a03	/kernel/sched.c
parent	3905c54f2bd2c6f937f87307987ca072eabc3e7b (diff)
parent	bd8e7dded88a3e1c085c333f19ff31387616f71a (diff)

Merge branch 'sched/locking' into sched/core

Merge reason: the rq locking changes are stable, propagate them into the .40 queue.

Signed-off-by: Ingo Molnar <mingo@elte.hu>

Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	650
1 file changed, 353 insertions(+), 297 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 506cb8147c70..0cfe0310ed5d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq {
 
 	u64 exec_clock;
 	u64 min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+#endif
 
 	struct rb_root tasks_timeline;
 	struct rb_node *rb_leftmost;
@@ -554,6 +557,10 @@ struct rq {
 	unsigned int ttwu_count;
 	unsigned int ttwu_local;
 #endif
+
+#ifdef CONFIG_SMP
+	struct task_struct *wake_list;
+#endif
 };
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -597,7 +604,7 @@ static inline int cpu_of(struct rq *rq)
  * Return the group to which this tasks belongs.
  *
  * We use task_subsys_state_check() and extend the RCU verification
- * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach()
+ * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
  * holds that lock for each task it moves into the cgroup. Therefore
  * by holding that lock, we pin the task to the current cgroup.
  */
@@ -607,7 +614,7 @@ static inline struct task_group *task_group(struct task_struct *p)
 	struct cgroup_subsys_state *css;
 
 	css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
-			lockdep_is_held(&task_rq(p)->lock));
+			lockdep_is_held(&p->pi_lock));
 	tg = container_of(css, struct task_group, css);
 
 	return autogroup_task_group(p, tg);
@@ -839,18 +846,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
 	return rq->curr == p;
 }
 
-#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline int task_running(struct rq *rq, struct task_struct *p)
 {
+#ifdef CONFIG_SMP
+	return p->on_cpu;
+#else
 	return task_current(rq, p);
+#endif
 }
 
+#ifndef __ARCH_WANT_UNLOCKED_CTXSW
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
+#ifdef CONFIG_SMP
+	/*
+	 * We can optimise this out completely for !SMP, because the
+	 * SMP rebalancing from interrupt is the only thing that cares
+	 * here.
+	 */
+	next->on_cpu = 1;
+#endif
 }
 
 static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
+#ifdef CONFIG_SMP
+	/*
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
+	 * We must ensure this doesn't happen until the switch is completely
+	 * finished.
+	 */
+	smp_wmb();
+	prev->on_cpu = 0;
+#endif
#ifdef CONFIG_DEBUG_SPINLOCK
 	/* this is a valid case when another task releases the spinlock */
 	rq->lock.owner = current;
@@ -866,15 +894,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 }
 
 #else /* __ARCH_WANT_UNLOCKED_CTXSW */
-static inline int task_running(struct rq *rq, struct task_struct *p)
-{
-#ifdef CONFIG_SMP
-	return p->oncpu;
-#else
-	return task_current(rq, p);
-#endif
-}
-
 static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 {
 #ifdef CONFIG_SMP
@@ -883,7 +902,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
 	 * SMP rebalancing from interrupt is the only thing that cares
 	 * here.
 	 */
-	next->oncpu = 1;
+	next->on_cpu = 1;
 #endif
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	raw_spin_unlock_irq(&rq->lock);
@@ -896,12 +915,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 {
 #ifdef CONFIG_SMP
 	/*
-	 * After ->oncpu is cleared, the task can be moved to a different CPU.
+	 * After ->on_cpu is cleared, the task can be moved to a different CPU.
 	 * We must ensure this doesn't happen until the switch is completely
 	 * finished.
 	 */
 	smp_wmb();
-	prev->oncpu = 0;
+	prev->on_cpu = 0;
 #endif
 #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_enable();
@@ -910,23 +929,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
- * against ttwu().
- */
-static inline int task_is_waking(struct task_struct *p)
-{
-	return unlikely(p->state == TASK_WAKING);
-}
-
-/*
- * __task_rq_lock - lock the runqueue a given task resides on.
- * Must be called interrupts disabled.
+ * __task_rq_lock - lock the rq @p resides on.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
+	lockdep_assert_held(&p->pi_lock);
+
 	for (;;) {
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
@@ -937,22 +948,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
 }
 
 /*
- * task_rq_lock - lock the runqueue a given task resides on and disable
- * interrupts. Note the ordering: we can safely lookup the task_rq without
- * explicitly disabling preemption.
+ * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
 */
 static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
+	__acquires(p->pi_lock)
 	__acquires(rq->lock)
 {
 	struct rq *rq;
 
 	for (;;) {
-		local_irq_save(*flags);
+		raw_spin_lock_irqsave(&p->pi_lock, *flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
 		if (likely(rq == task_rq(p)))
 			return rq;
-		raw_spin_unlock_irqrestore(&rq->lock, *flags);
+		raw_spin_unlock(&rq->lock);
+		raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 	}
 }
 
@@ -962,10 +973,13 @@ static void __task_rq_unlock(struct rq *rq)
 	raw_spin_unlock(&rq->lock);
 }
 
-static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+static inline void
+task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
 	__releases(rq->lock)
+	__releases(p->pi_lock)
 {
-	raw_spin_unlock_irqrestore(&rq->lock, *flags);
+	raw_spin_unlock(&rq->lock);
+	raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
 }
 
 /*
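For readers new to the locking scheme this series introduces, here is a minimal userspace sketch of the pattern that the new task_rq_lock()/task_rq_unlock() implement above: take the task's pi_lock first, then the runqueue lock, and retry if the task migrated to another runqueue in between. This is illustrative only; the *_demo names and the pthread-based locks are stand-ins, not kernel code.

#include <pthread.h>
#include <stdatomic.h>

struct rq_demo   { pthread_mutex_t lock; };
struct task_demo { pthread_mutex_t pi_lock; _Atomic(struct rq_demo *) rq; };

/* Both locks are held on return, always in the order pi_lock -> rq->lock. */
static struct rq_demo *task_rq_lock_demo(struct task_demo *p)
{
	for (;;) {
		pthread_mutex_lock(&p->pi_lock);
		struct rq_demo *rq = atomic_load(&p->rq);	/* snapshot the task's runqueue */
		pthread_mutex_lock(&rq->lock);
		if (rq == atomic_load(&p->rq))			/* still on that runqueue? done */
			return rq;
		pthread_mutex_unlock(&rq->lock);		/* it migrated meanwhile: retry */
		pthread_mutex_unlock(&p->pi_lock);
	}
}

static void task_rq_unlock_demo(struct rq_demo *rq, struct task_demo *p)
{
	pthread_mutex_unlock(&rq->lock);			/* release in reverse order */
	pthread_mutex_unlock(&p->pi_lock);
}

Because every path that can move a task now holds either p->pi_lock or the task's rq->lock, __task_rq_lock() above can assert lockdep_assert_held(&p->pi_lock), and set_task_cpu() (further down in this diff) warns under CONFIG_LOCKDEP unless one of those two locks is held.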
@@ -1774,7 +1788,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 	update_rq_clock(rq);
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, flags);
-	p->se.on_rq = 1;
 }
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1782,7 +1795,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 	update_rq_clock(rq);
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, flags);
-	p->se.on_rq = 0;
 }
 
 /*
@@ -2117,7 +2129,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule. In
 	 * this case, we can save a useless back to back clock update.
 	 */
-	if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
+	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
 
@@ -2163,6 +2175,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	 */
 	WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
 			!(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+
+#ifdef CONFIG_LOCKDEP
+	WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
+				      lockdep_is_held(&task_rq(p)->lock)));
+#endif
 #endif
 
 	trace_sched_migrate_task(p, new_cpu);
@@ -2183,19 +2200,6 @@ struct migration_arg {
 static int migration_cpu_stop(void *data);
 
 /*
- * The task's runqueue lock must be held.
- * Returns true if you have to wait for migration thread.
- */
-static bool migrate_task(struct task_struct *p, struct rq *rq)
-{
-	/*
-	 * If the task is not on a runqueue (and not running), then
-	 * the next wake-up will properly place the task.
-	 */
-	return p->se.on_rq || task_running(rq, p);
-}
-
-/*
  * wait_task_inactive - wait for a thread to unschedule.
 *
 * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2252,11 +2256,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 		rq = task_rq_lock(p, &flags);
 		trace_sched_wait_task(p);
 		running = task_running(rq, p);
-		on_rq = p->se.on_rq;
+		on_rq = p->on_rq;
 		ncsw = 0;
 		if (!match_state || p->state == match_state)
 			ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 
 		/*
 		 * If it changed from the expected state, bail out now.
@@ -2331,7 +2335,7 @@ EXPORT_SYMBOL_GPL(kick_process);
 
 #ifdef CONFIG_SMP
 /*
- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+ * ->cpus_allowed is protected by both rq->lock and p->pi_lock
 */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
@@ -2364,12 +2368,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 }
 
 /*
- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+ * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
 */
 static inline
-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
 {
-	int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -2395,27 +2399,60 @@ static void update_avg(u64 *avg, u64 sample)
 }
 #endif
 
-static inline void ttwu_activate(struct task_struct *p, struct rq *rq,
-				 bool is_sync, bool is_migrate, bool is_local,
-				 unsigned long en_flags)
+static void
+ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
 {
+#ifdef CONFIG_SCHEDSTATS
+	struct rq *rq = this_rq();
+
+#ifdef CONFIG_SMP
+	int this_cpu = smp_processor_id();
+
+	if (cpu == this_cpu) {
+		schedstat_inc(rq, ttwu_local);
+		schedstat_inc(p, se.statistics.nr_wakeups_local);
+	} else {
+		struct sched_domain *sd;
+
+		schedstat_inc(p, se.statistics.nr_wakeups_remote);
+		for_each_domain(this_cpu, sd) {
+			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
+				schedstat_inc(sd, ttwu_wake_remote);
+				break;
+			}
+		}
+	}
+#endif /* CONFIG_SMP */
+
+	schedstat_inc(rq, ttwu_count);
 	schedstat_inc(p, se.statistics.nr_wakeups);
-	if (is_sync)
+
+	if (wake_flags & WF_SYNC)
 		schedstat_inc(p, se.statistics.nr_wakeups_sync);
-	if (is_migrate)
+
+	if (cpu != task_cpu(p))
 		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
-	if (is_local)
-		schedstat_inc(p, se.statistics.nr_wakeups_local);
-	else
-		schedstat_inc(p, se.statistics.nr_wakeups_remote);
 
+#endif /* CONFIG_SCHEDSTATS */
+}
+
+static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
+{
 	activate_task(rq, p, en_flags);
+	p->on_rq = 1;
+
+	/* if a worker is waking up, notify workqueue */
+	if (p->flags & PF_WQ_WORKER)
+		wq_worker_waking_up(p, cpu_of(rq));
 }
 
-static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
-					int wake_flags, bool success)
+/*
+ * Mark the task runnable and perform wakeup-preemption.
+ */
+static void
+ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
 {
-	trace_sched_wakeup(p, success);
+	trace_sched_wakeup(p, true);
 	check_preempt_curr(rq, p, wake_flags);
 
 	p->state = TASK_RUNNING;
@@ -2434,9 +2471,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
 		rq->idle_stamp = 0;
 	}
 #endif
-	/* if a worker is waking up, notify workqueue */
-	if ((p->flags & PF_WQ_WORKER) && success)
-		wq_worker_waking_up(p, cpu_of(rq));
+}
+
+static void
+ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
+{
+#ifdef CONFIG_SMP
+	if (p->sched_contributes_to_load)
+		rq->nr_uninterruptible--;
+#endif
+
+	ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
+	ttwu_do_wakeup(rq, p, wake_flags);
+}
+
+/*
+ * Called in case the task @p isn't fully descheduled from its runqueue,
+ * in this case we must do a remote wakeup. Its a 'light' wakeup though,
+ * since all we need to do is flip p->state to TASK_RUNNING, since
+ * the task is still ->on_rq.
+ */
+static int ttwu_remote(struct task_struct *p, int wake_flags)
+{
+	struct rq *rq;
+	int ret = 0;
+
+	rq = __task_rq_lock(p);
+	if (p->on_rq) {
+		ttwu_do_wakeup(rq, p, wake_flags);
+		ret = 1;
+	}
+	__task_rq_unlock(rq);
+
+	return ret;
+}
+
+#ifdef CONFIG_SMP
+static void sched_ttwu_pending(void)
+{
+	struct rq *rq = this_rq();
+	struct task_struct *list = xchg(&rq->wake_list, NULL);
+
+	if (!list)
+		return;
+
+	raw_spin_lock(&rq->lock);
+
+	while (list) {
+		struct task_struct *p = list;
+		list = list->wake_entry;
+		ttwu_do_activate(rq, p, 0);
+	}
+
+	raw_spin_unlock(&rq->lock);
+}
+
+void scheduler_ipi(void)
+{
+	sched_ttwu_pending();
+}
+
+static void ttwu_queue_remote(struct task_struct *p, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct task_struct *next = rq->wake_list;
+
+	for (;;) {
+		struct task_struct *old = next;
+
+		p->wake_entry = next;
+		next = cmpxchg(&rq->wake_list, old, p);
+		if (next == old)
+			break;
+	}
+
+	if (!next)
+		smp_send_reschedule(cpu);
+}
+#endif
+
+static void ttwu_queue(struct task_struct *p, int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
+	if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+		ttwu_queue_remote(p, cpu);
+		return;
+	}
+#endif
+
+	raw_spin_lock(&rq->lock);
+	ttwu_do_activate(rq, p, 0);
+	raw_spin_unlock(&rq->lock);
 }
 
 /**
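The rq->wake_list handoff added above is a lock-free producer/consumer list: remote wakers push a task onto the list with cmpxchg() and send a rescheduling IPI only on the empty-to-non-empty transition, while scheduler_ipi() detaches the whole list with xchg() and activates each task under the local rq->lock. Below is a rough userspace analogue using C11 atomics; it is illustrative only, and all names in it are made up.

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct wake_task {
	struct wake_task *wake_entry;
	int id;
};

static _Atomic(struct wake_task *) wake_list;

static void queue_remote_demo(struct wake_task *p)	/* producer: any CPU */
{
	struct wake_task *next = atomic_load(&wake_list);

	do {
		p->wake_entry = next;			/* link in front of the current head */
	} while (!atomic_compare_exchange_weak(&wake_list, &next, p));

	if (!next) {
		/* empty -> non-empty: this is where the kernel sends the IPI */
	}
}

static void process_pending_demo(void)			/* consumer: the target CPU */
{
	struct wake_task *list = atomic_exchange(&wake_list, NULL);

	while (list) {
		struct wake_task *p = list;

		list = list->wake_entry;
		printf("waking task %d\n", p->id);	/* stands in for ttwu_do_activate() */
	}
}

int main(void)
{
	struct wake_task a = { .id = 1 }, b = { .id = 2 };

	queue_remote_demo(&a);
	queue_remote_demo(&b);
	process_pending_demo();
	return 0;
}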
@@ -2454,92 +2581,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
 * Returns %true if @p was woken up, %false if it was already running
 * or @state didn't match @p's state.
 */
-static int try_to_wake_up(struct task_struct *p, unsigned int state,
-			  int wake_flags)
+static int
+try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 {
-	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
-	unsigned long en_flags = ENQUEUE_WAKEUP;
-	struct rq *rq;
-
-	this_cpu = get_cpu();
+	int cpu, success = 0;
 
 	smp_wmb();
-	rq = task_rq_lock(p, &flags);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
 		goto out;
 
-	if (p->se.on_rq)
-		goto out_running;
-
+	success = 1; /* we're going to change ->state */
 	cpu = task_cpu(p);
-	orig_cpu = cpu;
 
-#ifdef CONFIG_SMP
-	if (unlikely(task_running(rq, p)))
-		goto out_activate;
+	if (p->on_rq && ttwu_remote(p, wake_flags))
+		goto stat;
 
+#ifdef CONFIG_SMP
 	/*
-	 * In order to handle concurrent wakeups and release the rq->lock
-	 * we put the task in TASK_WAKING state.
-	 *
-	 * First fix up the nr_uninterruptible count:
+	 * If the owning (remote) cpu is still in the middle of schedule() with
+	 * this task as prev, wait until its done referencing the task.
 	 */
-	if (task_contributes_to_load(p)) {
-		if (likely(cpu_online(orig_cpu)))
-			rq->nr_uninterruptible--;
-		else
-			this_rq()->nr_uninterruptible--;
-	}
-	p->state = TASK_WAKING;
-
-	if (p->sched_class->task_waking) {
-		p->sched_class->task_waking(rq, p);
-		en_flags |= ENQUEUE_WAKING;
+	while (p->on_cpu) {
+#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
+		/*
+		 * If called from interrupt context we could have landed in the
+		 * middle of schedule(), in this case we should take care not
+		 * to spin on ->on_cpu if p is current, since that would
+		 * deadlock.
+		 */
+		if (p == current) {
+			ttwu_queue(p, cpu);
+			goto stat;
+		}
+#endif
+		cpu_relax();
 	}
-
-	cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu)
-		set_task_cpu(p, cpu);
-	__task_rq_unlock(rq);
-
-	rq = cpu_rq(cpu);
-	raw_spin_lock(&rq->lock);
-
 	/*
-	 * We migrated the task without holding either rq->lock, however
-	 * since the task is not on the task list itself, nobody else
-	 * will try and migrate the task, hence the rq should match the
-	 * cpu we just moved it to.
+	 * Pairs with the smp_wmb() in finish_lock_switch().
 	 */
-	WARN_ON(task_cpu(p) != cpu);
-	WARN_ON(p->state != TASK_WAKING);
+	smp_rmb();
 
-#ifdef CONFIG_SCHEDSTATS
-	schedstat_inc(rq, ttwu_count);
-	if (cpu == this_cpu)
-		schedstat_inc(rq, ttwu_local);
-	else {
-		struct sched_domain *sd;
-		for_each_domain(this_cpu, sd) {
-			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				schedstat_inc(sd, ttwu_wake_remote);
-				break;
-			}
-		}
-	}
-#endif /* CONFIG_SCHEDSTATS */
+	p->sched_contributes_to_load = !!task_contributes_to_load(p);
+	p->state = TASK_WAKING;
 
-out_activate:
+	if (p->sched_class->task_waking)
+		p->sched_class->task_waking(p);
+
+	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	if (task_cpu(p) != cpu)
+		set_task_cpu(p, cpu);
 #endif /* CONFIG_SMP */
-	ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu,
-		      cpu == this_cpu, en_flags);
-	success = 1;
-out_running:
-	ttwu_post_activation(p, rq, wake_flags, success);
+
+	ttwu_queue(p, cpu);
+stat:
+	ttwu_stat(p, cpu, wake_flags);
 out:
-	task_rq_unlock(rq, &flags);
-	put_cpu();
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	return success;
 }
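The while (p->on_cpu) loop above pairs with the smp_wmb() before the on_cpu store in finish_lock_switch(): the waker must not pick a new CPU for the task until the previous CPU has completely finished switching away from it. A loose userspace analogue of that handshake, using C11 release/acquire atomics in place of the kernel's barriers (illustrative only; the names are made up):

#include <stdatomic.h>
#include <sched.h>

struct switch_task { atomic_int on_cpu; };

/* Runs on the CPU switching away from prev: everything it did to prev must
 * be visible before on_cpu reads as 0 (the kernel uses smp_wmb() before a
 * plain store). */
static void finish_switch_demo(struct switch_task *prev)
{
	atomic_store_explicit(&prev->on_cpu, 0, memory_order_release);
}

/* Runs on the waking CPU: only after this loop may it choose a new CPU for
 * the task (the kernel issues smp_rmb() after its loop). */
static void wait_until_descheduled_demo(struct switch_task *p)
{
	while (atomic_load_explicit(&p->on_cpu, memory_order_acquire))
		sched_yield();		/* stands in for cpu_relax() */
}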
@@ -2548,31 +2647,34 @@ out:
 * try_to_wake_up_local - try to wake up a local task with rq lock held
 * @p: the thread to be awakened
 *
 * Put @p on the run-queue if it's not already there. The caller must
 * ensure that this_rq() is locked, @p is bound to this_rq() and not
- * the current task. this_rq() stays locked over invocation.
+ * the current task.
 */
 static void try_to_wake_up_local(struct task_struct *p)
 {
 	struct rq *rq = task_rq(p);
-	bool success = false;
 
 	BUG_ON(rq != this_rq());
 	BUG_ON(p == current);
 	lockdep_assert_held(&rq->lock);
 
+	if (!raw_spin_trylock(&p->pi_lock)) {
+		raw_spin_unlock(&rq->lock);
+		raw_spin_lock(&p->pi_lock);
+		raw_spin_lock(&rq->lock);
+	}
+
 	if (!(p->state & TASK_NORMAL))
-		return;
+		goto out;
 
-	if (!p->se.on_rq) {
-		if (likely(!task_running(rq, p))) {
-			schedstat_inc(rq, ttwu_count);
-			schedstat_inc(rq, ttwu_local);
-		}
-		ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP);
-		success = true;
-	}
-	ttwu_post_activation(p, rq, 0, success);
+	if (!p->on_rq)
+		ttwu_activate(rq, p, ENQUEUE_WAKEUP);
+
+	ttwu_do_wakeup(rq, p, 0);
+	ttwu_stat(p, smp_processor_id(), 0);
+out:
+	raw_spin_unlock(&p->pi_lock);
 }
 
 /**
@@ -2605,19 +2707,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
 */
 static void __sched_fork(struct task_struct *p)
 {
+	p->on_rq = 0;
+
+	p->se.on_rq = 0;
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
 	p->se.vruntime = 0;
+	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
 	INIT_LIST_HEAD(&p->rt.run_list);
-	p->se.on_rq = 0;
-	INIT_LIST_HEAD(&p->se.group_node);
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2629,6 +2733,7 @@ static void __sched_fork(struct task_struct *p)
 */
 void sched_fork(struct task_struct *p, int clone_flags)
 {
+	unsigned long flags;
 	int cpu = get_cpu();
 
 	__sched_fork(p);
@@ -2679,16 +2784,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	 *
 	 * Silence PROVE_RCU.
 	 */
-	rcu_read_lock();
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	set_task_cpu(p, cpu);
-	rcu_read_unlock();
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
-	p->oncpu = 0;
+#if defined(CONFIG_SMP)
+	p->on_cpu = 0;
 #endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
@@ -2712,37 +2817,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 {
 	unsigned long flags;
 	struct rq *rq;
-	int cpu __maybe_unused = get_cpu();
 
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 #ifdef CONFIG_SMP
-	rq = task_rq_lock(p, &flags);
-	p->state = TASK_WAKING;
-
 	/*
 	 * Fork balancing, do it here and not earlier because:
 	 * - cpus_allowed can change in the fork path
 	 * - any previously selected cpu might disappear through hotplug
-	 *
-	 * We set TASK_WAKING so that select_task_rq() can drop rq->lock
-	 * without people poking at ->cpus_allowed.
 	 */
-	cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
-	set_task_cpu(p, cpu);
-
-	p->state = TASK_RUNNING;
-	task_rq_unlock(rq, &flags);
+	set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
 #endif
 
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 	activate_task(rq, p, 0);
-	trace_sched_wakeup_new(p, 1);
+	p->on_rq = 1;
+	trace_sched_wakeup_new(p, true);
 	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_woken)
 		p->sched_class->task_woken(rq, p);
 #endif
-	task_rq_unlock(rq, &flags);
-	put_cpu();
+	task_rq_unlock(rq, p, &flags);
 }
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3451,27 +3546,22 @@ void sched_exec(void)
 {
 	struct task_struct *p = current;
 	unsigned long flags;
-	struct rq *rq;
 	int dest_cpu;
 
-	rq = task_rq_lock(p, &flags);
-	dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
+	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
 	if (dest_cpu == smp_processor_id())
 		goto unlock;
 
-	/*
-	 * select_task_rq() can race against ->cpus_allowed
-	 */
-	if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-	    likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
+	if (likely(cpu_active(dest_cpu))) {
 		struct migration_arg arg = { p, dest_cpu };
 
-		task_rq_unlock(rq, &flags);
-		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
+		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
 		return;
 	}
 unlock:
-	task_rq_unlock(rq, &flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 }
 
 #endif
@@ -3508,7 +3598,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3526,7 +3616,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -3550,7 +3640,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
 	rq = task_rq_lock(p, &flags);
 	thread_group_cputime(p, &totals);
 	ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ns;
 }
@@ -4036,7 +4126,7 @@ static inline void schedule_debug(struct task_struct *prev)
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->se.on_rq)
+	if (prev->on_rq)
 		update_rq_clock(rq);
 	prev->sched_class->put_prev_task(rq, prev);
 }
@@ -4098,11 +4188,13 @@ need_resched:
 		if (unlikely(signal_pending_state(prev->state, prev))) {
 			prev->state = TASK_RUNNING;
 		} else {
+			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+			prev->on_rq = 0;
+
 			/*
-			 * If a worker is going to sleep, notify and
-			 * ask workqueue whether it wants to wake up a
-			 * task to maintain concurrency. If so, wake
-			 * up the task.
+			 * If a worker went to sleep, notify and ask workqueue
+			 * whether it wants to wake up a task to maintain
+			 * concurrency.
 			 */
 			if (prev->flags & PF_WQ_WORKER) {
 				struct task_struct *to_wakeup;
@@ -4111,21 +4203,20 @@ need_resched:
 				if (to_wakeup)
 					try_to_wake_up_local(to_wakeup);
 			}
-			deactivate_task(rq, prev, DEQUEUE_SLEEP);
+
+			/*
+			 * If we are going to sleep and we have plugged IO
+			 * queued, make sure to submit it to avoid deadlocks.
+			 */
+			if (blk_needs_flush_plug(prev)) {
+				raw_spin_unlock(&rq->lock);
+				blk_flush_plug(prev);
+				raw_spin_lock(&rq->lock);
+			}
 		}
 		switch_count = &prev->nvcsw;
 	}
 
-	/*
-	 * If we are going to sleep and we have plugged IO queued, make
-	 * sure to submit it to avoid deadlocks.
-	 */
-	if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
-		raw_spin_unlock(&rq->lock);
-		blk_flush_plug(prev);
-		raw_spin_lock(&rq->lock);
-	}
-
 	pre_schedule(rq, prev);
 
 	if (unlikely(!rq->nr_running))
@@ -4162,70 +4253,53 @@ need_resched:
 EXPORT_SYMBOL(schedule);
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
-/*
- * Look out! "owner" is an entirely speculative pointer
- * access and not reliable.
- */
-int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
-{
-	unsigned int cpu;
-	struct rq *rq;
 
-	if (!sched_feat(OWNER_SPIN))
-		return 0;
+static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
+{
+	bool ret = false;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	/*
-	 * Need to access the cpu field knowing that
-	 * DEBUG_PAGEALLOC could have unmapped it if
-	 * the mutex owner just released it and exited.
-	 */
-	if (probe_kernel_address(&owner->cpu, cpu))
-		return 0;
-#else
-	cpu = owner->cpu;
-#endif
+	rcu_read_lock();
+	if (lock->owner != owner)
+		goto fail;
 
 	/*
-	 * Even if the access succeeded (likely case),
-	 * the cpu field may no longer be valid.
+	 * Ensure we emit the owner->on_cpu, dereference _after_ checking
+	 * lock->owner still matches owner, if that fails, owner might
+	 * point to free()d memory, if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
 	 */
-	if (cpu >= nr_cpumask_bits)
-		return 0;
+	barrier();
 
-	/*
-	 * We need to validate that we can do a
-	 * get_cpu() and that we have the percpu area.
-	 */
-	if (!cpu_online(cpu))
-		return 0;
+	ret = owner->on_cpu;
+fail:
+	rcu_read_unlock();
 
-	rq = cpu_rq(cpu);
+	return ret;
+}
 
-	for (;;) {
-		/*
-		 * Owner changed, break to re-assess state.
-		 */
-		if (lock->owner != owner) {
-			/*
-			 * If the lock has switched to a different owner,
-			 * we likely have heavy contention. Return 0 to quit
-			 * optimistic spinning and not contend further:
-			 */
-			if (lock->owner)
-				return 0;
-			break;
-		}
+/*
+ * Look out! "owner" is an entirely speculative pointer
+ * access and not reliable.
+ */
+int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
+{
+	if (!sched_feat(OWNER_SPIN))
+		return 0;
 
-		/*
-		 * Is that owner really running on that cpu?
-		 */
-		if (task_thread_info(rq->curr) != owner || need_resched())
+	while (owner_running(lock, owner)) {
+		if (need_resched())
 			return 0;
 
 		arch_mutex_cpu_relax();
 	}
 
+	/*
+	 * If the owner changed to another task there is likely
+	 * heavy contention, stop spinning.
+	 */
+	if (lock->owner)
+		return 0;
+
 	return 1;
 }
 #endif
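The rewritten spin loop above separates "is the recorded owner still the owner and still on a CPU?" (owner_running(), done under rcu_read_lock() so a freed owner is never dereferenced) from the policy of stopping when the owner changes or the spinner itself needs to reschedule. A simplified userspace sketch of that shape, illustrative only; it omits the RCU protection and every name in it is made up:

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

struct spin_task  { atomic_int on_cpu; };
struct spin_mutex { _Atomic(struct spin_task *) owner; };

/* Spin only while the recorded owner is still the owner and is running.
 * The kernel additionally wraps the dereference in rcu_read_lock() so a
 * freed owner cannot be touched; this sketch assumes tasks are never freed. */
static bool owner_running_demo(struct spin_mutex *lock, struct spin_task *owner)
{
	if (atomic_load(&lock->owner) != owner)
		return false;
	return atomic_load(&owner->on_cpu) != 0;
}

static int spin_on_owner_demo(struct spin_mutex *lock, struct spin_task *owner,
			      bool (*need_resched)(void))
{
	while (owner_running_demo(lock, owner)) {
		if (need_resched())
			return 0;
		sched_yield();		/* stands in for arch_mutex_cpu_relax() */
	}

	/* Owner changed to another task: likely heavy contention, give up. */
	if (atomic_load(&lock->owner))
		return 0;

	return 1;
}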
@@ -4685,19 +4759,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
 */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-	unsigned long flags;
 	int oldprio, on_rq, running;
 	struct rq *rq;
 	const struct sched_class *prev_class;
 
 	BUG_ON(prio < 0 || prio > MAX_PRIO);
 
-	rq = task_rq_lock(p, &flags);
+	rq = __task_rq_lock(p);
 
 	trace_sched_pi_setprio(p, prio);
 	oldprio = p->prio;
 	prev_class = p->sched_class;
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		dequeue_task(rq, p, 0);
@@ -4717,7 +4790,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	task_rq_unlock(rq, &flags);
+	__task_rq_unlock(rq);
 }
 
 #endif
@@ -4745,7 +4818,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		p->static_prio = NICE_TO_PRIO(nice);
 		goto out_unlock;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		dequeue_task(rq, p, 0);
 
@@ -4765,7 +4838,7 @@ void set_user_nice(struct task_struct *p, long nice)
 		resched_task(rq->curr);
 	}
 out_unlock:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 }
 EXPORT_SYMBOL(set_user_nice);
 
@@ -4879,8 +4952,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
 static void
 __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 {
-	BUG_ON(p->se.on_rq);
-
 	p->policy = policy;
 	p->rt_priority = prio;
 	p->normal_prio = normal_prio(p);
@@ -4995,20 +5066,17 @@ recheck:
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
-	 */
-	raw_spin_lock_irqsave(&p->pi_lock, flags);
-	/*
+	 *
 	 * To be able to change p->policy safely, the appropriate
 	 * runqueue lock must be held.
 	 */
-	rq = __task_rq_lock(p);
+	rq = task_rq_lock(p, &flags);
 
 	/*
 	 * Changing the policy of the stop threads its a very bad idea
 	 */
 	if (p == rq->stop) {
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		return -EINVAL;
 	}
 
@@ -5032,8 +5100,7 @@ recheck:
 		if (rt_bandwidth_enabled() && rt_policy(policy) &&
 				task_group(p)->rt_bandwidth.rt_runtime == 0 &&
 				!task_group_is_autogroup(task_group(p))) {
-			__task_rq_unlock(rq);
-			raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+			task_rq_unlock(rq, p, &flags);
 			return -EPERM;
 		}
 	}
@@ -5042,11 +5109,10 @@ recheck:
 	/* recheck policy now with rq lock held */
 	if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
 		policy = oldpolicy = -1;
-		__task_rq_unlock(rq);
-		raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+		task_rq_unlock(rq, p, &flags);
 		goto recheck;
 	}
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	running = task_current(rq, p);
 	if (on_rq)
 		deactivate_task(rq, p, 0);
@@ -5065,8 +5131,7 @@ recheck:
 		activate_task(rq, p, 0);
 
 	check_class_changed(rq, p, prev_class, oldprio);
-	__task_rq_unlock(rq);
-	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rt_mutex_adjust_pi(p);
 
@@ -5317,7 +5382,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 {
 	struct task_struct *p;
 	unsigned long flags;
-	struct rq *rq;
 	int retval;
 
 	get_online_cpus();
@@ -5332,9 +5396,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
 	if (retval)
 		goto out_unlock;
 
-	rq = task_rq_lock(p, &flags);
+	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
-	task_rq_unlock(rq, &flags);
+	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 out_unlock:
 	rcu_read_unlock();
@@ -5659,7 +5723,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
 
 	rq = task_rq_lock(p, &flags);
 	time_slice = p->sched_class->get_rr_interval(rq, p);
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	rcu_read_unlock();
 	jiffies_to_timespec(time_slice, &t);
@@ -5777,8 +5841,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	rcu_read_unlock();
 
 	rq->curr = rq->idle = idle;
-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
-	idle->oncpu = 1;
+#if defined(CONFIG_SMP)
+	idle->on_cpu = 1;
 #endif
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
@@ -5882,18 +5946,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	unsigned int dest_cpu;
 	int ret = 0;
 
-	/*
-	 * Serialize against TASK_WAKING so that ttwu() and wunt() can
-	 * drop the rq->lock and still rely on ->cpus_allowed.
-	 */
-again:
-	while (task_is_waking(p))
-		cpu_relax();
 	rq = task_rq_lock(p, &flags);
-	if (task_is_waking(p)) {
-		task_rq_unlock(rq, &flags);
-		goto again;
-	}
 
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
@@ -5918,16 +5971,16 @@ again:
 		goto out;
 
 	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
-	if (migrate_task(p, rq)) {
+	if (p->on_rq) {
 		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		task_rq_unlock(rq, &flags);
+		task_rq_unlock(rq, p, &flags);
 		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
 	}
 out:
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, p, &flags);
 
 	return ret;
 }
@@ -5955,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	rq_src = cpu_rq(src_cpu);
 	rq_dest = cpu_rq(dest_cpu);
 
+	raw_spin_lock(&p->pi_lock);
 	double_rq_lock(rq_src, rq_dest);
 	/* Already moved. */
 	if (task_cpu(p) != src_cpu)
@@ -5967,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	 * If we're not on a rq, the next wake-up will ensure we're
 	 * placed properly.
 	 */
-	if (p->se.on_rq) {
+	if (p->on_rq) {
 		deactivate_task(rq_src, p, 0);
 		set_task_cpu(p, dest_cpu);
 		activate_task(rq_dest, p, 0);
@@ -5977,6 +6031,7 @@ done:
 	ret = 1;
 fail:
 	double_rq_unlock(rq_src, rq_dest);
+	raw_spin_unlock(&p->pi_lock);
 	return ret;
 }
 
@@ -6317,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 	case CPU_DYING:
+		sched_ttwu_pending();
 		/* Update our root-domain */
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
@@ -7961,7 +8017,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
 	int old_prio = p->prio;
 	int on_rq;
 
-	on_rq = p->se.on_rq;
+	on_rq = p->on_rq;
 	if (on_rq)
 		deactivate_task(rq, p, 0);
 	__setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8304,7 +8360,7 @@ void sched_move_task(struct task_struct *tsk)
 	rq = task_rq_lock(tsk, &flags);
 
 	running = task_current(rq, tsk);
-	on_rq = tsk->se.on_rq;
+	on_rq = tsk->on_rq;
 
 	if (on_rq)
 		dequeue_task(rq, tsk, 0);
@@ -8323,7 +8379,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (on_rq)
 		enqueue_task(rq, tsk, 0);
 
-	task_rq_unlock(rq, &flags);
+	task_rq_unlock(rq, tsk, &flags);
 }
 #endif /* CONFIG_CGROUP_SCHED */
 