author     Ingo Molnar <mingo@elte.hu>    2011-04-18 08:53:18 -0400
committer  Ingo Molnar <mingo@elte.hu>    2011-04-18 08:53:33 -0400
commit     6ddafdaab3f809b110ada253d2f2d4910ebd3ac5 (patch)
tree       366bb7513511a05b6e11ab89bfe3b2dbd1d62a03 /kernel
parent     3905c54f2bd2c6f937f87307987ca072eabc3e7b (diff)
parent     bd8e7dded88a3e1c085c333f19ff31387616f71a (diff)
Merge branch 'sched/locking' into sched/core
Merge reason: the rq locking changes are stable, propagate them into the .40 queue.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/mutex-debug.c      2
-rw-r--r--  kernel/mutex-debug.h      2
-rw-r--r--  kernel/mutex.c            2
-rw-r--r--  kernel/mutex.h            2
-rw-r--r--  kernel/power/Kconfig      6
-rw-r--r--  kernel/sched.c          650
-rw-r--r--  kernel/sched_debug.c      2
-rw-r--r--  kernel/sched_fair.c      23
-rw-r--r--  kernel/sched_features.h   6
-rw-r--r--  kernel/sched_idletask.c   2
-rw-r--r--  kernel/sched_rt.c        54
-rw-r--r--  kernel/sched_stoptask.c   5
12 files changed, 426 insertions(+), 330 deletions(-)
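
For orientation before the per-file hunks: the rule this series converges on is a fixed two-level locking order, with p->pi_lock taken first (it now also serializes ->cpus_allowed and the wakeup path) and the lock of whichever runqueue the task currently sits on taken second. The sketch below simply restates the new task_rq_lock()/task_rq_unlock() pair from the kernel/sched.c hunk further down; it is an illustration lifted from the patch, not additional code.

static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
        __acquires(p->pi_lock)
        __acquires(rq->lock)
{
        struct rq *rq;

        for (;;) {
                /* pi_lock first: it pins ->cpus_allowed and the wakeup path */
                raw_spin_lock_irqsave(&p->pi_lock, *flags);
                rq = task_rq(p);
                raw_spin_lock(&rq->lock);
                if (likely(rq == task_rq(p)))
                        return rq;
                /* the task migrated under us; retry against its new rq */
                raw_spin_unlock(&rq->lock);
                raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
        }
}

static inline void
task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
        __releases(rq->lock)
        __releases(p->pi_lock)
{
        raw_spin_unlock(&rq->lock);
        raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
}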
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index ec815a960b5d..73da83aff418 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
75 return; 75 return;
76 76
77 DEBUG_LOCKS_WARN_ON(lock->magic != lock); 77 DEBUG_LOCKS_WARN_ON(lock->magic != lock);
78 DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); 78 DEBUG_LOCKS_WARN_ON(lock->owner != current);
79 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); 79 DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
80 mutex_clear_owner(lock); 80 mutex_clear_owner(lock);
81} 81}
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 57d527a16f9d..0799fd3e4cfa 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
29 29
30static inline void mutex_set_owner(struct mutex *lock) 30static inline void mutex_set_owner(struct mutex *lock)
31{ 31{
32 lock->owner = current_thread_info(); 32 lock->owner = current;
33} 33}
34 34
35static inline void mutex_clear_owner(struct mutex *lock) 35static inline void mutex_clear_owner(struct mutex *lock)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index c4195fa98900..fe4706cb0c5b 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
160 */ 160 */
161 161
162 for (;;) { 162 for (;;) {
163 struct thread_info *owner; 163 struct task_struct *owner;
164 164
165 /* 165 /*
166 * If we own the BKL, then don't spin. The owner of 166 * If we own the BKL, then don't spin. The owner of
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 67578ca48f94..4115fbf83b12 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -19,7 +19,7 @@
19#ifdef CONFIG_SMP 19#ifdef CONFIG_SMP
20static inline void mutex_set_owner(struct mutex *lock) 20static inline void mutex_set_owner(struct mutex *lock)
21{ 21{
22 lock->owner = current_thread_info(); 22 lock->owner = current;
23} 23}
24 24
25static inline void mutex_clear_owner(struct mutex *lock) 25static inline void mutex_clear_owner(struct mutex *lock)
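
All four mutex hunks above make the same substitution: lock->owner now records the owning task_struct rather than its thread_info. The payoff appears later in this diff, where the adaptive-spin path stops probing owner->cpu and the remote runqueue and instead checks owner->on_cpu under RCU. Condensed from the mutex_spin_on_owner() rewrite in the kernel/sched.c hunk below:

static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
{
        bool ret = false;

        rcu_read_lock();
        if (lock->owner != owner)
                goto fail;

        /*
         * Re-check lock->owner before dereferencing owner: if it still
         * matches, the RCU read-side section keeps the task_struct
         * memory valid; if not, owner may already point to freed memory.
         */
        barrier();
        ret = owner->on_cpu;
fail:
        rcu_read_unlock();

        return ret;
}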
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 4603f08dc47b..6de9a8fc3417 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,9 +18,13 @@ config SUSPEND_FREEZER
18 18
19 Turning OFF this setting is NOT recommended! If in doubt, say Y. 19 Turning OFF this setting is NOT recommended! If in doubt, say Y.
20 20
21config HIBERNATE_CALLBACKS
22 bool
23
21config HIBERNATION 24config HIBERNATION
22 bool "Hibernation (aka 'suspend to disk')" 25 bool "Hibernation (aka 'suspend to disk')"
23 depends on SWAP && ARCH_HIBERNATION_POSSIBLE 26 depends on SWAP && ARCH_HIBERNATION_POSSIBLE
27 select HIBERNATE_CALLBACKS
24 select LZO_COMPRESS 28 select LZO_COMPRESS
25 select LZO_DECOMPRESS 29 select LZO_DECOMPRESS
26 ---help--- 30 ---help---
@@ -85,7 +89,7 @@ config PM_STD_PARTITION
85 89
86config PM_SLEEP 90config PM_SLEEP
87 def_bool y 91 def_bool y
88 depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE 92 depends on SUSPEND || HIBERNATE_CALLBACKS
89 93
90config PM_SLEEP_SMP 94config PM_SLEEP_SMP
91 def_bool y 95 def_bool y
diff --git a/kernel/sched.c b/kernel/sched.c
index 506cb8147c70..0cfe0310ed5d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq {
312 312
313 u64 exec_clock; 313 u64 exec_clock;
314 u64 min_vruntime; 314 u64 min_vruntime;
315#ifndef CONFIG_64BIT
316 u64 min_vruntime_copy;
317#endif
315 318
316 struct rb_root tasks_timeline; 319 struct rb_root tasks_timeline;
317 struct rb_node *rb_leftmost; 320 struct rb_node *rb_leftmost;
@@ -554,6 +557,10 @@ struct rq {
554 unsigned int ttwu_count; 557 unsigned int ttwu_count;
555 unsigned int ttwu_local; 558 unsigned int ttwu_local;
556#endif 559#endif
560
561#ifdef CONFIG_SMP
562 struct task_struct *wake_list;
563#endif
557}; 564};
558 565
559static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 566static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
@@ -597,7 +604,7 @@ static inline int cpu_of(struct rq *rq)
597 * Return the group to which this tasks belongs. 604 * Return the group to which this tasks belongs.
598 * 605 *
599 * We use task_subsys_state_check() and extend the RCU verification 606 * We use task_subsys_state_check() and extend the RCU verification
600 * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() 607 * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach()
601 * holds that lock for each task it moves into the cgroup. Therefore 608 * holds that lock for each task it moves into the cgroup. Therefore
602 * by holding that lock, we pin the task to the current cgroup. 609 * by holding that lock, we pin the task to the current cgroup.
603 */ 610 */
@@ -607,7 +614,7 @@ static inline struct task_group *task_group(struct task_struct *p)
607 struct cgroup_subsys_state *css; 614 struct cgroup_subsys_state *css;
608 615
609 css = task_subsys_state_check(p, cpu_cgroup_subsys_id, 616 css = task_subsys_state_check(p, cpu_cgroup_subsys_id,
610 lockdep_is_held(&task_rq(p)->lock)); 617 lockdep_is_held(&p->pi_lock));
611 tg = container_of(css, struct task_group, css); 618 tg = container_of(css, struct task_group, css);
612 619
613 return autogroup_task_group(p, tg); 620 return autogroup_task_group(p, tg);
@@ -839,18 +846,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p)
839 return rq->curr == p; 846 return rq->curr == p;
840} 847}
841 848
842#ifndef __ARCH_WANT_UNLOCKED_CTXSW
843static inline int task_running(struct rq *rq, struct task_struct *p) 849static inline int task_running(struct rq *rq, struct task_struct *p)
844{ 850{
851#ifdef CONFIG_SMP
852 return p->on_cpu;
853#else
845 return task_current(rq, p); 854 return task_current(rq, p);
855#endif
846} 856}
847 857
858#ifndef __ARCH_WANT_UNLOCKED_CTXSW
848static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 859static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
849{ 860{
861#ifdef CONFIG_SMP
862 /*
863 * We can optimise this out completely for !SMP, because the
864 * SMP rebalancing from interrupt is the only thing that cares
865 * here.
866 */
867 next->on_cpu = 1;
868#endif
850} 869}
851 870
852static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) 871static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
853{ 872{
873#ifdef CONFIG_SMP
874 /*
875 * After ->on_cpu is cleared, the task can be moved to a different CPU.
876 * We must ensure this doesn't happen until the switch is completely
877 * finished.
878 */
879 smp_wmb();
880 prev->on_cpu = 0;
881#endif
854#ifdef CONFIG_DEBUG_SPINLOCK 882#ifdef CONFIG_DEBUG_SPINLOCK
855 /* this is a valid case when another task releases the spinlock */ 883 /* this is a valid case when another task releases the spinlock */
856 rq->lock.owner = current; 884 rq->lock.owner = current;
@@ -866,15 +894,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
866} 894}
867 895
868#else /* __ARCH_WANT_UNLOCKED_CTXSW */ 896#else /* __ARCH_WANT_UNLOCKED_CTXSW */
869static inline int task_running(struct rq *rq, struct task_struct *p)
870{
871#ifdef CONFIG_SMP
872 return p->oncpu;
873#else
874 return task_current(rq, p);
875#endif
876}
877
878static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) 897static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
879{ 898{
880#ifdef CONFIG_SMP 899#ifdef CONFIG_SMP
@@ -883,7 +902,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next)
883 * SMP rebalancing from interrupt is the only thing that cares 902 * SMP rebalancing from interrupt is the only thing that cares
884 * here. 903 * here.
885 */ 904 */
886 next->oncpu = 1; 905 next->on_cpu = 1;
887#endif 906#endif
888#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW 907#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
889 raw_spin_unlock_irq(&rq->lock); 908 raw_spin_unlock_irq(&rq->lock);
@@ -896,12 +915,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
896{ 915{
897#ifdef CONFIG_SMP 916#ifdef CONFIG_SMP
898 /* 917 /*
899 * After ->oncpu is cleared, the task can be moved to a different CPU. 918 * After ->on_cpu is cleared, the task can be moved to a different CPU.
900 * We must ensure this doesn't happen until the switch is completely 919 * We must ensure this doesn't happen until the switch is completely
901 * finished. 920 * finished.
902 */ 921 */
903 smp_wmb(); 922 smp_wmb();
904 prev->oncpu = 0; 923 prev->on_cpu = 0;
905#endif 924#endif
906#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW 925#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW
907 local_irq_enable(); 926 local_irq_enable();
@@ -910,23 +929,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
910#endif /* __ARCH_WANT_UNLOCKED_CTXSW */ 929#endif /* __ARCH_WANT_UNLOCKED_CTXSW */
911 930
912/* 931/*
913 * Check whether the task is waking, we use this to synchronize ->cpus_allowed 932 * __task_rq_lock - lock the rq @p resides on.
914 * against ttwu().
915 */
916static inline int task_is_waking(struct task_struct *p)
917{
918 return unlikely(p->state == TASK_WAKING);
919}
920
921/*
922 * __task_rq_lock - lock the runqueue a given task resides on.
923 * Must be called interrupts disabled.
924 */ 933 */
925static inline struct rq *__task_rq_lock(struct task_struct *p) 934static inline struct rq *__task_rq_lock(struct task_struct *p)
926 __acquires(rq->lock) 935 __acquires(rq->lock)
927{ 936{
928 struct rq *rq; 937 struct rq *rq;
929 938
939 lockdep_assert_held(&p->pi_lock);
940
930 for (;;) { 941 for (;;) {
931 rq = task_rq(p); 942 rq = task_rq(p);
932 raw_spin_lock(&rq->lock); 943 raw_spin_lock(&rq->lock);
@@ -937,22 +948,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p)
937} 948}
938 949
939/* 950/*
940 * task_rq_lock - lock the runqueue a given task resides on and disable 951 * task_rq_lock - lock p->pi_lock and lock the rq @p resides on.
941 * interrupts. Note the ordering: we can safely lookup the task_rq without
942 * explicitly disabling preemption.
943 */ 952 */
944static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) 953static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
954 __acquires(p->pi_lock)
945 __acquires(rq->lock) 955 __acquires(rq->lock)
946{ 956{
947 struct rq *rq; 957 struct rq *rq;
948 958
949 for (;;) { 959 for (;;) {
950 local_irq_save(*flags); 960 raw_spin_lock_irqsave(&p->pi_lock, *flags);
951 rq = task_rq(p); 961 rq = task_rq(p);
952 raw_spin_lock(&rq->lock); 962 raw_spin_lock(&rq->lock);
953 if (likely(rq == task_rq(p))) 963 if (likely(rq == task_rq(p)))
954 return rq; 964 return rq;
955 raw_spin_unlock_irqrestore(&rq->lock, *flags); 965 raw_spin_unlock(&rq->lock);
966 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
956 } 967 }
957} 968}
958 969
@@ -962,10 +973,13 @@ static void __task_rq_unlock(struct rq *rq)
962 raw_spin_unlock(&rq->lock); 973 raw_spin_unlock(&rq->lock);
963} 974}
964 975
965static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) 976static inline void
977task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags)
966 __releases(rq->lock) 978 __releases(rq->lock)
979 __releases(p->pi_lock)
967{ 980{
968 raw_spin_unlock_irqrestore(&rq->lock, *flags); 981 raw_spin_unlock(&rq->lock);
982 raw_spin_unlock_irqrestore(&p->pi_lock, *flags);
969} 983}
970 984
971/* 985/*
@@ -1774,7 +1788,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
1774 update_rq_clock(rq); 1788 update_rq_clock(rq);
1775 sched_info_queued(p); 1789 sched_info_queued(p);
1776 p->sched_class->enqueue_task(rq, p, flags); 1790 p->sched_class->enqueue_task(rq, p, flags);
1777 p->se.on_rq = 1;
1778} 1791}
1779 1792
1780static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) 1793static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1782,7 +1795,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
1782 update_rq_clock(rq); 1795 update_rq_clock(rq);
1783 sched_info_dequeued(p); 1796 sched_info_dequeued(p);
1784 p->sched_class->dequeue_task(rq, p, flags); 1797 p->sched_class->dequeue_task(rq, p, flags);
1785 p->se.on_rq = 0;
1786} 1798}
1787 1799
1788/* 1800/*
@@ -2117,7 +2129,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2117 * A queue event has occurred, and we're going to schedule. In 2129 * A queue event has occurred, and we're going to schedule. In
2118 * this case, we can save a useless back to back clock update. 2130 * this case, we can save a useless back to back clock update.
2119 */ 2131 */
2120 if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) 2132 if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
2121 rq->skip_clock_update = 1; 2133 rq->skip_clock_update = 1;
2122} 2134}
2123 2135
@@ -2163,6 +2175,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
2163 */ 2175 */
2164 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && 2176 WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
2165 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); 2177 !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
2178
2179#ifdef CONFIG_LOCKDEP
2180 WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) ||
2181 lockdep_is_held(&task_rq(p)->lock)));
2182#endif
2166#endif 2183#endif
2167 2184
2168 trace_sched_migrate_task(p, new_cpu); 2185 trace_sched_migrate_task(p, new_cpu);
@@ -2183,19 +2200,6 @@ struct migration_arg {
2183static int migration_cpu_stop(void *data); 2200static int migration_cpu_stop(void *data);
2184 2201
2185/* 2202/*
2186 * The task's runqueue lock must be held.
2187 * Returns true if you have to wait for migration thread.
2188 */
2189static bool migrate_task(struct task_struct *p, struct rq *rq)
2190{
2191 /*
2192 * If the task is not on a runqueue (and not running), then
2193 * the next wake-up will properly place the task.
2194 */
2195 return p->se.on_rq || task_running(rq, p);
2196}
2197
2198/*
2199 * wait_task_inactive - wait for a thread to unschedule. 2203 * wait_task_inactive - wait for a thread to unschedule.
2200 * 2204 *
2201 * If @match_state is nonzero, it's the @p->state value just checked and 2205 * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2252,11 +2256,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
2252 rq = task_rq_lock(p, &flags); 2256 rq = task_rq_lock(p, &flags);
2253 trace_sched_wait_task(p); 2257 trace_sched_wait_task(p);
2254 running = task_running(rq, p); 2258 running = task_running(rq, p);
2255 on_rq = p->se.on_rq; 2259 on_rq = p->on_rq;
2256 ncsw = 0; 2260 ncsw = 0;
2257 if (!match_state || p->state == match_state) 2261 if (!match_state || p->state == match_state)
2258 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ 2262 ncsw = p->nvcsw | LONG_MIN; /* sets MSB */
2259 task_rq_unlock(rq, &flags); 2263 task_rq_unlock(rq, p, &flags);
2260 2264
2261 /* 2265 /*
2262 * If it changed from the expected state, bail out now. 2266 * If it changed from the expected state, bail out now.
@@ -2331,7 +2335,7 @@ EXPORT_SYMBOL_GPL(kick_process);
2331 2335
2332#ifdef CONFIG_SMP 2336#ifdef CONFIG_SMP
2333/* 2337/*
2334 * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. 2338 * ->cpus_allowed is protected by both rq->lock and p->pi_lock
2335 */ 2339 */
2336static int select_fallback_rq(int cpu, struct task_struct *p) 2340static int select_fallback_rq(int cpu, struct task_struct *p)
2337{ 2341{
@@ -2364,12 +2368,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2364} 2368}
2365 2369
2366/* 2370/*
2367 * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. 2371 * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable.
2368 */ 2372 */
2369static inline 2373static inline
2370int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) 2374int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
2371{ 2375{
2372 int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); 2376 int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
2373 2377
2374 /* 2378 /*
2375 * In order not to call set_task_cpu() on a blocking task we need 2379 * In order not to call set_task_cpu() on a blocking task we need
@@ -2395,27 +2399,60 @@ static void update_avg(u64 *avg, u64 sample)
2395} 2399}
2396#endif 2400#endif
2397 2401
2398static inline void ttwu_activate(struct task_struct *p, struct rq *rq, 2402static void
2399 bool is_sync, bool is_migrate, bool is_local, 2403ttwu_stat(struct task_struct *p, int cpu, int wake_flags)
2400 unsigned long en_flags)
2401{ 2404{
2405#ifdef CONFIG_SCHEDSTATS
2406 struct rq *rq = this_rq();
2407
2408#ifdef CONFIG_SMP
2409 int this_cpu = smp_processor_id();
2410
2411 if (cpu == this_cpu) {
2412 schedstat_inc(rq, ttwu_local);
2413 schedstat_inc(p, se.statistics.nr_wakeups_local);
2414 } else {
2415 struct sched_domain *sd;
2416
2417 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2418 for_each_domain(this_cpu, sd) {
2419 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2420 schedstat_inc(sd, ttwu_wake_remote);
2421 break;
2422 }
2423 }
2424 }
2425#endif /* CONFIG_SMP */
2426
2427 schedstat_inc(rq, ttwu_count);
2402 schedstat_inc(p, se.statistics.nr_wakeups); 2428 schedstat_inc(p, se.statistics.nr_wakeups);
2403 if (is_sync) 2429
2430 if (wake_flags & WF_SYNC)
2404 schedstat_inc(p, se.statistics.nr_wakeups_sync); 2431 schedstat_inc(p, se.statistics.nr_wakeups_sync);
2405 if (is_migrate) 2432
2433 if (cpu != task_cpu(p))
2406 schedstat_inc(p, se.statistics.nr_wakeups_migrate); 2434 schedstat_inc(p, se.statistics.nr_wakeups_migrate);
2407 if (is_local)
2408 schedstat_inc(p, se.statistics.nr_wakeups_local);
2409 else
2410 schedstat_inc(p, se.statistics.nr_wakeups_remote);
2411 2435
2436#endif /* CONFIG_SCHEDSTATS */
2437}
2438
2439static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags)
2440{
2412 activate_task(rq, p, en_flags); 2441 activate_task(rq, p, en_flags);
2442 p->on_rq = 1;
2443
2444 /* if a worker is waking up, notify workqueue */
2445 if (p->flags & PF_WQ_WORKER)
2446 wq_worker_waking_up(p, cpu_of(rq));
2413} 2447}
2414 2448
2415static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, 2449/*
2416 int wake_flags, bool success) 2450 * Mark the task runnable and perform wakeup-preemption.
2451 */
2452static void
2453ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
2417{ 2454{
2418 trace_sched_wakeup(p, success); 2455 trace_sched_wakeup(p, true);
2419 check_preempt_curr(rq, p, wake_flags); 2456 check_preempt_curr(rq, p, wake_flags);
2420 2457
2421 p->state = TASK_RUNNING; 2458 p->state = TASK_RUNNING;
@@ -2434,9 +2471,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
2434 rq->idle_stamp = 0; 2471 rq->idle_stamp = 0;
2435 } 2472 }
2436#endif 2473#endif
2437 /* if a worker is waking up, notify workqueue */ 2474}
2438 if ((p->flags & PF_WQ_WORKER) && success) 2475
2439 wq_worker_waking_up(p, cpu_of(rq)); 2476static void
2477ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
2478{
2479#ifdef CONFIG_SMP
2480 if (p->sched_contributes_to_load)
2481 rq->nr_uninterruptible--;
2482#endif
2483
2484 ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
2485 ttwu_do_wakeup(rq, p, wake_flags);
2486}
2487
2488/*
2489 * Called in case the task @p isn't fully descheduled from its runqueue,
2490 * in this case we must do a remote wakeup. Its a 'light' wakeup though,
2491 * since all we need to do is flip p->state to TASK_RUNNING, since
2492 * the task is still ->on_rq.
2493 */
2494static int ttwu_remote(struct task_struct *p, int wake_flags)
2495{
2496 struct rq *rq;
2497 int ret = 0;
2498
2499 rq = __task_rq_lock(p);
2500 if (p->on_rq) {
2501 ttwu_do_wakeup(rq, p, wake_flags);
2502 ret = 1;
2503 }
2504 __task_rq_unlock(rq);
2505
2506 return ret;
2507}
2508
2509#ifdef CONFIG_SMP
2510static void sched_ttwu_pending(void)
2511{
2512 struct rq *rq = this_rq();
2513 struct task_struct *list = xchg(&rq->wake_list, NULL);
2514
2515 if (!list)
2516 return;
2517
2518 raw_spin_lock(&rq->lock);
2519
2520 while (list) {
2521 struct task_struct *p = list;
2522 list = list->wake_entry;
2523 ttwu_do_activate(rq, p, 0);
2524 }
2525
2526 raw_spin_unlock(&rq->lock);
2527}
2528
2529void scheduler_ipi(void)
2530{
2531 sched_ttwu_pending();
2532}
2533
2534static void ttwu_queue_remote(struct task_struct *p, int cpu)
2535{
2536 struct rq *rq = cpu_rq(cpu);
2537 struct task_struct *next = rq->wake_list;
2538
2539 for (;;) {
2540 struct task_struct *old = next;
2541
2542 p->wake_entry = next;
2543 next = cmpxchg(&rq->wake_list, old, p);
2544 if (next == old)
2545 break;
2546 }
2547
2548 if (!next)
2549 smp_send_reschedule(cpu);
2550}
2551#endif
2552
2553static void ttwu_queue(struct task_struct *p, int cpu)
2554{
2555 struct rq *rq = cpu_rq(cpu);
2556
2557#if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE)
2558 if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
2559 ttwu_queue_remote(p, cpu);
2560 return;
2561 }
2562#endif
2563
2564 raw_spin_lock(&rq->lock);
2565 ttwu_do_activate(rq, p, 0);
2566 raw_spin_unlock(&rq->lock);
2440} 2567}
2441 2568
2442/** 2569/**
@@ -2454,92 +2581,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq,
2454 * Returns %true if @p was woken up, %false if it was already running 2581 * Returns %true if @p was woken up, %false if it was already running
2455 * or @state didn't match @p's state. 2582 * or @state didn't match @p's state.
2456 */ 2583 */
2457static int try_to_wake_up(struct task_struct *p, unsigned int state, 2584static int
2458 int wake_flags) 2585try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
2459{ 2586{
2460 int cpu, orig_cpu, this_cpu, success = 0;
2461 unsigned long flags; 2587 unsigned long flags;
2462 unsigned long en_flags = ENQUEUE_WAKEUP; 2588 int cpu, success = 0;
2463 struct rq *rq;
2464
2465 this_cpu = get_cpu();
2466 2589
2467 smp_wmb(); 2590 smp_wmb();
2468 rq = task_rq_lock(p, &flags); 2591 raw_spin_lock_irqsave(&p->pi_lock, flags);
2469 if (!(p->state & state)) 2592 if (!(p->state & state))
2470 goto out; 2593 goto out;
2471 2594
2472 if (p->se.on_rq) 2595 success = 1; /* we're going to change ->state */
2473 goto out_running;
2474
2475 cpu = task_cpu(p); 2596 cpu = task_cpu(p);
2476 orig_cpu = cpu;
2477 2597
2478#ifdef CONFIG_SMP 2598 if (p->on_rq && ttwu_remote(p, wake_flags))
2479 if (unlikely(task_running(rq, p))) 2599 goto stat;
2480 goto out_activate;
2481 2600
2601#ifdef CONFIG_SMP
2482 /* 2602 /*
2483 * In order to handle concurrent wakeups and release the rq->lock 2603 * If the owning (remote) cpu is still in the middle of schedule() with
2484 * we put the task in TASK_WAKING state. 2604 * this task as prev, wait until its done referencing the task.
2485 *
2486 * First fix up the nr_uninterruptible count:
2487 */ 2605 */
2488 if (task_contributes_to_load(p)) { 2606 while (p->on_cpu) {
2489 if (likely(cpu_online(orig_cpu))) 2607#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
2490 rq->nr_uninterruptible--; 2608 /*
2491 else 2609 * If called from interrupt context we could have landed in the
2492 this_rq()->nr_uninterruptible--; 2610 * middle of schedule(), in this case we should take care not
2493 } 2611 * to spin on ->on_cpu if p is current, since that would
2494 p->state = TASK_WAKING; 2612 * deadlock.
2495 2613 */
2496 if (p->sched_class->task_waking) { 2614 if (p == current) {
2497 p->sched_class->task_waking(rq, p); 2615 ttwu_queue(p, cpu);
2498 en_flags |= ENQUEUE_WAKING; 2616 goto stat;
2617 }
2618#endif
2619 cpu_relax();
2499 } 2620 }
2500
2501 cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
2502 if (cpu != orig_cpu)
2503 set_task_cpu(p, cpu);
2504 __task_rq_unlock(rq);
2505
2506 rq = cpu_rq(cpu);
2507 raw_spin_lock(&rq->lock);
2508
2509 /* 2621 /*
2510 * We migrated the task without holding either rq->lock, however 2622 * Pairs with the smp_wmb() in finish_lock_switch().
2511 * since the task is not on the task list itself, nobody else
2512 * will try and migrate the task, hence the rq should match the
2513 * cpu we just moved it to.
2514 */ 2623 */
2515 WARN_ON(task_cpu(p) != cpu); 2624 smp_rmb();
2516 WARN_ON(p->state != TASK_WAKING);
2517 2625
2518#ifdef CONFIG_SCHEDSTATS 2626 p->sched_contributes_to_load = !!task_contributes_to_load(p);
2519 schedstat_inc(rq, ttwu_count); 2627 p->state = TASK_WAKING;
2520 if (cpu == this_cpu)
2521 schedstat_inc(rq, ttwu_local);
2522 else {
2523 struct sched_domain *sd;
2524 for_each_domain(this_cpu, sd) {
2525 if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
2526 schedstat_inc(sd, ttwu_wake_remote);
2527 break;
2528 }
2529 }
2530 }
2531#endif /* CONFIG_SCHEDSTATS */
2532 2628
2533out_activate: 2629 if (p->sched_class->task_waking)
2630 p->sched_class->task_waking(p);
2631
2632 cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
2633 if (task_cpu(p) != cpu)
2634 set_task_cpu(p, cpu);
2534#endif /* CONFIG_SMP */ 2635#endif /* CONFIG_SMP */
2535 ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, 2636
2536 cpu == this_cpu, en_flags); 2637 ttwu_queue(p, cpu);
2537 success = 1; 2638stat:
2538out_running: 2639 ttwu_stat(p, cpu, wake_flags);
2539 ttwu_post_activation(p, rq, wake_flags, success);
2540out: 2640out:
2541 task_rq_unlock(rq, &flags); 2641 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2542 put_cpu();
2543 2642
2544 return success; 2643 return success;
2545} 2644}
@@ -2548,31 +2647,34 @@ out:
2548 * try_to_wake_up_local - try to wake up a local task with rq lock held 2647 * try_to_wake_up_local - try to wake up a local task with rq lock held
2549 * @p: the thread to be awakened 2648 * @p: the thread to be awakened
2550 * 2649 *
2551 * Put @p on the run-queue if it's not already there. The caller must 2650 * Put @p on the run-queue if it's not already there. The caller must
2552 * ensure that this_rq() is locked, @p is bound to this_rq() and not 2651 * ensure that this_rq() is locked, @p is bound to this_rq() and not
2553 * the current task. this_rq() stays locked over invocation. 2652 * the current task.
2554 */ 2653 */
2555static void try_to_wake_up_local(struct task_struct *p) 2654static void try_to_wake_up_local(struct task_struct *p)
2556{ 2655{
2557 struct rq *rq = task_rq(p); 2656 struct rq *rq = task_rq(p);
2558 bool success = false;
2559 2657
2560 BUG_ON(rq != this_rq()); 2658 BUG_ON(rq != this_rq());
2561 BUG_ON(p == current); 2659 BUG_ON(p == current);
2562 lockdep_assert_held(&rq->lock); 2660 lockdep_assert_held(&rq->lock);
2563 2661
2662 if (!raw_spin_trylock(&p->pi_lock)) {
2663 raw_spin_unlock(&rq->lock);
2664 raw_spin_lock(&p->pi_lock);
2665 raw_spin_lock(&rq->lock);
2666 }
2667
2564 if (!(p->state & TASK_NORMAL)) 2668 if (!(p->state & TASK_NORMAL))
2565 return; 2669 goto out;
2566 2670
2567 if (!p->se.on_rq) { 2671 if (!p->on_rq)
2568 if (likely(!task_running(rq, p))) { 2672 ttwu_activate(rq, p, ENQUEUE_WAKEUP);
2569 schedstat_inc(rq, ttwu_count); 2673
2570 schedstat_inc(rq, ttwu_local); 2674 ttwu_do_wakeup(rq, p, 0);
2571 } 2675 ttwu_stat(p, smp_processor_id(), 0);
2572 ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); 2676out:
2573 success = true; 2677 raw_spin_unlock(&p->pi_lock);
2574 }
2575 ttwu_post_activation(p, rq, 0, success);
2576} 2678}
2577 2679
2578/** 2680/**
@@ -2605,19 +2707,21 @@ int wake_up_state(struct task_struct *p, unsigned int state)
2605 */ 2707 */
2606static void __sched_fork(struct task_struct *p) 2708static void __sched_fork(struct task_struct *p)
2607{ 2709{
2710 p->on_rq = 0;
2711
2712 p->se.on_rq = 0;
2608 p->se.exec_start = 0; 2713 p->se.exec_start = 0;
2609 p->se.sum_exec_runtime = 0; 2714 p->se.sum_exec_runtime = 0;
2610 p->se.prev_sum_exec_runtime = 0; 2715 p->se.prev_sum_exec_runtime = 0;
2611 p->se.nr_migrations = 0; 2716 p->se.nr_migrations = 0;
2612 p->se.vruntime = 0; 2717 p->se.vruntime = 0;
2718 INIT_LIST_HEAD(&p->se.group_node);
2613 2719
2614#ifdef CONFIG_SCHEDSTATS 2720#ifdef CONFIG_SCHEDSTATS
2615 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 2721 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
2616#endif 2722#endif
2617 2723
2618 INIT_LIST_HEAD(&p->rt.run_list); 2724 INIT_LIST_HEAD(&p->rt.run_list);
2619 p->se.on_rq = 0;
2620 INIT_LIST_HEAD(&p->se.group_node);
2621 2725
2622#ifdef CONFIG_PREEMPT_NOTIFIERS 2726#ifdef CONFIG_PREEMPT_NOTIFIERS
2623 INIT_HLIST_HEAD(&p->preempt_notifiers); 2727 INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2629,6 +2733,7 @@ static void __sched_fork(struct task_struct *p)
2629 */ 2733 */
2630void sched_fork(struct task_struct *p, int clone_flags) 2734void sched_fork(struct task_struct *p, int clone_flags)
2631{ 2735{
2736 unsigned long flags;
2632 int cpu = get_cpu(); 2737 int cpu = get_cpu();
2633 2738
2634 __sched_fork(p); 2739 __sched_fork(p);
@@ -2679,16 +2784,16 @@ void sched_fork(struct task_struct *p, int clone_flags)
2679 * 2784 *
2680 * Silence PROVE_RCU. 2785 * Silence PROVE_RCU.
2681 */ 2786 */
2682 rcu_read_lock(); 2787 raw_spin_lock_irqsave(&p->pi_lock, flags);
2683 set_task_cpu(p, cpu); 2788 set_task_cpu(p, cpu);
2684 rcu_read_unlock(); 2789 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
2685 2790
2686#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 2791#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
2687 if (likely(sched_info_on())) 2792 if (likely(sched_info_on()))
2688 memset(&p->sched_info, 0, sizeof(p->sched_info)); 2793 memset(&p->sched_info, 0, sizeof(p->sched_info));
2689#endif 2794#endif
2690#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 2795#if defined(CONFIG_SMP)
2691 p->oncpu = 0; 2796 p->on_cpu = 0;
2692#endif 2797#endif
2693#ifdef CONFIG_PREEMPT 2798#ifdef CONFIG_PREEMPT
2694 /* Want to start with kernel preemption disabled. */ 2799 /* Want to start with kernel preemption disabled. */
@@ -2712,37 +2817,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
2712{ 2817{
2713 unsigned long flags; 2818 unsigned long flags;
2714 struct rq *rq; 2819 struct rq *rq;
2715 int cpu __maybe_unused = get_cpu();
2716 2820
2821 raw_spin_lock_irqsave(&p->pi_lock, flags);
2717#ifdef CONFIG_SMP 2822#ifdef CONFIG_SMP
2718 rq = task_rq_lock(p, &flags);
2719 p->state = TASK_WAKING;
2720
2721 /* 2823 /*
2722 * Fork balancing, do it here and not earlier because: 2824 * Fork balancing, do it here and not earlier because:
2723 * - cpus_allowed can change in the fork path 2825 * - cpus_allowed can change in the fork path
2724 * - any previously selected cpu might disappear through hotplug 2826 * - any previously selected cpu might disappear through hotplug
2725 *
2726 * We set TASK_WAKING so that select_task_rq() can drop rq->lock
2727 * without people poking at ->cpus_allowed.
2728 */ 2827 */
2729 cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); 2828 set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0));
2730 set_task_cpu(p, cpu);
2731
2732 p->state = TASK_RUNNING;
2733 task_rq_unlock(rq, &flags);
2734#endif 2829#endif
2735 2830
2736 rq = task_rq_lock(p, &flags); 2831 rq = __task_rq_lock(p);
2737 activate_task(rq, p, 0); 2832 activate_task(rq, p, 0);
2738 trace_sched_wakeup_new(p, 1); 2833 p->on_rq = 1;
2834 trace_sched_wakeup_new(p, true);
2739 check_preempt_curr(rq, p, WF_FORK); 2835 check_preempt_curr(rq, p, WF_FORK);
2740#ifdef CONFIG_SMP 2836#ifdef CONFIG_SMP
2741 if (p->sched_class->task_woken) 2837 if (p->sched_class->task_woken)
2742 p->sched_class->task_woken(rq, p); 2838 p->sched_class->task_woken(rq, p);
2743#endif 2839#endif
2744 task_rq_unlock(rq, &flags); 2840 task_rq_unlock(rq, p, &flags);
2745 put_cpu();
2746} 2841}
2747 2842
2748#ifdef CONFIG_PREEMPT_NOTIFIERS 2843#ifdef CONFIG_PREEMPT_NOTIFIERS
@@ -3451,27 +3546,22 @@ void sched_exec(void)
3451{ 3546{
3452 struct task_struct *p = current; 3547 struct task_struct *p = current;
3453 unsigned long flags; 3548 unsigned long flags;
3454 struct rq *rq;
3455 int dest_cpu; 3549 int dest_cpu;
3456 3550
3457 rq = task_rq_lock(p, &flags); 3551 raw_spin_lock_irqsave(&p->pi_lock, flags);
3458 dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); 3552 dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
3459 if (dest_cpu == smp_processor_id()) 3553 if (dest_cpu == smp_processor_id())
3460 goto unlock; 3554 goto unlock;
3461 3555
3462 /* 3556 if (likely(cpu_active(dest_cpu))) {
3463 * select_task_rq() can race against ->cpus_allowed
3464 */
3465 if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
3466 likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) {
3467 struct migration_arg arg = { p, dest_cpu }; 3557 struct migration_arg arg = { p, dest_cpu };
3468 3558
3469 task_rq_unlock(rq, &flags); 3559 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3470 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); 3560 stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg);
3471 return; 3561 return;
3472 } 3562 }
3473unlock: 3563unlock:
3474 task_rq_unlock(rq, &flags); 3564 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
3475} 3565}
3476 3566
3477#endif 3567#endif
@@ -3508,7 +3598,7 @@ unsigned long long task_delta_exec(struct task_struct *p)
3508 3598
3509 rq = task_rq_lock(p, &flags); 3599 rq = task_rq_lock(p, &flags);
3510 ns = do_task_delta_exec(p, rq); 3600 ns = do_task_delta_exec(p, rq);
3511 task_rq_unlock(rq, &flags); 3601 task_rq_unlock(rq, p, &flags);
3512 3602
3513 return ns; 3603 return ns;
3514} 3604}
@@ -3526,7 +3616,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
3526 3616
3527 rq = task_rq_lock(p, &flags); 3617 rq = task_rq_lock(p, &flags);
3528 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); 3618 ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
3529 task_rq_unlock(rq, &flags); 3619 task_rq_unlock(rq, p, &flags);
3530 3620
3531 return ns; 3621 return ns;
3532} 3622}
@@ -3550,7 +3640,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p)
3550 rq = task_rq_lock(p, &flags); 3640 rq = task_rq_lock(p, &flags);
3551 thread_group_cputime(p, &totals); 3641 thread_group_cputime(p, &totals);
3552 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); 3642 ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq);
3553 task_rq_unlock(rq, &flags); 3643 task_rq_unlock(rq, p, &flags);
3554 3644
3555 return ns; 3645 return ns;
3556} 3646}
@@ -4036,7 +4126,7 @@ static inline void schedule_debug(struct task_struct *prev)
4036 4126
4037static void put_prev_task(struct rq *rq, struct task_struct *prev) 4127static void put_prev_task(struct rq *rq, struct task_struct *prev)
4038{ 4128{
4039 if (prev->se.on_rq) 4129 if (prev->on_rq)
4040 update_rq_clock(rq); 4130 update_rq_clock(rq);
4041 prev->sched_class->put_prev_task(rq, prev); 4131 prev->sched_class->put_prev_task(rq, prev);
4042} 4132}
@@ -4098,11 +4188,13 @@ need_resched:
4098 if (unlikely(signal_pending_state(prev->state, prev))) { 4188 if (unlikely(signal_pending_state(prev->state, prev))) {
4099 prev->state = TASK_RUNNING; 4189 prev->state = TASK_RUNNING;
4100 } else { 4190 } else {
4191 deactivate_task(rq, prev, DEQUEUE_SLEEP);
4192 prev->on_rq = 0;
4193
4101 /* 4194 /*
4102 * If a worker is going to sleep, notify and 4195 * If a worker went to sleep, notify and ask workqueue
4103 * ask workqueue whether it wants to wake up a 4196 * whether it wants to wake up a task to maintain
4104 * task to maintain concurrency. If so, wake 4197 * concurrency.
4105 * up the task.
4106 */ 4198 */
4107 if (prev->flags & PF_WQ_WORKER) { 4199 if (prev->flags & PF_WQ_WORKER) {
4108 struct task_struct *to_wakeup; 4200 struct task_struct *to_wakeup;
@@ -4111,21 +4203,20 @@ need_resched:
4111 if (to_wakeup) 4203 if (to_wakeup)
4112 try_to_wake_up_local(to_wakeup); 4204 try_to_wake_up_local(to_wakeup);
4113 } 4205 }
4114 deactivate_task(rq, prev, DEQUEUE_SLEEP); 4206
4207 /*
4208 * If we are going to sleep and we have plugged IO
4209 * queued, make sure to submit it to avoid deadlocks.
4210 */
4211 if (blk_needs_flush_plug(prev)) {
4212 raw_spin_unlock(&rq->lock);
4213 blk_flush_plug(prev);
4214 raw_spin_lock(&rq->lock);
4215 }
4115 } 4216 }
4116 switch_count = &prev->nvcsw; 4217 switch_count = &prev->nvcsw;
4117 } 4218 }
4118 4219
4119 /*
4120 * If we are going to sleep and we have plugged IO queued, make
4121 * sure to submit it to avoid deadlocks.
4122 */
4123 if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
4124 raw_spin_unlock(&rq->lock);
4125 blk_flush_plug(prev);
4126 raw_spin_lock(&rq->lock);
4127 }
4128
4129 pre_schedule(rq, prev); 4220 pre_schedule(rq, prev);
4130 4221
4131 if (unlikely(!rq->nr_running)) 4222 if (unlikely(!rq->nr_running))
@@ -4162,70 +4253,53 @@ need_resched:
4162EXPORT_SYMBOL(schedule); 4253EXPORT_SYMBOL(schedule);
4163 4254
4164#ifdef CONFIG_MUTEX_SPIN_ON_OWNER 4255#ifdef CONFIG_MUTEX_SPIN_ON_OWNER
4165/*
4166 * Look out! "owner" is an entirely speculative pointer
4167 * access and not reliable.
4168 */
4169int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
4170{
4171 unsigned int cpu;
4172 struct rq *rq;
4173 4256
4174 if (!sched_feat(OWNER_SPIN)) 4257static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
4175 return 0; 4258{
4259 bool ret = false;
4176 4260
4177#ifdef CONFIG_DEBUG_PAGEALLOC 4261 rcu_read_lock();
4178 /* 4262 if (lock->owner != owner)
4179 * Need to access the cpu field knowing that 4263 goto fail;
4180 * DEBUG_PAGEALLOC could have unmapped it if
4181 * the mutex owner just released it and exited.
4182 */
4183 if (probe_kernel_address(&owner->cpu, cpu))
4184 return 0;
4185#else
4186 cpu = owner->cpu;
4187#endif
4188 4264
4189 /* 4265 /*
4190 * Even if the access succeeded (likely case), 4266 * Ensure we emit the owner->on_cpu, dereference _after_ checking
4191 * the cpu field may no longer be valid. 4267 * lock->owner still matches owner, if that fails, owner might
4268 * point to free()d memory, if it still matches, the rcu_read_lock()
4269 * ensures the memory stays valid.
4192 */ 4270 */
4193 if (cpu >= nr_cpumask_bits) 4271 barrier();
4194 return 0;
4195 4272
4196 /* 4273 ret = owner->on_cpu;
4197 * We need to validate that we can do a 4274fail:
4198 * get_cpu() and that we have the percpu area. 4275 rcu_read_unlock();
4199 */
4200 if (!cpu_online(cpu))
4201 return 0;
4202 4276
4203 rq = cpu_rq(cpu); 4277 return ret;
4278}
4204 4279
4205 for (;;) { 4280/*
4206 /* 4281 * Look out! "owner" is an entirely speculative pointer
4207 * Owner changed, break to re-assess state. 4282 * access and not reliable.
4208 */ 4283 */
4209 if (lock->owner != owner) { 4284int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner)
4210 /* 4285{
4211 * If the lock has switched to a different owner, 4286 if (!sched_feat(OWNER_SPIN))
4212 * we likely have heavy contention. Return 0 to quit 4287 return 0;
4213 * optimistic spinning and not contend further:
4214 */
4215 if (lock->owner)
4216 return 0;
4217 break;
4218 }
4219 4288
4220 /* 4289 while (owner_running(lock, owner)) {
4221 * Is that owner really running on that cpu? 4290 if (need_resched())
4222 */
4223 if (task_thread_info(rq->curr) != owner || need_resched())
4224 return 0; 4291 return 0;
4225 4292
4226 arch_mutex_cpu_relax(); 4293 arch_mutex_cpu_relax();
4227 } 4294 }
4228 4295
4296 /*
4297 * If the owner changed to another task there is likely
4298 * heavy contention, stop spinning.
4299 */
4300 if (lock->owner)
4301 return 0;
4302
4229 return 1; 4303 return 1;
4230} 4304}
4231#endif 4305#endif
@@ -4685,19 +4759,18 @@ EXPORT_SYMBOL(sleep_on_timeout);
4685 */ 4759 */
4686void rt_mutex_setprio(struct task_struct *p, int prio) 4760void rt_mutex_setprio(struct task_struct *p, int prio)
4687{ 4761{
4688 unsigned long flags;
4689 int oldprio, on_rq, running; 4762 int oldprio, on_rq, running;
4690 struct rq *rq; 4763 struct rq *rq;
4691 const struct sched_class *prev_class; 4764 const struct sched_class *prev_class;
4692 4765
4693 BUG_ON(prio < 0 || prio > MAX_PRIO); 4766 BUG_ON(prio < 0 || prio > MAX_PRIO);
4694 4767
4695 rq = task_rq_lock(p, &flags); 4768 rq = __task_rq_lock(p);
4696 4769
4697 trace_sched_pi_setprio(p, prio); 4770 trace_sched_pi_setprio(p, prio);
4698 oldprio = p->prio; 4771 oldprio = p->prio;
4699 prev_class = p->sched_class; 4772 prev_class = p->sched_class;
4700 on_rq = p->se.on_rq; 4773 on_rq = p->on_rq;
4701 running = task_current(rq, p); 4774 running = task_current(rq, p);
4702 if (on_rq) 4775 if (on_rq)
4703 dequeue_task(rq, p, 0); 4776 dequeue_task(rq, p, 0);
@@ -4717,7 +4790,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4717 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); 4790 enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
4718 4791
4719 check_class_changed(rq, p, prev_class, oldprio); 4792 check_class_changed(rq, p, prev_class, oldprio);
4720 task_rq_unlock(rq, &flags); 4793 __task_rq_unlock(rq);
4721} 4794}
4722 4795
4723#endif 4796#endif
@@ -4745,7 +4818,7 @@ void set_user_nice(struct task_struct *p, long nice)
4745 p->static_prio = NICE_TO_PRIO(nice); 4818 p->static_prio = NICE_TO_PRIO(nice);
4746 goto out_unlock; 4819 goto out_unlock;
4747 } 4820 }
4748 on_rq = p->se.on_rq; 4821 on_rq = p->on_rq;
4749 if (on_rq) 4822 if (on_rq)
4750 dequeue_task(rq, p, 0); 4823 dequeue_task(rq, p, 0);
4751 4824
@@ -4765,7 +4838,7 @@ void set_user_nice(struct task_struct *p, long nice)
4765 resched_task(rq->curr); 4838 resched_task(rq->curr);
4766 } 4839 }
4767out_unlock: 4840out_unlock:
4768 task_rq_unlock(rq, &flags); 4841 task_rq_unlock(rq, p, &flags);
4769} 4842}
4770EXPORT_SYMBOL(set_user_nice); 4843EXPORT_SYMBOL(set_user_nice);
4771 4844
@@ -4879,8 +4952,6 @@ static struct task_struct *find_process_by_pid(pid_t pid)
4879static void 4952static void
4880__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) 4953__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
4881{ 4954{
4882 BUG_ON(p->se.on_rq);
4883
4884 p->policy = policy; 4955 p->policy = policy;
4885 p->rt_priority = prio; 4956 p->rt_priority = prio;
4886 p->normal_prio = normal_prio(p); 4957 p->normal_prio = normal_prio(p);
@@ -4995,20 +5066,17 @@ recheck:
4995 /* 5066 /*
4996 * make sure no PI-waiters arrive (or leave) while we are 5067 * make sure no PI-waiters arrive (or leave) while we are
4997 * changing the priority of the task: 5068 * changing the priority of the task:
4998 */ 5069 *
4999 raw_spin_lock_irqsave(&p->pi_lock, flags);
5000 /*
5001 * To be able to change p->policy safely, the appropriate 5070 * To be able to change p->policy safely, the appropriate
5002 * runqueue lock must be held. 5071 * runqueue lock must be held.
5003 */ 5072 */
5004 rq = __task_rq_lock(p); 5073 rq = task_rq_lock(p, &flags);
5005 5074
5006 /* 5075 /*
5007 * Changing the policy of the stop threads its a very bad idea 5076 * Changing the policy of the stop threads its a very bad idea
5008 */ 5077 */
5009 if (p == rq->stop) { 5078 if (p == rq->stop) {
5010 __task_rq_unlock(rq); 5079 task_rq_unlock(rq, p, &flags);
5011 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5012 return -EINVAL; 5080 return -EINVAL;
5013 } 5081 }
5014 5082
@@ -5032,8 +5100,7 @@ recheck:
5032 if (rt_bandwidth_enabled() && rt_policy(policy) && 5100 if (rt_bandwidth_enabled() && rt_policy(policy) &&
5033 task_group(p)->rt_bandwidth.rt_runtime == 0 && 5101 task_group(p)->rt_bandwidth.rt_runtime == 0 &&
5034 !task_group_is_autogroup(task_group(p))) { 5102 !task_group_is_autogroup(task_group(p))) {
5035 __task_rq_unlock(rq); 5103 task_rq_unlock(rq, p, &flags);
5036 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5037 return -EPERM; 5104 return -EPERM;
5038 } 5105 }
5039 } 5106 }
@@ -5042,11 +5109,10 @@ recheck:
5042 /* recheck policy now with rq lock held */ 5109 /* recheck policy now with rq lock held */
5043 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { 5110 if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
5044 policy = oldpolicy = -1; 5111 policy = oldpolicy = -1;
5045 __task_rq_unlock(rq); 5112 task_rq_unlock(rq, p, &flags);
5046 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5047 goto recheck; 5113 goto recheck;
5048 } 5114 }
5049 on_rq = p->se.on_rq; 5115 on_rq = p->on_rq;
5050 running = task_current(rq, p); 5116 running = task_current(rq, p);
5051 if (on_rq) 5117 if (on_rq)
5052 deactivate_task(rq, p, 0); 5118 deactivate_task(rq, p, 0);
@@ -5065,8 +5131,7 @@ recheck:
5065 activate_task(rq, p, 0); 5131 activate_task(rq, p, 0);
5066 5132
5067 check_class_changed(rq, p, prev_class, oldprio); 5133 check_class_changed(rq, p, prev_class, oldprio);
5068 __task_rq_unlock(rq); 5134 task_rq_unlock(rq, p, &flags);
5069 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5070 5135
5071 rt_mutex_adjust_pi(p); 5136 rt_mutex_adjust_pi(p);
5072 5137
@@ -5317,7 +5382,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
5317{ 5382{
5318 struct task_struct *p; 5383 struct task_struct *p;
5319 unsigned long flags; 5384 unsigned long flags;
5320 struct rq *rq;
5321 int retval; 5385 int retval;
5322 5386
5323 get_online_cpus(); 5387 get_online_cpus();
@@ -5332,9 +5396,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
5332 if (retval) 5396 if (retval)
5333 goto out_unlock; 5397 goto out_unlock;
5334 5398
5335 rq = task_rq_lock(p, &flags); 5399 raw_spin_lock_irqsave(&p->pi_lock, flags);
5336 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); 5400 cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
5337 task_rq_unlock(rq, &flags); 5401 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
5338 5402
5339out_unlock: 5403out_unlock:
5340 rcu_read_unlock(); 5404 rcu_read_unlock();
@@ -5659,7 +5723,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
5659 5723
5660 rq = task_rq_lock(p, &flags); 5724 rq = task_rq_lock(p, &flags);
5661 time_slice = p->sched_class->get_rr_interval(rq, p); 5725 time_slice = p->sched_class->get_rr_interval(rq, p);
5662 task_rq_unlock(rq, &flags); 5726 task_rq_unlock(rq, p, &flags);
5663 5727
5664 rcu_read_unlock(); 5728 rcu_read_unlock();
5665 jiffies_to_timespec(time_slice, &t); 5729 jiffies_to_timespec(time_slice, &t);
@@ -5777,8 +5841,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5777 rcu_read_unlock(); 5841 rcu_read_unlock();
5778 5842
5779 rq->curr = rq->idle = idle; 5843 rq->curr = rq->idle = idle;
5780#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 5844#if defined(CONFIG_SMP)
5781 idle->oncpu = 1; 5845 idle->on_cpu = 1;
5782#endif 5846#endif
5783 raw_spin_unlock_irqrestore(&rq->lock, flags); 5847 raw_spin_unlock_irqrestore(&rq->lock, flags);
5784 5848
@@ -5882,18 +5946,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
5882 unsigned int dest_cpu; 5946 unsigned int dest_cpu;
5883 int ret = 0; 5947 int ret = 0;
5884 5948
5885 /*
5886 * Serialize against TASK_WAKING so that ttwu() and wunt() can
5887 * drop the rq->lock and still rely on ->cpus_allowed.
5888 */
5889again:
5890 while (task_is_waking(p))
5891 cpu_relax();
5892 rq = task_rq_lock(p, &flags); 5949 rq = task_rq_lock(p, &flags);
5893 if (task_is_waking(p)) {
5894 task_rq_unlock(rq, &flags);
5895 goto again;
5896 }
5897 5950
5898 if (!cpumask_intersects(new_mask, cpu_active_mask)) { 5951 if (!cpumask_intersects(new_mask, cpu_active_mask)) {
5899 ret = -EINVAL; 5952 ret = -EINVAL;
@@ -5918,16 +5971,16 @@ again:
5918 goto out; 5971 goto out;
5919 5972
5920 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); 5973 dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
5921 if (migrate_task(p, rq)) { 5974 if (p->on_rq) {
5922 struct migration_arg arg = { p, dest_cpu }; 5975 struct migration_arg arg = { p, dest_cpu };
5923 /* Need help from migration thread: drop lock and wait. */ 5976 /* Need help from migration thread: drop lock and wait. */
5924 task_rq_unlock(rq, &flags); 5977 task_rq_unlock(rq, p, &flags);
5925 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); 5978 stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
5926 tlb_migrate_finish(p->mm); 5979 tlb_migrate_finish(p->mm);
5927 return 0; 5980 return 0;
5928 } 5981 }
5929out: 5982out:
5930 task_rq_unlock(rq, &flags); 5983 task_rq_unlock(rq, p, &flags);
5931 5984
5932 return ret; 5985 return ret;
5933} 5986}
@@ -5955,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5955 rq_src = cpu_rq(src_cpu); 6008 rq_src = cpu_rq(src_cpu);
5956 rq_dest = cpu_rq(dest_cpu); 6009 rq_dest = cpu_rq(dest_cpu);
5957 6010
6011 raw_spin_lock(&p->pi_lock);
5958 double_rq_lock(rq_src, rq_dest); 6012 double_rq_lock(rq_src, rq_dest);
5959 /* Already moved. */ 6013 /* Already moved. */
5960 if (task_cpu(p) != src_cpu) 6014 if (task_cpu(p) != src_cpu)
@@ -5967,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
5967 * If we're not on a rq, the next wake-up will ensure we're 6021 * If we're not on a rq, the next wake-up will ensure we're
5968 * placed properly. 6022 * placed properly.
5969 */ 6023 */
5970 if (p->se.on_rq) { 6024 if (p->on_rq) {
5971 deactivate_task(rq_src, p, 0); 6025 deactivate_task(rq_src, p, 0);
5972 set_task_cpu(p, dest_cpu); 6026 set_task_cpu(p, dest_cpu);
5973 activate_task(rq_dest, p, 0); 6027 activate_task(rq_dest, p, 0);
@@ -5977,6 +6031,7 @@ done:
5977 ret = 1; 6031 ret = 1;
5978fail: 6032fail:
5979 double_rq_unlock(rq_src, rq_dest); 6033 double_rq_unlock(rq_src, rq_dest);
6034 raw_spin_unlock(&p->pi_lock);
5980 return ret; 6035 return ret;
5981} 6036}
5982 6037
@@ -6317,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
6317 6372
6318#ifdef CONFIG_HOTPLUG_CPU 6373#ifdef CONFIG_HOTPLUG_CPU
6319 case CPU_DYING: 6374 case CPU_DYING:
6375 sched_ttwu_pending();
6320 /* Update our root-domain */ 6376 /* Update our root-domain */
6321 raw_spin_lock_irqsave(&rq->lock, flags); 6377 raw_spin_lock_irqsave(&rq->lock, flags);
6322 if (rq->rd) { 6378 if (rq->rd) {
@@ -7961,7 +8017,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p)
7961 int old_prio = p->prio; 8017 int old_prio = p->prio;
7962 int on_rq; 8018 int on_rq;
7963 8019
7964 on_rq = p->se.on_rq; 8020 on_rq = p->on_rq;
7965 if (on_rq) 8021 if (on_rq)
7966 deactivate_task(rq, p, 0); 8022 deactivate_task(rq, p, 0);
7967 __setscheduler(rq, p, SCHED_NORMAL, 0); 8023 __setscheduler(rq, p, SCHED_NORMAL, 0);
@@ -8304,7 +8360,7 @@ void sched_move_task(struct task_struct *tsk)
8304 rq = task_rq_lock(tsk, &flags); 8360 rq = task_rq_lock(tsk, &flags);
8305 8361
8306 running = task_current(rq, tsk); 8362 running = task_current(rq, tsk);
8307 on_rq = tsk->se.on_rq; 8363 on_rq = tsk->on_rq;
8308 8364
8309 if (on_rq) 8365 if (on_rq)
8310 dequeue_task(rq, tsk, 0); 8366 dequeue_task(rq, tsk, 0);
@@ -8323,7 +8379,7 @@ void sched_move_task(struct task_struct *tsk)
8323 if (on_rq) 8379 if (on_rq)
8324 enqueue_task(rq, tsk, 0); 8380 enqueue_task(rq, tsk, 0);
8325 8381
8326 task_rq_unlock(rq, &flags); 8382 task_rq_unlock(rq, tsk, &flags);
8327} 8383}
8328#endif /* CONFIG_CGROUP_SCHED */ 8384#endif /* CONFIG_CGROUP_SCHED */
8329 8385
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 7bacd83a4158..3669bec6e130 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
152 read_lock_irqsave(&tasklist_lock, flags); 152 read_lock_irqsave(&tasklist_lock, flags);
153 153
154 do_each_thread(g, p) { 154 do_each_thread(g, p) {
155 if (!p->se.on_rq || task_cpu(p) != rq_cpu) 155 if (!p->on_rq || task_cpu(p) != rq_cpu)
156 continue; 156 continue;
157 157
158 print_task(m, rq, p); 158 print_task(m, rq, p);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9c5679cfe3b0..87445931a179 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
358 } 358 }
359 359
360 cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); 360 cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
361#ifndef CONFIG_64BIT
362 smp_wmb();
363 cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
364#endif
361} 365}
362 366
363/* 367/*
@@ -1372,12 +1376,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
1372 1376
1373#ifdef CONFIG_SMP 1377#ifdef CONFIG_SMP
1374 1378
1375static void task_waking_fair(struct rq *rq, struct task_struct *p) 1379static void task_waking_fair(struct task_struct *p)
1376{ 1380{
1377 struct sched_entity *se = &p->se; 1381 struct sched_entity *se = &p->se;
1378 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1382 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1383 u64 min_vruntime;
1379 1384
1380 se->vruntime -= cfs_rq->min_vruntime; 1385#ifndef CONFIG_64BIT
1386 u64 min_vruntime_copy;
1387
1388 do {
1389 min_vruntime_copy = cfs_rq->min_vruntime_copy;
1390 smp_rmb();
1391 min_vruntime = cfs_rq->min_vruntime;
1392 } while (min_vruntime != min_vruntime_copy);
1393#else
1394 min_vruntime = cfs_rq->min_vruntime;
1395#endif
1396
1397 se->vruntime -= min_vruntime;
1381} 1398}
1382 1399
1383#ifdef CONFIG_FAIR_GROUP_SCHED 1400#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1659,7 +1676,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
1659 * preempt must be disabled. 1676 * preempt must be disabled.
1660 */ 1677 */
1661static int 1678static int
1662select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) 1679select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
1663{ 1680{
1664 struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; 1681 struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
1665 int cpu = smp_processor_id(); 1682 int cpu = smp_processor_id();
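
A note on the min_vruntime_copy hunks above: a 64-bit load is not atomic on 32-bit kernels and can tear against a concurrent update, so the writer publishes a second copy behind an smp_wmb() and the now-lockless reader in task_waking_fair() retries until the two values agree. The pattern, condensed from the update_min_vruntime() and task_waking_fair() hunks above:

        /* writer, under rq->lock (update_min_vruntime) */
        cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
#ifndef CONFIG_64BIT
        smp_wmb();
        cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif

        /* lockless reader (task_waking_fair) */
#ifndef CONFIG_64BIT
        do {
                min_vruntime_copy = cfs_rq->min_vruntime_copy;
                smp_rmb();
                min_vruntime = cfs_rq->min_vruntime;
        } while (min_vruntime != min_vruntime_copy);
#else
        min_vruntime = cfs_rq->min_vruntime;
#endif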
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 68e69acc29b9..be40f7371ee1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1)
64 * Decrement CPU power based on irq activity 64 * Decrement CPU power based on irq activity
65 */ 65 */
66SCHED_FEAT(NONIRQ_POWER, 1) 66SCHED_FEAT(NONIRQ_POWER, 1)
67
68/*
69 * Queue remote wakeups on the target CPU and process them
70 * using the scheduler IPI. Reduces rq->lock contention/bounces.
71 */
72SCHED_FEAT(TTWU_QUEUE, 1)
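
The TTWU_QUEUE feature documented above gates the wake-list machinery added to kernel/sched.c earlier in this diff: the waking CPU pushes the task onto the target runqueue's lock-free wake_list with cmpxchg() and sends a reschedule IPI only if the list was previously empty; the target CPU then drains the list from scheduler_ipi() under its own rq->lock. Condensed from those hunks:

static void ttwu_queue_remote(struct task_struct *p, int cpu)
{
        struct rq *rq = cpu_rq(cpu);
        struct task_struct *next = rq->wake_list;

        /* lock-free push onto the target rq's wake_list */
        for (;;) {
                struct task_struct *old = next;

                p->wake_entry = next;
                next = cmpxchg(&rq->wake_list, old, p);
                if (next == old)
                        break;
        }

        /* only the first enqueuer needs to kick the remote CPU */
        if (!next)
                smp_send_reschedule(cpu);
}

/* runs on the target CPU, called from scheduler_ipi() */
static void sched_ttwu_pending(void)
{
        struct rq *rq = this_rq();
        struct task_struct *list = xchg(&rq->wake_list, NULL);

        if (!list)
                return;

        raw_spin_lock(&rq->lock);
        while (list) {
                struct task_struct *p = list;

                list = list->wake_entry;
                ttwu_do_activate(rq, p, 0);
        }
        raw_spin_unlock(&rq->lock);
}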
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index a776a6396427..0a51882534ea 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -7,7 +7,7 @@
7 7
8#ifdef CONFIG_SMP 8#ifdef CONFIG_SMP
9static int 9static int
10select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) 10select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
11{ 11{
12 return task_cpu(p); /* IDLE tasks as never migrated */ 12 return task_cpu(p); /* IDLE tasks as never migrated */
13} 13}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index e7cebdc65f82..19ecb3127379 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq)
977static int find_lowest_rq(struct task_struct *task); 977static int find_lowest_rq(struct task_struct *task);
978 978
979static int 979static int
980select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) 980select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
981{ 981{
982 struct task_struct *curr;
983 struct rq *rq;
984 int cpu;
985
982 if (sd_flag != SD_BALANCE_WAKE) 986 if (sd_flag != SD_BALANCE_WAKE)
983 return smp_processor_id(); 987 return smp_processor_id();
984 988
989 cpu = task_cpu(p);
990 rq = cpu_rq(cpu);
991
992 rcu_read_lock();
993 curr = ACCESS_ONCE(rq->curr); /* unlocked access */
994
985 /* 995 /*
986 * If the current task is an RT task, then 996 * If the current task on @p's runqueue is an RT task, then
987 * try to see if we can wake this RT task up on another 997 * try to see if we can wake this RT task up on another
988 * runqueue. Otherwise simply start this RT task 998 * runqueue. Otherwise simply start this RT task
989 * on its current runqueue. 999 * on its current runqueue.
@@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
997 * lock? 1007 * lock?
998 * 1008 *
999 * For equal prio tasks, we just let the scheduler sort it out. 1009 * For equal prio tasks, we just let the scheduler sort it out.
1010 *
1011 * Otherwise, just let it ride on the affined RQ and the
1012 * post-schedule router will push the preempted task away
1013 *
1014 * This test is optimistic, if we get it wrong the load-balancer
1015 * will have to sort it out.
1000 */ 1016 */
1001 if (unlikely(rt_task(rq->curr)) && 1017 if (curr && unlikely(rt_task(curr)) &&
1002 (rq->curr->rt.nr_cpus_allowed < 2 || 1018 (curr->rt.nr_cpus_allowed < 2 ||
1003 rq->curr->prio < p->prio) && 1019 curr->prio < p->prio) &&
1004 (p->rt.nr_cpus_allowed > 1)) { 1020 (p->rt.nr_cpus_allowed > 1)) {
1005 int cpu = find_lowest_rq(p); 1021 int target = find_lowest_rq(p);
1006 1022
1007 return (cpu == -1) ? task_cpu(p) : cpu; 1023 if (target != -1)
1024 cpu = target;
1008 } 1025 }
1026 rcu_read_unlock();
1009 1027
1010 /* 1028 return cpu;
1011 * Otherwise, just let it ride on the affined RQ and the
1012 * post-schedule router will push the preempted task away
1013 */
1014 return task_cpu(p);
1015} 1029}
1016 1030
1017static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1031static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
@@ -1136,7 +1150,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
1136 * The previous task needs to be made eligible for pushing 1150 * The previous task needs to be made eligible for pushing
1137 * if it is still active 1151 * if it is still active
1138 */ 1152 */
1139 if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) 1153 if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
1140 enqueue_pushable_task(rq, p); 1154 enqueue_pushable_task(rq, p);
1141} 1155}
1142 1156
@@ -1287,7 +1301,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
1287 !cpumask_test_cpu(lowest_rq->cpu, 1301 !cpumask_test_cpu(lowest_rq->cpu,
1288 &task->cpus_allowed) || 1302 &task->cpus_allowed) ||
1289 task_running(rq, task) || 1303 task_running(rq, task) ||
1290 !task->se.on_rq)) { 1304 !task->on_rq)) {
1291 1305
1292 raw_spin_unlock(&lowest_rq->lock); 1306 raw_spin_unlock(&lowest_rq->lock);
1293 lowest_rq = NULL; 1307 lowest_rq = NULL;
@@ -1321,7 +1335,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
1321 BUG_ON(task_current(rq, p)); 1335 BUG_ON(task_current(rq, p));
1322 BUG_ON(p->rt.nr_cpus_allowed <= 1); 1336 BUG_ON(p->rt.nr_cpus_allowed <= 1);
1323 1337
1324 BUG_ON(!p->se.on_rq); 1338 BUG_ON(!p->on_rq);
1325 BUG_ON(!rt_task(p)); 1339 BUG_ON(!rt_task(p));
1326 1340
1327 return p; 1341 return p;
@@ -1467,7 +1481,7 @@ static int pull_rt_task(struct rq *this_rq)
1467 */ 1481 */
1468 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 1482 if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
1469 WARN_ON(p == src_rq->curr); 1483 WARN_ON(p == src_rq->curr);
1470 WARN_ON(!p->se.on_rq); 1484 WARN_ON(!p->on_rq);
1471 1485
1472 /* 1486 /*
1473 * There's a chance that p is higher in priority 1487 * There's a chance that p is higher in priority
@@ -1538,7 +1552,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
1538 * Update the migration status of the RQ if we have an RT task 1552 * Update the migration status of the RQ if we have an RT task
1539 * which is running AND changing its weight value. 1553 * which is running AND changing its weight value.
1540 */ 1554 */
1541 if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { 1555 if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
1542 struct rq *rq = task_rq(p); 1556 struct rq *rq = task_rq(p);
1543 1557
1544 if (!task_current(rq, p)) { 1558 if (!task_current(rq, p)) {
@@ -1608,7 +1622,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
1608 * we may need to handle the pulling of RT tasks 1622 * we may need to handle the pulling of RT tasks
1609 * now. 1623 * now.
1610 */ 1624 */
1611 if (p->se.on_rq && !rq->rt.rt_nr_running) 1625 if (p->on_rq && !rq->rt.rt_nr_running)
1612 pull_rt_task(rq); 1626 pull_rt_task(rq);
1613} 1627}
1614 1628
@@ -1638,7 +1652,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
1638 * If that current running task is also an RT task 1652 * If that current running task is also an RT task
1639 * then see if we can move to another run queue. 1653 * then see if we can move to another run queue.
1640 */ 1654 */
1641 if (p->se.on_rq && rq->curr != p) { 1655 if (p->on_rq && rq->curr != p) {
1642#ifdef CONFIG_SMP 1656#ifdef CONFIG_SMP
1643 if (rq->rt.overloaded && push_rt_task(rq) && 1657 if (rq->rt.overloaded && push_rt_task(rq) &&
1644 /* Don't resched if we changed runqueues */ 1658 /* Don't resched if we changed runqueues */
@@ -1657,7 +1671,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
1657static void 1671static void
1658prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) 1672prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
1659{ 1673{
1660 if (!p->se.on_rq) 1674 if (!p->on_rq)
1661 return; 1675 return;
1662 1676
1663 if (rq->curr == p) { 1677 if (rq->curr == p) {
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 1ba2bd40fdac..6f437632afab 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -9,8 +9,7 @@
9 9
10#ifdef CONFIG_SMP 10#ifdef CONFIG_SMP
11static int 11static int
12select_task_rq_stop(struct rq *rq, struct task_struct *p, 12select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
13 int sd_flag, int flags)
14{ 13{
15 return task_cpu(p); /* stop tasks as never migrate */ 14 return task_cpu(p); /* stop tasks as never migrate */
16} 15}
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
26{ 25{
27 struct task_struct *stop = rq->stop; 26 struct task_struct *stop = rq->stop;
28 27
29 if (stop && stop->se.on_rq) 28 if (stop && stop->on_rq)
30 return stop; 29 return stop;
31 30
32 return NULL; 31 return NULL;