author      David S. Miller <davem@davemloft.net>    2011-05-20 16:10:22 -0400
committer   David S. Miller <davem@davemloft.net>    2011-05-20 16:10:22 -0400
commit      90d3ac15e5c637d45849e83c828ed78c62886737 (patch)
tree        c5568365f32386559d2710e8981ed41e5fe0eb12 /kernel
parent      9fafbd806198eb690c9a9f9fe35a879db93a1b8d (diff)
parent      317f394160e9beb97d19a84c39b7e5eb3d7815a8 (diff)
Merge commit '317f394160e9beb97d19a84c39b7e5eb3d7815a8'
Conflicts:
arch/sparc/kernel/smp_32.c
With merge conflict help from Daniel Hellstrom.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/mutex-debug.c     |   2
-rw-r--r--  kernel/mutex-debug.h     |   2
-rw-r--r--  kernel/mutex.c           |   2
-rw-r--r--  kernel/mutex.h           |   2
-rw-r--r--  kernel/sched.c           | 627
-rw-r--r--  kernel/sched_debug.c     |   2
-rw-r--r--  kernel/sched_fair.c      |  28
-rw-r--r--  kernel/sched_features.h  |   6
-rw-r--r--  kernel/sched_idletask.c  |   2
-rw-r--r--  kernel/sched_rt.c        |  54
-rw-r--r--  kernel/sched_stoptask.c  |   5
11 files changed, 418 insertions, 314 deletions
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c
index ec815a960b5d..73da83aff418 100644
--- a/kernel/mutex-debug.c
+++ b/kernel/mutex-debug.c
@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
 		return;
 
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+	DEBUG_LOCKS_WARN_ON(lock->owner != current);
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	mutex_clear_owner(lock);
 }
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h
index 57d527a16f9d..0799fd3e4cfa 100644
--- a/kernel/mutex-debug.h
+++ b/kernel/mutex-debug.h
@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
diff --git a/kernel/mutex.c b/kernel/mutex.c
index c4195fa98900..fe4706cb0c5b 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 */
 
 	for (;;) {
-		struct thread_info *owner;
+		struct task_struct *owner;
 
 		/*
 		 * If we own the BKL, then don't spin. The owner of
diff --git a/kernel/mutex.h b/kernel/mutex.h
index 67578ca48f94..4115fbf83b12 100644
--- a/kernel/mutex.h
+++ b/kernel/mutex.h
@@ -19,7 +19,7 @@
 #ifdef CONFIG_SMP
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)
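The four mutex hunks above all follow from one data-structure change: `lock->owner` now points at the owning `struct task_struct` (set from `current`) rather than at its `thread_info`, which lets the optimistic-spin path check the owner's `on_cpu` flag directly (see the `mutex_spin_on_owner()` rework later in this commit). The sketch below is a simplified, userspace-style illustration of that scheme; the types and names (`struct task`, `my_mutex`, `my_owner_running`) are illustrative stand-ins, not the kernel's code.

```c
/* Simplified sketch of owner tracking by task pointer, not thread_info. */
struct task {
	int on_cpu;			/* 1 while the task is running on a CPU */
};

struct my_mutex {
	struct task *owner;		/* NULL when unowned */
};

static inline void my_mutex_set_owner(struct my_mutex *lock, struct task *curr)
{
	lock->owner = curr;		/* mirrors: lock->owner = current; */
}

static inline void my_mutex_clear_owner(struct my_mutex *lock)
{
	lock->owner = NULL;
}

/* A waiter keeps spinning only while the same owner holds the lock and runs. */
static inline int my_owner_running(struct my_mutex *lock, struct task *owner)
{
	return lock->owner == owner && owner->on_cpu;
}
```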
diff --git a/kernel/sched.c b/kernel/sched.c
index 312f8b95c2d4..8c9d804dc07d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -312,6 +312,9 @@ struct cfs_rq { | |||
312 | 312 | ||
313 | u64 exec_clock; | 313 | u64 exec_clock; |
314 | u64 min_vruntime; | 314 | u64 min_vruntime; |
315 | #ifndef CONFIG_64BIT | ||
316 | u64 min_vruntime_copy; | ||
317 | #endif | ||
315 | 318 | ||
316 | struct rb_root tasks_timeline; | 319 | struct rb_root tasks_timeline; |
317 | struct rb_node *rb_leftmost; | 320 | struct rb_node *rb_leftmost; |
@@ -553,6 +556,10 @@ struct rq { | |||
553 | unsigned int ttwu_count; | 556 | unsigned int ttwu_count; |
554 | unsigned int ttwu_local; | 557 | unsigned int ttwu_local; |
555 | #endif | 558 | #endif |
559 | |||
560 | #ifdef CONFIG_SMP | ||
561 | struct task_struct *wake_list; | ||
562 | #endif | ||
556 | }; | 563 | }; |
557 | 564 | ||
558 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 565 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
@@ -596,7 +603,7 @@ static inline int cpu_of(struct rq *rq) | |||
596 | * Return the group to which this tasks belongs. | 603 | * Return the group to which this tasks belongs. |
597 | * | 604 | * |
598 | * We use task_subsys_state_check() and extend the RCU verification | 605 | * We use task_subsys_state_check() and extend the RCU verification |
599 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | 606 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() |
600 | * holds that lock for each task it moves into the cgroup. Therefore | 607 | * holds that lock for each task it moves into the cgroup. Therefore |
601 | * by holding that lock, we pin the task to the current cgroup. | 608 | * by holding that lock, we pin the task to the current cgroup. |
602 | */ | 609 | */ |
@@ -606,7 +613,7 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
606 | struct cgroup_subsys_state *css; | 613 | struct cgroup_subsys_state *css; |
607 | 614 | ||
608 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 615 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
609 | lockdep_is_held(&task_rq(p)->lock)); | 616 | lockdep_is_held(&p->pi_lock)); |
610 | tg = container_of(css, struct task_group, css); | 617 | tg = container_of(css, struct task_group, css); |
611 | 618 | ||
612 | return autogroup_task_group(p, tg); | 619 | return autogroup_task_group(p, tg); |
@@ -838,18 +845,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) | |||
838 | return rq->curr == p; | 845 | return rq->curr == p; |
839 | } | 846 | } |
840 | 847 | ||
841 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
842 | static inline int task_running(struct rq *rq, struct task_struct *p) | 848 | static inline int task_running(struct rq *rq, struct task_struct *p) |
843 | { | 849 | { |
850 | #ifdef CONFIG_SMP | ||
851 | return p->on_cpu; | ||
852 | #else | ||
844 | return task_current(rq, p); | 853 | return task_current(rq, p); |
854 | #endif | ||
845 | } | 855 | } |
846 | 856 | ||
857 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
847 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 858 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
848 | { | 859 | { |
860 | #ifdef CONFIG_SMP | ||
861 | /* | ||
862 | * We can optimise this out completely for !SMP, because the | ||
863 | * SMP rebalancing from interrupt is the only thing that cares | ||
864 | * here. | ||
865 | */ | ||
866 | next->on_cpu = 1; | ||
867 | #endif | ||
849 | } | 868 | } |
850 | 869 | ||
851 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | 870 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
852 | { | 871 | { |
872 | #ifdef CONFIG_SMP | ||
873 | /* | ||
874 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
875 | * We must ensure this doesn't happen until the switch is completely | ||
876 | * finished. | ||
877 | */ | ||
878 | smp_wmb(); | ||
879 | prev->on_cpu = 0; | ||
880 | #endif | ||
853 | #ifdef CONFIG_DEBUG_SPINLOCK | 881 | #ifdef CONFIG_DEBUG_SPINLOCK |
854 | /* this is a valid case when another task releases the spinlock */ | 882 | /* this is a valid case when another task releases the spinlock */ |
855 | rq->lock.owner = current; | 883 | rq->lock.owner = current; |
@@ -865,15 +893,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
865 | } | 893 | } |
866 | 894 | ||
867 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 895 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
868 | static inline int task_running(struct rq *rq, struct task_struct *p) | ||
869 | { | ||
870 | #ifdef CONFIG_SMP | ||
871 | return p->oncpu; | ||
872 | #else | ||
873 | return task_current(rq, p); | ||
874 | #endif | ||
875 | } | ||
876 | |||
877 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 896 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
878 | { | 897 | { |
879 | #ifdef CONFIG_SMP | 898 | #ifdef CONFIG_SMP |
@@ -882,7 +901,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
882 | * SMP rebalancing from interrupt is the only thing that cares | 901 | * SMP rebalancing from interrupt is the only thing that cares |
883 | * here. | 902 | * here. |
884 | */ | 903 | */ |
885 | next->oncpu = 1; | 904 | next->on_cpu = 1; |
886 | #endif | 905 | #endif |
887 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 906 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
888 | raw_spin_unlock_irq(&rq->lock); | 907 | raw_spin_unlock_irq(&rq->lock); |
@@ -895,12 +914,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
895 | { | 914 | { |
896 | #ifdef CONFIG_SMP | 915 | #ifdef CONFIG_SMP |
897 | /* | 916 | /* |
898 | * After ->oncpu is cleared, the task can be moved to a different CPU. | 917 | * After ->on_cpu is cleared, the task can be moved to a different CPU. |
899 | * We must ensure this doesn't happen until the switch is completely | 918 | * We must ensure this doesn't happen until the switch is completely |
900 | * finished. | 919 | * finished. |
901 | */ | 920 | */ |
902 | smp_wmb(); | 921 | smp_wmb(); |
903 | prev->oncpu = 0; | 922 | prev->on_cpu = 0; |
904 | #endif | 923 | #endif |
905 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 924 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
906 | local_irq_enable(); | 925 | local_irq_enable(); |
@@ -909,23 +928,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
909 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | 928 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ |
910 | 929 | ||
911 | /* | 930 | /* |
912 | * Check whether the task is waking, we use this to synchronize ->cpus_allowed | 931 | * __task_rq_lock - lock the rq @p resides on. |
913 | * against ttwu(). | ||
914 | */ | ||
915 | static inline int task_is_waking(struct task_struct *p) | ||
916 | { | ||
917 | return unlikely(p->state == TASK_WAKING); | ||
918 | } | ||
919 | |||
920 | /* | ||
921 | * __task_rq_lock - lock the runqueue a given task resides on. | ||
922 | * Must be called interrupts disabled. | ||
923 | */ | 932 | */ |
924 | static inline struct rq *__task_rq_lock(struct task_struct *p) | 933 | static inline struct rq *__task_rq_lock(struct task_struct *p) |
925 | __acquires(rq->lock) | 934 | __acquires(rq->lock) |
926 | { | 935 | { |
927 | struct rq *rq; | 936 | struct rq *rq; |
928 | 937 | ||
938 | lockdep_assert_held(&p->pi_lock); | ||
939 | |||
929 | for (;;) { | 940 | for (;;) { |
930 | rq = task_rq(p); | 941 | rq = task_rq(p); |
931 | raw_spin_lock(&rq->lock); | 942 | raw_spin_lock(&rq->lock); |
@@ -936,22 +947,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
936 | } | 947 | } |
937 | 948 | ||
938 | /* | 949 | /* |
939 | * task_rq_lock - lock the runqueue a given task resides on and disable | 950 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
940 | * interrupts. Note the ordering: we can safely lookup the task_rq without | ||
941 | * explicitly disabling preemption. | ||
942 | */ | 951 | */ |
943 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | 952 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) |
953 | __acquires(p->pi_lock) | ||
944 | __acquires(rq->lock) | 954 | __acquires(rq->lock) |
945 | { | 955 | { |
946 | struct rq *rq; | 956 | struct rq *rq; |
947 | 957 | ||
948 | for (;;) { | 958 | for (;;) { |
949 | local_irq_save(*flags); | 959 | raw_spin_lock_irqsave(&p->pi_lock, *flags); |
950 | rq = task_rq(p); | 960 | rq = task_rq(p); |
951 | raw_spin_lock(&rq->lock); | 961 | raw_spin_lock(&rq->lock); |
952 | if (likely(rq == task_rq(p))) | 962 | if (likely(rq == task_rq(p))) |
953 | return rq; | 963 | return rq; |
954 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 964 | raw_spin_unlock(&rq->lock); |
965 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
955 | } | 966 | } |
956 | } | 967 | } |
957 | 968 | ||
@@ -961,10 +972,13 @@ static void __task_rq_unlock(struct rq *rq) | |||
961 | raw_spin_unlock(&rq->lock); | 972 | raw_spin_unlock(&rq->lock); |
962 | } | 973 | } |
963 | 974 | ||
964 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 975 | static inline void |
976 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
965 | __releases(rq->lock) | 977 | __releases(rq->lock) |
978 | __releases(p->pi_lock) | ||
966 | { | 979 | { |
967 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 980 | raw_spin_unlock(&rq->lock); |
981 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
968 | } | 982 | } |
969 | 983 | ||
970 | /* | 984 | /* |
@@ -1773,7 +1787,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1773 | update_rq_clock(rq); | 1787 | update_rq_clock(rq); |
1774 | sched_info_queued(p); | 1788 | sched_info_queued(p); |
1775 | p->sched_class->enqueue_task(rq, p, flags); | 1789 | p->sched_class->enqueue_task(rq, p, flags); |
1776 | p->se.on_rq = 1; | ||
1777 | } | 1790 | } |
1778 | 1791 | ||
1779 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | 1792 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
@@ -1781,7 +1794,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1781 | update_rq_clock(rq); | 1794 | update_rq_clock(rq); |
1782 | sched_info_dequeued(p); | 1795 | sched_info_dequeued(p); |
1783 | p->sched_class->dequeue_task(rq, p, flags); | 1796 | p->sched_class->dequeue_task(rq, p, flags); |
1784 | p->se.on_rq = 0; | ||
1785 | } | 1797 | } |
1786 | 1798 | ||
1787 | /* | 1799 | /* |
@@ -2116,7 +2128,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
2116 | * A queue event has occurred, and we're going to schedule. In | 2128 | * A queue event has occurred, and we're going to schedule. In |
2117 | * this case, we can save a useless back to back clock update. | 2129 | * this case, we can save a useless back to back clock update. |
2118 | */ | 2130 | */ |
2119 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | 2131 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) |
2120 | rq->skip_clock_update = 1; | 2132 | rq->skip_clock_update = 1; |
2121 | } | 2133 | } |
2122 | 2134 | ||
@@ -2162,6 +2174,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2162 | */ | 2174 | */ |
2163 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | 2175 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && |
2164 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2176 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2177 | |||
2178 | #ifdef CONFIG_LOCKDEP | ||
2179 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | ||
2180 | lockdep_is_held(&task_rq(p)->lock))); | ||
2181 | #endif | ||
2165 | #endif | 2182 | #endif |
2166 | 2183 | ||
2167 | trace_sched_migrate_task(p, new_cpu); | 2184 | trace_sched_migrate_task(p, new_cpu); |
@@ -2185,13 +2202,15 @@ static int migration_cpu_stop(void *data); | |||
2185 | * The task's runqueue lock must be held. | 2202 | * The task's runqueue lock must be held. |
2186 | * Returns true if you have to wait for migration thread. | 2203 | * Returns true if you have to wait for migration thread. |
2187 | */ | 2204 | */ |
2188 | static bool migrate_task(struct task_struct *p, struct rq *rq) | 2205 | static bool need_migrate_task(struct task_struct *p) |
2189 | { | 2206 | { |
2190 | /* | 2207 | /* |
2191 | * If the task is not on a runqueue (and not running), then | 2208 | * If the task is not on a runqueue (and not running), then |
2192 | * the next wake-up will properly place the task. | 2209 | * the next wake-up will properly place the task. |
2193 | */ | 2210 | */ |
2194 | return p->se.on_rq || task_running(rq, p); | 2211 | bool running = p->on_rq || p->on_cpu; |
2212 | smp_rmb(); /* finish_lock_switch() */ | ||
2213 | return running; | ||
2195 | } | 2214 | } |
2196 | 2215 | ||
2197 | /* | 2216 | /* |
@@ -2251,11 +2270,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2251 | rq = task_rq_lock(p, &flags); | 2270 | rq = task_rq_lock(p, &flags); |
2252 | trace_sched_wait_task(p); | 2271 | trace_sched_wait_task(p); |
2253 | running = task_running(rq, p); | 2272 | running = task_running(rq, p); |
2254 | on_rq = p->se.on_rq; | 2273 | on_rq = p->on_rq; |
2255 | ncsw = 0; | 2274 | ncsw = 0; |
2256 | if (!match_state || p->state == match_state) | 2275 | if (!match_state || p->state == match_state) |
2257 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | 2276 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
2258 | task_rq_unlock(rq, &flags); | 2277 | task_rq_unlock(rq, p, &flags); |
2259 | 2278 | ||
2260 | /* | 2279 | /* |
2261 | * If it changed from the expected state, bail out now. | 2280 | * If it changed from the expected state, bail out now. |
@@ -2330,7 +2349,7 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
2330 | 2349 | ||
2331 | #ifdef CONFIG_SMP | 2350 | #ifdef CONFIG_SMP |
2332 | /* | 2351 | /* |
2333 | * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. | 2352 | * ->cpus_allowed is protected by both rq->lock and p->pi_lock |
2334 | */ | 2353 | */ |
2335 | static int select_fallback_rq(int cpu, struct task_struct *p) | 2354 | static int select_fallback_rq(int cpu, struct task_struct *p) |
2336 | { | 2355 | { |
@@ -2363,12 +2382,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2363 | } | 2382 | } |
2364 | 2383 | ||
2365 | /* | 2384 | /* |
2366 | * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. | 2385 | * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. |
2367 | */ | 2386 | */ |
2368 | static inline | 2387 | static inline |
2369 | int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) | 2388 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
2370 | { | 2389 | { |
2371 | int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); | 2390 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); |
2372 | 2391 | ||
2373 | /* | 2392 | /* |
2374 | * In order not to call set_task_cpu() on a blocking task we need | 2393 | * In order not to call set_task_cpu() on a blocking task we need |
@@ -2394,27 +2413,60 @@ static void update_avg(u64 *avg, u64 sample) | |||
2394 | } | 2413 | } |
2395 | #endif | 2414 | #endif |
2396 | 2415 | ||
2397 | static inline void ttwu_activate(struct task_struct *p, struct rq *rq, | 2416 | static void |
2398 | bool is_sync, bool is_migrate, bool is_local, | 2417 | ttwu_stat(struct task_struct *p, int cpu, int wake_flags) |
2399 | unsigned long en_flags) | ||
2400 | { | 2418 | { |
2419 | #ifdef CONFIG_SCHEDSTATS | ||
2420 | struct rq *rq = this_rq(); | ||
2421 | |||
2422 | #ifdef CONFIG_SMP | ||
2423 | int this_cpu = smp_processor_id(); | ||
2424 | |||
2425 | if (cpu == this_cpu) { | ||
2426 | schedstat_inc(rq, ttwu_local); | ||
2427 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2428 | } else { | ||
2429 | struct sched_domain *sd; | ||
2430 | |||
2431 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2432 | for_each_domain(this_cpu, sd) { | ||
2433 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2434 | schedstat_inc(sd, ttwu_wake_remote); | ||
2435 | break; | ||
2436 | } | ||
2437 | } | ||
2438 | } | ||
2439 | #endif /* CONFIG_SMP */ | ||
2440 | |||
2441 | schedstat_inc(rq, ttwu_count); | ||
2401 | schedstat_inc(p, se.statistics.nr_wakeups); | 2442 | schedstat_inc(p, se.statistics.nr_wakeups); |
2402 | if (is_sync) | 2443 | |
2444 | if (wake_flags & WF_SYNC) | ||
2403 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2445 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
2404 | if (is_migrate) | 2446 | |
2447 | if (cpu != task_cpu(p)) | ||
2405 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | 2448 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); |
2406 | if (is_local) | ||
2407 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2408 | else | ||
2409 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2410 | 2449 | ||
2450 | #endif /* CONFIG_SCHEDSTATS */ | ||
2451 | } | ||
2452 | |||
2453 | static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) | ||
2454 | { | ||
2411 | activate_task(rq, p, en_flags); | 2455 | activate_task(rq, p, en_flags); |
2456 | p->on_rq = 1; | ||
2457 | |||
2458 | /* if a worker is waking up, notify workqueue */ | ||
2459 | if (p->flags & PF_WQ_WORKER) | ||
2460 | wq_worker_waking_up(p, cpu_of(rq)); | ||
2412 | } | 2461 | } |
2413 | 2462 | ||
2414 | static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | 2463 | /* |
2415 | int wake_flags, bool success) | 2464 | * Mark the task runnable and perform wakeup-preemption. |
2465 | */ | ||
2466 | static void | ||
2467 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2416 | { | 2468 | { |
2417 | trace_sched_wakeup(p, success); | 2469 | trace_sched_wakeup(p, true); |
2418 | check_preempt_curr(rq, p, wake_flags); | 2470 | check_preempt_curr(rq, p, wake_flags); |
2419 | 2471 | ||
2420 | p->state = TASK_RUNNING; | 2472 | p->state = TASK_RUNNING; |
@@ -2433,9 +2485,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2433 | rq->idle_stamp = 0; | 2485 | rq->idle_stamp = 0; |
2434 | } | 2486 | } |
2435 | #endif | 2487 | #endif |
2436 | /* if a worker is waking up, notify workqueue */ | 2488 | } |
2437 | if ((p->flags & PF_WQ_WORKER) && success) | 2489 | |
2438 | wq_worker_waking_up(p, cpu_of(rq)); | 2490 | static void |
2491 | ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2492 | { | ||
2493 | #ifdef CONFIG_SMP | ||
2494 | if (p->sched_contributes_to_load) | ||
2495 | rq->nr_uninterruptible--; | ||
2496 | #endif | ||
2497 | |||
2498 | ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING); | ||
2499 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2500 | } | ||
2501 | |||
2502 | /* | ||
2503 | * Called in case the task @p isn't fully descheduled from its runqueue, | ||
2504 | * in this case we must do a remote wakeup. Its a 'light' wakeup though, | ||
2505 | * since all we need to do is flip p->state to TASK_RUNNING, since | ||
2506 | * the task is still ->on_rq. | ||
2507 | */ | ||
2508 | static int ttwu_remote(struct task_struct *p, int wake_flags) | ||
2509 | { | ||
2510 | struct rq *rq; | ||
2511 | int ret = 0; | ||
2512 | |||
2513 | rq = __task_rq_lock(p); | ||
2514 | if (p->on_rq) { | ||
2515 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2516 | ret = 1; | ||
2517 | } | ||
2518 | __task_rq_unlock(rq); | ||
2519 | |||
2520 | return ret; | ||
2521 | } | ||
2522 | |||
2523 | #ifdef CONFIG_SMP | ||
2524 | static void sched_ttwu_pending(void) | ||
2525 | { | ||
2526 | struct rq *rq = this_rq(); | ||
2527 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
2528 | |||
2529 | if (!list) | ||
2530 | return; | ||
2531 | |||
2532 | raw_spin_lock(&rq->lock); | ||
2533 | |||
2534 | while (list) { | ||
2535 | struct task_struct *p = list; | ||
2536 | list = list->wake_entry; | ||
2537 | ttwu_do_activate(rq, p, 0); | ||
2538 | } | ||
2539 | |||
2540 | raw_spin_unlock(&rq->lock); | ||
2541 | } | ||
2542 | |||
2543 | void scheduler_ipi(void) | ||
2544 | { | ||
2545 | sched_ttwu_pending(); | ||
2546 | } | ||
2547 | |||
2548 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | ||
2549 | { | ||
2550 | struct rq *rq = cpu_rq(cpu); | ||
2551 | struct task_struct *next = rq->wake_list; | ||
2552 | |||
2553 | for (;;) { | ||
2554 | struct task_struct *old = next; | ||
2555 | |||
2556 | p->wake_entry = next; | ||
2557 | next = cmpxchg(&rq->wake_list, old, p); | ||
2558 | if (next == old) | ||
2559 | break; | ||
2560 | } | ||
2561 | |||
2562 | if (!next) | ||
2563 | smp_send_reschedule(cpu); | ||
2564 | } | ||
2565 | #endif | ||
2566 | |||
2567 | static void ttwu_queue(struct task_struct *p, int cpu) | ||
2568 | { | ||
2569 | struct rq *rq = cpu_rq(cpu); | ||
2570 | |||
2571 | #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) | ||
2572 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | ||
2573 | ttwu_queue_remote(p, cpu); | ||
2574 | return; | ||
2575 | } | ||
2576 | #endif | ||
2577 | |||
2578 | raw_spin_lock(&rq->lock); | ||
2579 | ttwu_do_activate(rq, p, 0); | ||
2580 | raw_spin_unlock(&rq->lock); | ||
2439 | } | 2581 | } |
2440 | 2582 | ||
2441 | /** | 2583 | /** |
@@ -2453,92 +2595,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2453 | * Returns %true if @p was woken up, %false if it was already running | 2595 | * Returns %true if @p was woken up, %false if it was already running |
2454 | * or @state didn't match @p's state. | 2596 | * or @state didn't match @p's state. |
2455 | */ | 2597 | */ |
2456 | static int try_to_wake_up(struct task_struct *p, unsigned int state, | 2598 | static int |
2457 | int wake_flags) | 2599 | try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
2458 | { | 2600 | { |
2459 | int cpu, orig_cpu, this_cpu, success = 0; | ||
2460 | unsigned long flags; | 2601 | unsigned long flags; |
2461 | unsigned long en_flags = ENQUEUE_WAKEUP; | 2602 | int cpu, success = 0; |
2462 | struct rq *rq; | ||
2463 | |||
2464 | this_cpu = get_cpu(); | ||
2465 | 2603 | ||
2466 | smp_wmb(); | 2604 | smp_wmb(); |
2467 | rq = task_rq_lock(p, &flags); | 2605 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2468 | if (!(p->state & state)) | 2606 | if (!(p->state & state)) |
2469 | goto out; | 2607 | goto out; |
2470 | 2608 | ||
2471 | if (p->se.on_rq) | 2609 | success = 1; /* we're going to change ->state */ |
2472 | goto out_running; | ||
2473 | |||
2474 | cpu = task_cpu(p); | 2610 | cpu = task_cpu(p); |
2475 | orig_cpu = cpu; | ||
2476 | 2611 | ||
2477 | #ifdef CONFIG_SMP | 2612 | if (p->on_rq && ttwu_remote(p, wake_flags)) |
2478 | if (unlikely(task_running(rq, p))) | 2613 | goto stat; |
2479 | goto out_activate; | ||
2480 | 2614 | ||
2615 | #ifdef CONFIG_SMP | ||
2481 | /* | 2616 | /* |
2482 | * In order to handle concurrent wakeups and release the rq->lock | 2617 | * If the owning (remote) cpu is still in the middle of schedule() with |
2483 | * we put the task in TASK_WAKING state. | 2618 | * this task as prev, wait until its done referencing the task. |
2484 | * | ||
2485 | * First fix up the nr_uninterruptible count: | ||
2486 | */ | 2619 | */ |
2487 | if (task_contributes_to_load(p)) { | 2620 | while (p->on_cpu) { |
2488 | if (likely(cpu_online(orig_cpu))) | 2621 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2489 | rq->nr_uninterruptible--; | 2622 | /* |
2490 | else | 2623 | * If called from interrupt context we could have landed in the |
2491 | this_rq()->nr_uninterruptible--; | 2624 | * middle of schedule(), in this case we should take care not |
2492 | } | 2625 | * to spin on ->on_cpu if p is current, since that would |
2493 | p->state = TASK_WAKING; | 2626 | * deadlock. |
2494 | 2627 | */ | |
2495 | if (p->sched_class->task_waking) { | 2628 | if (p == current) { |
2496 | p->sched_class->task_waking(rq, p); | 2629 | ttwu_queue(p, cpu); |
2497 | en_flags |= ENQUEUE_WAKING; | 2630 | goto stat; |
2631 | } | ||
2632 | #endif | ||
2633 | cpu_relax(); | ||
2498 | } | 2634 | } |
2499 | |||
2500 | cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); | ||
2501 | if (cpu != orig_cpu) | ||
2502 | set_task_cpu(p, cpu); | ||
2503 | __task_rq_unlock(rq); | ||
2504 | |||
2505 | rq = cpu_rq(cpu); | ||
2506 | raw_spin_lock(&rq->lock); | ||
2507 | |||
2508 | /* | 2635 | /* |
2509 | * We migrated the task without holding either rq->lock, however | 2636 | * Pairs with the smp_wmb() in finish_lock_switch(). |
2510 | * since the task is not on the task list itself, nobody else | ||
2511 | * will try and migrate the task, hence the rq should match the | ||
2512 | * cpu we just moved it to. | ||
2513 | */ | 2637 | */ |
2514 | WARN_ON(task_cpu(p) != cpu); | 2638 | smp_rmb(); |
2515 | WARN_ON(p->state != TASK_WAKING); | ||
2516 | 2639 | ||
2517 | #ifdef CONFIG_SCHEDSTATS | 2640 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
2518 | schedstat_inc(rq, ttwu_count); | 2641 | p->state = TASK_WAKING; |
2519 | if (cpu == this_cpu) | 2642 | |
2520 | schedstat_inc(rq, ttwu_local); | 2643 | if (p->sched_class->task_waking) |
2521 | else { | 2644 | p->sched_class->task_waking(p); |
2522 | struct sched_domain *sd; | ||
2523 | for_each_domain(this_cpu, sd) { | ||
2524 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2525 | schedstat_inc(sd, ttwu_wake_remote); | ||
2526 | break; | ||
2527 | } | ||
2528 | } | ||
2529 | } | ||
2530 | #endif /* CONFIG_SCHEDSTATS */ | ||
2531 | 2645 | ||
2532 | out_activate: | 2646 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2647 | if (task_cpu(p) != cpu) | ||
2648 | set_task_cpu(p, cpu); | ||
2533 | #endif /* CONFIG_SMP */ | 2649 | #endif /* CONFIG_SMP */ |
2534 | ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, | 2650 | |
2535 | cpu == this_cpu, en_flags); | 2651 | ttwu_queue(p, cpu); |
2536 | success = 1; | 2652 | stat: |
2537 | out_running: | 2653 | ttwu_stat(p, cpu, wake_flags); |
2538 | ttwu_post_activation(p, rq, wake_flags, success); | ||
2539 | out: | 2654 | out: |
2540 | task_rq_unlock(rq, &flags); | 2655 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2541 | put_cpu(); | ||
2542 | 2656 | ||
2543 | return success; | 2657 | return success; |
2544 | } | 2658 | } |
@@ -2547,31 +2661,34 @@ out: | |||
2547 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2661 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2548 | * @p: the thread to be awakened | 2662 | * @p: the thread to be awakened |
2549 | * | 2663 | * |
2550 | * Put @p on the run-queue if it's not already there. The caller must | 2664 | * Put @p on the run-queue if it's not already there. The caller must |
2551 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2665 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2552 | * the current task. this_rq() stays locked over invocation. | 2666 | * the current task. |
2553 | */ | 2667 | */ |
2554 | static void try_to_wake_up_local(struct task_struct *p) | 2668 | static void try_to_wake_up_local(struct task_struct *p) |
2555 | { | 2669 | { |
2556 | struct rq *rq = task_rq(p); | 2670 | struct rq *rq = task_rq(p); |
2557 | bool success = false; | ||
2558 | 2671 | ||
2559 | BUG_ON(rq != this_rq()); | 2672 | BUG_ON(rq != this_rq()); |
2560 | BUG_ON(p == current); | 2673 | BUG_ON(p == current); |
2561 | lockdep_assert_held(&rq->lock); | 2674 | lockdep_assert_held(&rq->lock); |
2562 | 2675 | ||
2676 | if (!raw_spin_trylock(&p->pi_lock)) { | ||
2677 | raw_spin_unlock(&rq->lock); | ||
2678 | raw_spin_lock(&p->pi_lock); | ||
2679 | raw_spin_lock(&rq->lock); | ||
2680 | } | ||
2681 | |||
2563 | if (!(p->state & TASK_NORMAL)) | 2682 | if (!(p->state & TASK_NORMAL)) |
2564 | return; | 2683 | goto out; |
2565 | 2684 | ||
2566 | if (!p->se.on_rq) { | 2685 | if (!p->on_rq) |
2567 | if (likely(!task_running(rq, p))) { | 2686 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
2568 | schedstat_inc(rq, ttwu_count); | 2687 | |
2569 | schedstat_inc(rq, ttwu_local); | 2688 | ttwu_do_wakeup(rq, p, 0); |
2570 | } | 2689 | ttwu_stat(p, smp_processor_id(), 0); |
2571 | ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); | 2690 | out: |
2572 | success = true; | 2691 | raw_spin_unlock(&p->pi_lock); |
2573 | } | ||
2574 | ttwu_post_activation(p, rq, 0, success); | ||
2575 | } | 2692 | } |
2576 | 2693 | ||
2577 | /** | 2694 | /** |
@@ -2604,19 +2721,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) | |||
2604 | */ | 2721 | */ |
2605 | static void __sched_fork(struct task_struct *p) | 2722 | static void __sched_fork(struct task_struct *p) |
2606 | { | 2723 | { |
2724 | p->on_rq = 0; | ||
2725 | |||
2726 | p->se.on_rq = 0; | ||
2607 | p->se.exec_start = 0; | 2727 | p->se.exec_start = 0; |
2608 | p->se.sum_exec_runtime = 0; | 2728 | p->se.sum_exec_runtime = 0; |
2609 | p->se.prev_sum_exec_runtime = 0; | 2729 | p->se.prev_sum_exec_runtime = 0; |
2610 | p->se.nr_migrations = 0; | 2730 | p->se.nr_migrations = 0; |
2611 | p->se.vruntime = 0; | 2731 | p->se.vruntime = 0; |
2732 | INIT_LIST_HEAD(&p->se.group_node); | ||
2612 | 2733 | ||
2613 | #ifdef CONFIG_SCHEDSTATS | 2734 | #ifdef CONFIG_SCHEDSTATS |
2614 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2735 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
2615 | #endif | 2736 | #endif |
2616 | 2737 | ||
2617 | INIT_LIST_HEAD(&p->rt.run_list); | 2738 | INIT_LIST_HEAD(&p->rt.run_list); |
2618 | p->se.on_rq = 0; | ||
2619 | INIT_LIST_HEAD(&p->se.group_node); | ||
2620 | 2739 | ||
2621 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2740 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2622 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2741 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
@@ -2628,6 +2747,7 @@ static void __sched_fork(struct task_struct *p) | |||
2628 | */ | 2747 | */ |
2629 | void sched_fork(struct task_struct *p, int clone_flags) | 2748 | void sched_fork(struct task_struct *p, int clone_flags) |
2630 | { | 2749 | { |
2750 | unsigned long flags; | ||
2631 | int cpu = get_cpu(); | 2751 | int cpu = get_cpu(); |
2632 | 2752 | ||
2633 | __sched_fork(p); | 2753 | __sched_fork(p); |
@@ -2678,16 +2798,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2678 | * | 2798 | * |
2679 | * Silence PROVE_RCU. | 2799 | * Silence PROVE_RCU. |
2680 | */ | 2800 | */ |
2681 | rcu_read_lock(); | 2801 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2682 | set_task_cpu(p, cpu); | 2802 | set_task_cpu(p, cpu); |
2683 | rcu_read_unlock(); | 2803 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2684 | 2804 | ||
2685 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2805 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2686 | if (likely(sched_info_on())) | 2806 | if (likely(sched_info_on())) |
2687 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2807 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
2688 | #endif | 2808 | #endif |
2689 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 2809 | #if defined(CONFIG_SMP) |
2690 | p->oncpu = 0; | 2810 | p->on_cpu = 0; |
2691 | #endif | 2811 | #endif |
2692 | #ifdef CONFIG_PREEMPT | 2812 | #ifdef CONFIG_PREEMPT |
2693 | /* Want to start with kernel preemption disabled. */ | 2813 | /* Want to start with kernel preemption disabled. */ |
@@ -2711,37 +2831,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2711 | { | 2831 | { |
2712 | unsigned long flags; | 2832 | unsigned long flags; |
2713 | struct rq *rq; | 2833 | struct rq *rq; |
2714 | int cpu __maybe_unused = get_cpu(); | ||
2715 | 2834 | ||
2835 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
2716 | #ifdef CONFIG_SMP | 2836 | #ifdef CONFIG_SMP |
2717 | rq = task_rq_lock(p, &flags); | ||
2718 | p->state = TASK_WAKING; | ||
2719 | |||
2720 | /* | 2837 | /* |
2721 | * Fork balancing, do it here and not earlier because: | 2838 | * Fork balancing, do it here and not earlier because: |
2722 | * - cpus_allowed can change in the fork path | 2839 | * - cpus_allowed can change in the fork path |
2723 | * - any previously selected cpu might disappear through hotplug | 2840 | * - any previously selected cpu might disappear through hotplug |
2724 | * | ||
2725 | * We set TASK_WAKING so that select_task_rq() can drop rq->lock | ||
2726 | * without people poking at ->cpus_allowed. | ||
2727 | */ | 2841 | */ |
2728 | cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); | 2842 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); |
2729 | set_task_cpu(p, cpu); | ||
2730 | |||
2731 | p->state = TASK_RUNNING; | ||
2732 | task_rq_unlock(rq, &flags); | ||
2733 | #endif | 2843 | #endif |
2734 | 2844 | ||
2735 | rq = task_rq_lock(p, &flags); | 2845 | rq = __task_rq_lock(p); |
2736 | activate_task(rq, p, 0); | 2846 | activate_task(rq, p, 0); |
2737 | trace_sched_wakeup_new(p, 1); | 2847 | p->on_rq = 1; |
2848 | trace_sched_wakeup_new(p, true); | ||
2738 | check_preempt_curr(rq, p, WF_FORK); | 2849 | check_preempt_curr(rq, p, WF_FORK); |
2739 | #ifdef CONFIG_SMP | 2850 | #ifdef CONFIG_SMP |
2740 | if (p->sched_class->task_woken) | 2851 | if (p->sched_class->task_woken) |
2741 | p->sched_class->task_woken(rq, p); | 2852 | p->sched_class->task_woken(rq, p); |
2742 | #endif | 2853 | #endif |
2743 | task_rq_unlock(rq, &flags); | 2854 | task_rq_unlock(rq, p, &flags); |
2744 | put_cpu(); | ||
2745 | } | 2855 | } |
2746 | 2856 | ||
2747 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2857 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -3450,27 +3560,22 @@ void sched_exec(void) | |||
3450 | { | 3560 | { |
3451 | struct task_struct *p = current; | 3561 | struct task_struct *p = current; |
3452 | unsigned long flags; | 3562 | unsigned long flags; |
3453 | struct rq *rq; | ||
3454 | int dest_cpu; | 3563 | int dest_cpu; |
3455 | 3564 | ||
3456 | rq = task_rq_lock(p, &flags); | 3565 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
3457 | dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); | 3566 | dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); |
3458 | if (dest_cpu == smp_processor_id()) | 3567 | if (dest_cpu == smp_processor_id()) |
3459 | goto unlock; | 3568 | goto unlock; |
3460 | 3569 | ||
3461 | /* | 3570 | if (likely(cpu_active(dest_cpu))) { |
3462 | * select_task_rq() can race against ->cpus_allowed | ||
3463 | */ | ||
3464 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | ||
3465 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { | ||
3466 | struct migration_arg arg = { p, dest_cpu }; | 3571 | struct migration_arg arg = { p, dest_cpu }; |
3467 | 3572 | ||
3468 | task_rq_unlock(rq, &flags); | 3573 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3469 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 3574 | stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); |
3470 | return; | 3575 | return; |
3471 | } | 3576 | } |
3472 | unlock: | 3577 | unlock: |
3473 | task_rq_unlock(rq, &flags); | 3578 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3474 | } | 3579 | } |
3475 | 3580 | ||
3476 | #endif | 3581 | #endif |
@@ -3507,7 +3612,7 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
3507 | 3612 | ||
3508 | rq = task_rq_lock(p, &flags); | 3613 | rq = task_rq_lock(p, &flags); |
3509 | ns = do_task_delta_exec(p, rq); | 3614 | ns = do_task_delta_exec(p, rq); |
3510 | task_rq_unlock(rq, &flags); | 3615 | task_rq_unlock(rq, p, &flags); |
3511 | 3616 | ||
3512 | return ns; | 3617 | return ns; |
3513 | } | 3618 | } |
@@ -3525,7 +3630,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
3525 | 3630 | ||
3526 | rq = task_rq_lock(p, &flags); | 3631 | rq = task_rq_lock(p, &flags); |
3527 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 3632 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); |
3528 | task_rq_unlock(rq, &flags); | 3633 | task_rq_unlock(rq, p, &flags); |
3529 | 3634 | ||
3530 | return ns; | 3635 | return ns; |
3531 | } | 3636 | } |
@@ -3549,7 +3654,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p) | |||
3549 | rq = task_rq_lock(p, &flags); | 3654 | rq = task_rq_lock(p, &flags); |
3550 | thread_group_cputime(p, &totals); | 3655 | thread_group_cputime(p, &totals); |
3551 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | 3656 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); |
3552 | task_rq_unlock(rq, &flags); | 3657 | task_rq_unlock(rq, p, &flags); |
3553 | 3658 | ||
3554 | return ns; | 3659 | return ns; |
3555 | } | 3660 | } |
@@ -4035,7 +4140,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4035 | 4140 | ||
4036 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 4141 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
4037 | { | 4142 | { |
4038 | if (prev->se.on_rq) | 4143 | if (prev->on_rq) |
4039 | update_rq_clock(rq); | 4144 | update_rq_clock(rq); |
4040 | prev->sched_class->put_prev_task(rq, prev); | 4145 | prev->sched_class->put_prev_task(rq, prev); |
4041 | } | 4146 | } |
@@ -4097,11 +4202,13 @@ need_resched: | |||
4097 | if (unlikely(signal_pending_state(prev->state, prev))) { | 4202 | if (unlikely(signal_pending_state(prev->state, prev))) { |
4098 | prev->state = TASK_RUNNING; | 4203 | prev->state = TASK_RUNNING; |
4099 | } else { | 4204 | } else { |
4205 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4206 | prev->on_rq = 0; | ||
4207 | |||
4100 | /* | 4208 | /* |
4101 | * If a worker is going to sleep, notify and | 4209 | * If a worker went to sleep, notify and ask workqueue |
4102 | * ask workqueue whether it wants to wake up a | 4210 | * whether it wants to wake up a task to maintain |
4103 | * task to maintain concurrency. If so, wake | 4211 | * concurrency. |
4104 | * up the task. | ||
4105 | */ | 4212 | */ |
4106 | if (prev->flags & PF_WQ_WORKER) { | 4213 | if (prev->flags & PF_WQ_WORKER) { |
4107 | struct task_struct *to_wakeup; | 4214 | struct task_struct *to_wakeup; |
@@ -4110,11 +4217,10 @@ need_resched: | |||
4110 | if (to_wakeup) | 4217 | if (to_wakeup) |
4111 | try_to_wake_up_local(to_wakeup); | 4218 | try_to_wake_up_local(to_wakeup); |
4112 | } | 4219 | } |
4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4114 | 4220 | ||
4115 | /* | 4221 | /* |
4116 | * If we are going to sleep and we have plugged IO queued, make | 4222 | * If we are going to sleep and we have plugged IO |
4117 | * sure to submit it to avoid deadlocks. | 4223 | * queued, make sure to submit it to avoid deadlocks. |
4118 | */ | 4224 | */ |
4119 | if (blk_needs_flush_plug(prev)) { | 4225 | if (blk_needs_flush_plug(prev)) { |
4120 | raw_spin_unlock(&rq->lock); | 4226 | raw_spin_unlock(&rq->lock); |
@@ -4161,70 +4267,53 @@ need_resched: | |||
4161 | EXPORT_SYMBOL(schedule); | 4267 | EXPORT_SYMBOL(schedule); |
4162 | 4268 | ||
4163 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4269 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4164 | /* | ||
4165 | * Look out! "owner" is an entirely speculative pointer | ||
4166 | * access and not reliable. | ||
4167 | */ | ||
4168 | int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | ||
4169 | { | ||
4170 | unsigned int cpu; | ||
4171 | struct rq *rq; | ||
4172 | 4270 | ||
4173 | if (!sched_feat(OWNER_SPIN)) | 4271 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
4174 | return 0; | 4272 | { |
4273 | bool ret = false; | ||
4175 | 4274 | ||
4176 | #ifdef CONFIG_DEBUG_PAGEALLOC | 4275 | rcu_read_lock(); |
4177 | /* | 4276 | if (lock->owner != owner) |
4178 | * Need to access the cpu field knowing that | 4277 | goto fail; |
4179 | * DEBUG_PAGEALLOC could have unmapped it if | ||
4180 | * the mutex owner just released it and exited. | ||
4181 | */ | ||
4182 | if (probe_kernel_address(&owner->cpu, cpu)) | ||
4183 | return 0; | ||
4184 | #else | ||
4185 | cpu = owner->cpu; | ||
4186 | #endif | ||
4187 | 4278 | ||
4188 | /* | 4279 | /* |
4189 | * Even if the access succeeded (likely case), | 4280 | * Ensure we emit the owner->on_cpu, dereference _after_ checking |
4190 | * the cpu field may no longer be valid. | 4281 | * lock->owner still matches owner, if that fails, owner might |
4282 | * point to free()d memory, if it still matches, the rcu_read_lock() | ||
4283 | * ensures the memory stays valid. | ||
4191 | */ | 4284 | */ |
4192 | if (cpu >= nr_cpumask_bits) | 4285 | barrier(); |
4193 | return 0; | ||
4194 | 4286 | ||
4195 | /* | 4287 | ret = owner->on_cpu; |
4196 | * We need to validate that we can do a | 4288 | fail: |
4197 | * get_cpu() and that we have the percpu area. | 4289 | rcu_read_unlock(); |
4198 | */ | ||
4199 | if (!cpu_online(cpu)) | ||
4200 | return 0; | ||
4201 | 4290 | ||
4202 | rq = cpu_rq(cpu); | 4291 | return ret; |
4292 | } | ||
4203 | 4293 | ||
4204 | for (;;) { | 4294 | /* |
4205 | /* | 4295 | * Look out! "owner" is an entirely speculative pointer |
4206 | * Owner changed, break to re-assess state. | 4296 | * access and not reliable. |
4207 | */ | 4297 | */ |
4208 | if (lock->owner != owner) { | 4298 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) |
4209 | /* | 4299 | { |
4210 | * If the lock has switched to a different owner, | 4300 | if (!sched_feat(OWNER_SPIN)) |
4211 | * we likely have heavy contention. Return 0 to quit | 4301 | return 0; |
4212 | * optimistic spinning and not contend further: | ||
4213 | */ | ||
4214 | if (lock->owner) | ||
4215 | return 0; | ||
4216 | break; | ||
4217 | } | ||
4218 | 4302 | ||
4219 | /* | 4303 | while (owner_running(lock, owner)) { |
4220 | * Is that owner really running on that cpu? | 4304 | if (need_resched()) |
4221 | */ | ||
4222 | if (task_thread_info(rq->curr) != owner || need_resched()) | ||
4223 | return 0; | 4305 | return 0; |
4224 | 4306 | ||
4225 | arch_mutex_cpu_relax(); | 4307 | arch_mutex_cpu_relax(); |
4226 | } | 4308 | } |
4227 | 4309 | ||
4310 | /* | ||
4311 | * If the owner changed to another task there is likely | ||
4312 | * heavy contention, stop spinning. | ||
4313 | */ | ||
4314 | if (lock->owner) | ||
4315 | return 0; | ||
4316 | |||
4228 | return 1; | 4317 | return 1; |
4229 | } | 4318 | } |
4230 | #endif | 4319 | #endif |
@@ -4684,19 +4773,18 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
4684 | */ | 4773 | */ |
4685 | void rt_mutex_setprio(struct task_struct *p, int prio) | 4774 | void rt_mutex_setprio(struct task_struct *p, int prio) |
4686 | { | 4775 | { |
4687 | unsigned long flags; | ||
4688 | int oldprio, on_rq, running; | 4776 | int oldprio, on_rq, running; |
4689 | struct rq *rq; | 4777 | struct rq *rq; |
4690 | const struct sched_class *prev_class; | 4778 | const struct sched_class *prev_class; |
4691 | 4779 | ||
4692 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 4780 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
4693 | 4781 | ||
4694 | rq = task_rq_lock(p, &flags); | 4782 | rq = __task_rq_lock(p); |
4695 | 4783 | ||
4696 | trace_sched_pi_setprio(p, prio); | 4784 | trace_sched_pi_setprio(p, prio); |
4697 | oldprio = p->prio; | 4785 | oldprio = p->prio; |
4698 | prev_class = p->sched_class; | 4786 | prev_class = p->sched_class; |
4699 | on_rq = p->se.on_rq; | 4787 | on_rq = p->on_rq; |
4700 | running = task_current(rq, p); | 4788 | running = task_current(rq, p); |
4701 | if (on_rq) | 4789 | if (on_rq) |
4702 | dequeue_task(rq, p, 0); | 4790 | dequeue_task(rq, p, 0); |
@@ -4716,7 +4804,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4716 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4804 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4717 | 4805 | ||
4718 | check_class_changed(rq, p, prev_class, oldprio); | 4806 | check_class_changed(rq, p, prev_class, oldprio); |
4719 | task_rq_unlock(rq, &flags); | 4807 | __task_rq_unlock(rq); |
4720 | } | 4808 | } |
4721 | 4809 | ||
4722 | #endif | 4810 | #endif |
@@ -4744,7 +4832,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4744 | p->static_prio = NICE_TO_PRIO(nice); | 4832 | p->static_prio = NICE_TO_PRIO(nice); |
4745 | goto out_unlock; | 4833 | goto out_unlock; |
4746 | } | 4834 | } |
4747 | on_rq = p->se.on_rq; | 4835 | on_rq = p->on_rq; |
4748 | if (on_rq) | 4836 | if (on_rq) |
4749 | dequeue_task(rq, p, 0); | 4837 | dequeue_task(rq, p, 0); |
4750 | 4838 | ||
@@ -4764,7 +4852,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4764 | resched_task(rq->curr); | 4852 | resched_task(rq->curr); |
4765 | } | 4853 | } |
4766 | out_unlock: | 4854 | out_unlock: |
4767 | task_rq_unlock(rq, &flags); | 4855 | task_rq_unlock(rq, p, &flags); |
4768 | } | 4856 | } |
4769 | EXPORT_SYMBOL(set_user_nice); | 4857 | EXPORT_SYMBOL(set_user_nice); |
4770 | 4858 | ||
@@ -4878,8 +4966,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
4878 | static void | 4966 | static void |
4879 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 4967 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) |
4880 | { | 4968 | { |
4881 | BUG_ON(p->se.on_rq); | ||
4882 | |||
4883 | p->policy = policy; | 4969 | p->policy = policy; |
4884 | p->rt_priority = prio; | 4970 | p->rt_priority = prio; |
4885 | p->normal_prio = normal_prio(p); | 4971 | p->normal_prio = normal_prio(p); |
@@ -4994,20 +5080,17 @@ recheck: | |||
4994 | /* | 5080 | /* |
4995 | * make sure no PI-waiters arrive (or leave) while we are | 5081 | * make sure no PI-waiters arrive (or leave) while we are |
4996 | * changing the priority of the task: | 5082 | * changing the priority of the task: |
4997 | */ | 5083 | * |
4998 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
4999 | /* | ||
5000 | * To be able to change p->policy safely, the appropriate | 5084 | * To be able to change p->policy safely, the appropriate |
5001 | * runqueue lock must be held. | 5085 | * runqueue lock must be held. |
5002 | */ | 5086 | */ |
5003 | rq = __task_rq_lock(p); | 5087 | rq = task_rq_lock(p, &flags); |
5004 | 5088 | ||
5005 | /* | 5089 | /* |
5006 | * Changing the policy of the stop threads its a very bad idea | 5090 | * Changing the policy of the stop threads its a very bad idea |
5007 | */ | 5091 | */ |
5008 | if (p == rq->stop) { | 5092 | if (p == rq->stop) { |
5009 | __task_rq_unlock(rq); | 5093 | task_rq_unlock(rq, p, &flags); |
5010 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5011 | return -EINVAL; | 5094 | return -EINVAL; |
5012 | } | 5095 | } |
5013 | 5096 | ||
@@ -5031,8 +5114,7 @@ recheck: | |||
5031 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 5114 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
5032 | task_group(p)->rt_bandwidth.rt_runtime == 0 && | 5115 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
5033 | !task_group_is_autogroup(task_group(p))) { | 5116 | !task_group_is_autogroup(task_group(p))) { |
5034 | __task_rq_unlock(rq); | 5117 | task_rq_unlock(rq, p, &flags); |
5035 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5036 | return -EPERM; | 5118 | return -EPERM; |
5037 | } | 5119 | } |
5038 | } | 5120 | } |
@@ -5041,11 +5123,10 @@ recheck: | |||
5041 | /* recheck policy now with rq lock held */ | 5123 | /* recheck policy now with rq lock held */ |
5042 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 5124 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
5043 | policy = oldpolicy = -1; | 5125 | policy = oldpolicy = -1; |
5044 | __task_rq_unlock(rq); | 5126 | task_rq_unlock(rq, p, &flags); |
5045 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5046 | goto recheck; | 5127 | goto recheck; |
5047 | } | 5128 | } |
5048 | on_rq = p->se.on_rq; | 5129 | on_rq = p->on_rq; |
5049 | running = task_current(rq, p); | 5130 | running = task_current(rq, p); |
5050 | if (on_rq) | 5131 | if (on_rq) |
5051 | deactivate_task(rq, p, 0); | 5132 | deactivate_task(rq, p, 0); |
@@ -5064,8 +5145,7 @@ recheck: | |||
5064 | activate_task(rq, p, 0); | 5145 | activate_task(rq, p, 0); |
5065 | 5146 | ||
5066 | check_class_changed(rq, p, prev_class, oldprio); | 5147 | check_class_changed(rq, p, prev_class, oldprio); |
5067 | __task_rq_unlock(rq); | 5148 | task_rq_unlock(rq, p, &flags); |
5068 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5069 | 5149 | ||
5070 | rt_mutex_adjust_pi(p); | 5150 | rt_mutex_adjust_pi(p); |
5071 | 5151 | ||
@@ -5316,7 +5396,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5316 | { | 5396 | { |
5317 | struct task_struct *p; | 5397 | struct task_struct *p; |
5318 | unsigned long flags; | 5398 | unsigned long flags; |
5319 | struct rq *rq; | ||
5320 | int retval; | 5399 | int retval; |
5321 | 5400 | ||
5322 | get_online_cpus(); | 5401 | get_online_cpus(); |
@@ -5331,9 +5410,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5331 | if (retval) | 5410 | if (retval) |
5332 | goto out_unlock; | 5411 | goto out_unlock; |
5333 | 5412 | ||
5334 | rq = task_rq_lock(p, &flags); | 5413 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
5335 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 5414 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
5336 | task_rq_unlock(rq, &flags); | 5415 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
5337 | 5416 | ||
5338 | out_unlock: | 5417 | out_unlock: |
5339 | rcu_read_unlock(); | 5418 | rcu_read_unlock(); |
@@ -5658,7 +5737,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
5658 | 5737 | ||
5659 | rq = task_rq_lock(p, &flags); | 5738 | rq = task_rq_lock(p, &flags); |
5660 | time_slice = p->sched_class->get_rr_interval(rq, p); | 5739 | time_slice = p->sched_class->get_rr_interval(rq, p); |
5661 | task_rq_unlock(rq, &flags); | 5740 | task_rq_unlock(rq, p, &flags); |
5662 | 5741 | ||
5663 | rcu_read_unlock(); | 5742 | rcu_read_unlock(); |
5664 | jiffies_to_timespec(time_slice, &t); | 5743 | jiffies_to_timespec(time_slice, &t); |
@@ -5776,8 +5855,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5776 | rcu_read_unlock(); | 5855 | rcu_read_unlock(); |
5777 | 5856 | ||
5778 | rq->curr = rq->idle = idle; | 5857 | rq->curr = rq->idle = idle; |
5779 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5858 | #if defined(CONFIG_SMP) |
5780 | idle->oncpu = 1; | 5859 | idle->on_cpu = 1; |
5781 | #endif | 5860 | #endif |
5782 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5861 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5783 | 5862 | ||
@@ -5881,18 +5960,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
5881 | unsigned int dest_cpu; | 5960 | unsigned int dest_cpu; |
5882 | int ret = 0; | 5961 | int ret = 0; |
5883 | 5962 | ||
5884 | /* | ||
5885 | * Serialize against TASK_WAKING so that ttwu() and wunt() can | ||
5886 | * drop the rq->lock and still rely on ->cpus_allowed. | ||
5887 | */ | ||
5888 | again: | ||
5889 | while (task_is_waking(p)) | ||
5890 | cpu_relax(); | ||
5891 | rq = task_rq_lock(p, &flags); | 5963 | rq = task_rq_lock(p, &flags); |
5892 | if (task_is_waking(p)) { | ||
5893 | task_rq_unlock(rq, &flags); | ||
5894 | goto again; | ||
5895 | } | ||
5896 | 5964 | ||
5897 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 5965 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
5898 | ret = -EINVAL; | 5966 | ret = -EINVAL; |
@@ -5917,16 +5985,16 @@ again: | |||
5917 | goto out; | 5985 | goto out; |
5918 | 5986 | ||
5919 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5987 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5920 | if (migrate_task(p, rq)) { | 5988 | if (need_migrate_task(p)) { |
5921 | struct migration_arg arg = { p, dest_cpu }; | 5989 | struct migration_arg arg = { p, dest_cpu }; |
5922 | /* Need help from migration thread: drop lock and wait. */ | 5990 | /* Need help from migration thread: drop lock and wait. */ |
5923 | task_rq_unlock(rq, &flags); | 5991 | task_rq_unlock(rq, p, &flags); |
5924 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 5992 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
5925 | tlb_migrate_finish(p->mm); | 5993 | tlb_migrate_finish(p->mm); |
5926 | return 0; | 5994 | return 0; |
5927 | } | 5995 | } |
5928 | out: | 5996 | out: |
5929 | task_rq_unlock(rq, &flags); | 5997 | task_rq_unlock(rq, p, &flags); |
5930 | 5998 | ||
5931 | return ret; | 5999 | return ret; |
5932 | } | 6000 | } |
@@ -5954,6 +6022,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5954 | rq_src = cpu_rq(src_cpu); | 6022 | rq_src = cpu_rq(src_cpu); |
5955 | rq_dest = cpu_rq(dest_cpu); | 6023 | rq_dest = cpu_rq(dest_cpu); |
5956 | 6024 | ||
6025 | raw_spin_lock(&p->pi_lock); | ||
5957 | double_rq_lock(rq_src, rq_dest); | 6026 | double_rq_lock(rq_src, rq_dest); |
5958 | /* Already moved. */ | 6027 | /* Already moved. */ |
5959 | if (task_cpu(p) != src_cpu) | 6028 | if (task_cpu(p) != src_cpu) |
@@ -5966,7 +6035,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5966 | * If we're not on a rq, the next wake-up will ensure we're | 6035 | * If we're not on a rq, the next wake-up will ensure we're |
5967 | * placed properly. | 6036 | * placed properly. |
5968 | */ | 6037 | */ |
5969 | if (p->se.on_rq) { | 6038 | if (p->on_rq) { |
5970 | deactivate_task(rq_src, p, 0); | 6039 | deactivate_task(rq_src, p, 0); |
5971 | set_task_cpu(p, dest_cpu); | 6040 | set_task_cpu(p, dest_cpu); |
5972 | activate_task(rq_dest, p, 0); | 6041 | activate_task(rq_dest, p, 0); |
@@ -5976,6 +6045,7 @@ done: | |||
5976 | ret = 1; | 6045 | ret = 1; |
5977 | fail: | 6046 | fail: |
5978 | double_rq_unlock(rq_src, rq_dest); | 6047 | double_rq_unlock(rq_src, rq_dest); |
6048 | raw_spin_unlock(&p->pi_lock); | ||
5979 | return ret; | 6049 | return ret; |
5980 | } | 6050 | } |
5981 | 6051 | ||
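The raw_spin_lock(&p->pi_lock) added around double_rq_lock() in __migrate_task() fixes the lock nesting and shuts out concurrent wakeups of p while it is moved between runqueues, since in this series the wakeup path takes p->pi_lock before touching any runqueue. A minimal illustration of the nesting, with the checks and the migration itself elided:

```c
/*
 * Lock nesting in __migrate_task() after this change, outermost first:
 * p->pi_lock, then both rq->lock's via double_rq_lock().  Illustration
 * only; the real function re-checks task_cpu(p) and the cpumask under
 * the locks, as the surrounding hunks show.
 */
raw_spin_lock(&p->pi_lock);		/* excludes a concurrent try_to_wake_up(p) */
double_rq_lock(rq_src, rq_dest);	/* both runqueue locks, in a fixed order */

/* ... deactivate_task(rq_src), set_task_cpu(), activate_task(rq_dest) ... */

double_rq_unlock(rq_src, rq_dest);
raw_spin_unlock(&p->pi_lock);
```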
@@ -6316,6 +6386,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6316 | 6386 | ||
6317 | #ifdef CONFIG_HOTPLUG_CPU | 6387 | #ifdef CONFIG_HOTPLUG_CPU |
6318 | case CPU_DYING: | 6388 | case CPU_DYING: |
6389 | sched_ttwu_pending(); | ||
6319 | /* Update our root-domain */ | 6390 | /* Update our root-domain */ |
6320 | raw_spin_lock_irqsave(&rq->lock, flags); | 6391 | raw_spin_lock_irqsave(&rq->lock, flags); |
6321 | if (rq->rd) { | 6392 | if (rq->rd) { |
@@ -8340,7 +8411,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
8340 | int old_prio = p->prio; | 8411 | int old_prio = p->prio; |
8341 | int on_rq; | 8412 | int on_rq; |
8342 | 8413 | ||
8343 | on_rq = p->se.on_rq; | 8414 | on_rq = p->on_rq; |
8344 | if (on_rq) | 8415 | if (on_rq) |
8345 | deactivate_task(rq, p, 0); | 8416 | deactivate_task(rq, p, 0); |
8346 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 8417 | __setscheduler(rq, p, SCHED_NORMAL, 0); |
@@ -8683,7 +8754,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8683 | rq = task_rq_lock(tsk, &flags); | 8754 | rq = task_rq_lock(tsk, &flags); |
8684 | 8755 | ||
8685 | running = task_current(rq, tsk); | 8756 | running = task_current(rq, tsk); |
8686 | on_rq = tsk->se.on_rq; | 8757 | on_rq = tsk->on_rq; |
8687 | 8758 | ||
8688 | if (on_rq) | 8759 | if (on_rq) |
8689 | dequeue_task(rq, tsk, 0); | 8760 | dequeue_task(rq, tsk, 0); |
@@ -8702,7 +8773,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8702 | if (on_rq) | 8773 | if (on_rq) |
8703 | enqueue_task(rq, tsk, 0); | 8774 | enqueue_task(rq, tsk, 0); |
8704 | 8775 | ||
8705 | task_rq_unlock(rq, &flags); | 8776 | task_rq_unlock(rq, tsk, &flags); |
8706 | } | 8777 | } |
8707 | #endif /* CONFIG_CGROUP_SCHED */ | 8778 | #endif /* CONFIG_CGROUP_SCHED */ |
8708 | 8779 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7bacd83a4158..3669bec6e130 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
152 | read_lock_irqsave(&tasklist_lock, flags); | 152 | read_lock_irqsave(&tasklist_lock, flags); |
153 | 153 | ||
154 | do_each_thread(g, p) { | 154 | do_each_thread(g, p) { |
155 | if (!p->se.on_rq || task_cpu(p) != rq_cpu) | 155 | if (!p->on_rq || task_cpu(p) != rq_cpu) |
156 | continue; | 156 | continue; |
157 | 157 | ||
158 | print_task(m, rq, p); | 158 | print_task(m, rq, p); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6fa833ab2cb8..054cebb81f7b 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) | |||
358 | } | 358 | } |
359 | 359 | ||
360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); | 360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); |
361 | #ifndef CONFIG_64BIT | ||
362 | smp_wmb(); | ||
363 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
364 | #endif | ||
361 | } | 365 | } |
362 | 366 | ||
363 | /* | 367 | /* |
@@ -1372,12 +1376,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1372 | 1376 | ||
1373 | #ifdef CONFIG_SMP | 1377 | #ifdef CONFIG_SMP |
1374 | 1378 | ||
1375 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1379 | static void task_waking_fair(struct task_struct *p) |
1376 | { | 1380 | { |
1377 | struct sched_entity *se = &p->se; | 1381 | struct sched_entity *se = &p->se; |
1378 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1382 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1383 | u64 min_vruntime; | ||
1379 | 1384 | ||
1380 | se->vruntime -= cfs_rq->min_vruntime; | 1385 | #ifndef CONFIG_64BIT |
1386 | u64 min_vruntime_copy; | ||
1387 | |||
1388 | do { | ||
1389 | min_vruntime_copy = cfs_rq->min_vruntime_copy; | ||
1390 | smp_rmb(); | ||
1391 | min_vruntime = cfs_rq->min_vruntime; | ||
1392 | } while (min_vruntime != min_vruntime_copy); | ||
1393 | #else | ||
1394 | min_vruntime = cfs_rq->min_vruntime; | ||
1395 | #endif | ||
1396 | |||
1397 | se->vruntime -= min_vruntime; | ||
1381 | } | 1398 | } |
1382 | 1399 | ||
1383 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1400 | #ifdef CONFIG_FAIR_GROUP_SCHED |
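Taken together, the two hunks above give 32-bit builds a tear-proof read of the 64-bit min_vruntime: the writer (update_min_vruntime(), under rq->lock) publishes a second copy after smp_wmb(), and the lockless reader in task_waking_fair() retries until value and copy agree, i.e. until it did not race with an update. The same pattern in isolation, using a hypothetical structure so both sides sit next to each other:

```c
/*
 * Kernel-style sketch of the min_vruntime/min_vruntime_copy pattern added
 * above; the struct and function names here are illustrative only.
 */
struct vrq {
	u64 min_vruntime;
#ifndef CONFIG_64BIT
	u64 min_vruntime_copy;
#endif
};

/* writer: runs under the runqueue lock */
static void vrq_publish(struct vrq *q, u64 v)
{
	q->min_vruntime = v;
#ifndef CONFIG_64BIT
	smp_wmb();			/* order the value before its copy */
	q->min_vruntime_copy = v;
#endif
}

/* reader: may run on another CPU without the runqueue lock */
static u64 vrq_read(struct vrq *q)
{
#ifndef CONFIG_64BIT
	u64 v, copy;

	do {
		copy = q->min_vruntime_copy;
		smp_rmb();		/* pairs with the smp_wmb() above */
		v = q->min_vruntime;
	} while (v != copy);		/* mismatch => torn/racing read, retry */

	return v;
#else
	return q->min_vruntime;		/* 64-bit loads do not tear */
#endif
}
```

On 64-bit the copy and both barriers compile away, which is exactly what the #ifndef CONFIG_64BIT guards in the hunks do.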
@@ -1657,7 +1674,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1657 | * preempt must be disabled. | 1674 | * preempt must be disabled. |
1658 | */ | 1675 | */ |
1659 | static int | 1676 | static int |
1660 | select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) | 1677 | select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) |
1661 | { | 1678 | { |
1662 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | 1679 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; |
1663 | int cpu = smp_processor_id(); | 1680 | int cpu = smp_processor_id(); |
@@ -1789,10 +1806,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se) | |||
1789 | * This is especially important for buddies when the leftmost | 1806 | * This is especially important for buddies when the leftmost |
1790 | * task is higher priority than the buddy. | 1807 | * task is higher priority than the buddy. |
1791 | */ | 1808 | */ |
1792 | if (unlikely(se->load.weight != NICE_0_LOAD)) | 1809 | return calc_delta_fair(gran, se); |
1793 | gran = calc_delta_fair(gran, se); | ||
1794 | |||
1795 | return gran; | ||
1796 | } | 1810 | } |
1797 | 1811 | ||
1798 | /* | 1812 | /* |
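The NICE_0_LOAD test removed from wakeup_gran() was redundant: calc_delta_fair() already short-circuits entities at the default weight, so it can simply be called unconditionally. Either way, the wakeup granularity ends up scaled by roughly NICE_0_LOAD / se->load.weight, so heavier (higher-priority) entities see a smaller effective granularity. A stand-alone back-of-the-envelope version of that scaling in plain integer math (the kernel's calc_delta_mine() uses fixed-point arithmetic with rounding instead), assuming the ~1 ms base granularity of the time and the standard nice-level weights:

```c
/*
 * Rough illustration of the scaling wakeup_gran() now always applies.
 * The weights are the scheduler's per-nice load weights; the 1 ms base
 * is the (pre CPU-scaling) default wakeup granularity and is an
 * assumption for the sake of the example.
 */
#include <stdio.h>

#define NICE_0_LOAD 1024UL

static unsigned long scaled_gran(unsigned long gran_ns, unsigned long weight)
{
	return gran_ns * NICE_0_LOAD / weight;
}

int main(void)
{
	unsigned long gran = 1000000UL;		/* ~1 ms in nanoseconds */

	printf("nice  0 (weight 1024): %lu ns\n", scaled_gran(gran, 1024));
	printf("nice -5 (weight 3121): %lu ns\n", scaled_gran(gran, 3121));
	printf("nice +5 (weight  335): %lu ns\n", scaled_gran(gran, 335));
	return 0;
}
```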
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 68e69acc29b9..be40f7371ee1 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1) | |||
64 | * Decrement CPU power based on irq activity | 64 | * Decrement CPU power based on irq activity |
65 | */ | 65 | */ |
66 | SCHED_FEAT(NONIRQ_POWER, 1) | 66 | SCHED_FEAT(NONIRQ_POWER, 1) |
67 | |||
68 | /* | ||
69 | * Queue remote wakeups on the target CPU and process them | ||
70 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | ||
71 | */ | ||
72 | SCHED_FEAT(TTWU_QUEUE, 1) | ||
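TTWU_QUEUE is the feature bit for the remote-wakeup rework this merge brings in: instead of grabbing the target CPU's rq->lock from the waking CPU, the wakeup is queued on the target CPU and handled from the scheduler IPI, which is also why migration_call() above now calls sched_ttwu_pending() on CPU_DYING to drain anything still queued to a CPU going down. A deliberately simplified sketch of the idea (per-CPU pending list plus IPI), not the kernel's actual data structures:

```c
/*
 * Simplified illustration of queueing remote wakeups, not the kernel's
 * implementation: wakers push onto a per-CPU lockless list and kick the
 * target with a scheduler IPI; only the target CPU ever turns the entries
 * into real enqueues under its own rq->lock.
 */
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>

struct pending_wakeup {
	struct task_struct	*task;
	struct pending_wakeup	*next;
};

/* hypothetical per-CPU list head; the names here are illustrative */
static DEFINE_PER_CPU(struct pending_wakeup *, wake_pending);

static void queue_remote_wakeup(int cpu, struct pending_wakeup *w)
{
	struct pending_wakeup **head = &per_cpu(wake_pending, cpu);
	struct pending_wakeup *old;

	do {					/* lockless push onto the target's list */
		old = *head;
		w->next = old;
	} while (cmpxchg(head, old, w) != old);

	smp_send_reschedule(cpu);		/* raise the scheduler IPI */
}

/* runs on the target CPU, from the scheduler IPI (or CPU_DYING above) */
static void drain_remote_wakeups(void)
{
	struct pending_wakeup *w = xchg(this_cpu_ptr(&wake_pending), NULL);

	while (w) {
		struct pending_wakeup *next = w->next;

		/* activate w->task on the local runqueue, under the local rq->lock */
		w = next;
	}
}
```

The win the feature comment describes is that a cross-CPU rq->lock acquisition is replaced by one IPI plus purely local locking on the target.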
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a776a6396427..0a51882534ea 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | static int | 9 | static int |
10 | select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 10 | select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
11 | { | 11 | { |
12 | return task_cpu(p); /* IDLE tasks as never migrated */ | 12 | return task_cpu(p); /* IDLE tasks as never migrated */ |
13 | } | 13 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc65f82..19ecb3127379 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq) | |||
977 | static int find_lowest_rq(struct task_struct *task); | 977 | static int find_lowest_rq(struct task_struct *task); |
978 | 978 | ||
979 | static int | 979 | static int |
980 | select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 980 | select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
981 | { | 981 | { |
982 | struct task_struct *curr; | ||
983 | struct rq *rq; | ||
984 | int cpu; | ||
985 | |||
982 | if (sd_flag != SD_BALANCE_WAKE) | 986 | if (sd_flag != SD_BALANCE_WAKE) |
983 | return smp_processor_id(); | 987 | return smp_processor_id(); |
984 | 988 | ||
989 | cpu = task_cpu(p); | ||
990 | rq = cpu_rq(cpu); | ||
991 | |||
992 | rcu_read_lock(); | ||
993 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | ||
994 | |||
985 | /* | 995 | /* |
986 | * If the current task is an RT task, then | 996 | * If the current task on @p's runqueue is an RT task, then |
987 | * try to see if we can wake this RT task up on another | 997 | * try to see if we can wake this RT task up on another |
988 | * runqueue. Otherwise simply start this RT task | 998 | * runqueue. Otherwise simply start this RT task |
989 | * on its current runqueue. | 999 | * on its current runqueue. |
@@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | |||
997 | * lock? | 1007 | * lock? |
998 | * | 1008 | * |
999 | * For equal prio tasks, we just let the scheduler sort it out. | 1009 | * For equal prio tasks, we just let the scheduler sort it out. |
1010 | * | ||
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | * | ||
1014 | * This test is optimistic, if we get it wrong the load-balancer | ||
1015 | * will have to sort it out. | ||
1000 | */ | 1016 | */ |
1001 | if (unlikely(rt_task(rq->curr)) && | 1017 | if (curr && unlikely(rt_task(curr)) && |
1002 | (rq->curr->rt.nr_cpus_allowed < 2 || | 1018 | (curr->rt.nr_cpus_allowed < 2 || |
1003 | rq->curr->prio < p->prio) && | 1019 | curr->prio < p->prio) && |
1004 | (p->rt.nr_cpus_allowed > 1)) { | 1020 | (p->rt.nr_cpus_allowed > 1)) { |
1005 | int cpu = find_lowest_rq(p); | 1021 | int target = find_lowest_rq(p); |
1006 | 1022 | ||
1007 | return (cpu == -1) ? task_cpu(p) : cpu; | 1023 | if (target != -1) |
1024 | cpu = target; | ||
1008 | } | 1025 | } |
1026 | rcu_read_unlock(); | ||
1009 | 1027 | ||
1010 | /* | 1028 | return cpu; |
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | */ | ||
1014 | return task_cpu(p); | ||
1015 | } | 1029 | } |
1016 | 1030 | ||
1017 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 1031 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
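The rewritten select_task_rq_rt() peeks at the target runqueue's ->curr without taking rq->lock: rcu_read_lock() keeps the task_struct from being freed under the check, ACCESS_ONCE() forces a single load, and the added comment openly accepts that the result may be stale ("optimistic, if we get it wrong the load-balancer will have to sort it out"). The same unlocked-peek pattern reduced to a helper, as a sketch rather than kernel code:

```c
/*
 * Sketch of the unlocked ->curr check used above.  cpu_rq() and rt_task()
 * are used as in the hunk; the helper itself is hypothetical.
 */
static bool remote_cpu_runs_rt(int cpu)
{
	struct task_struct *curr;
	bool ret;

	rcu_read_lock();
	curr = ACCESS_ONCE(cpu_rq(cpu)->curr);	/* unlocked, possibly stale */
	ret = curr && rt_task(curr);
	rcu_read_unlock();

	return ret;
}
```

Since the decision only steers where the task is woken, a stale answer costs at most a suboptimal placement that the push/pull logic or the load balancer corrects later.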
@@ -1136,7 +1150,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1136 | * The previous task needs to be made eligible for pushing | 1150 | * The previous task needs to be made eligible for pushing |
1137 | * if it is still active | 1151 | * if it is still active |
1138 | */ | 1152 | */ |
1139 | if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) | 1153 | if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) |
1140 | enqueue_pushable_task(rq, p); | 1154 | enqueue_pushable_task(rq, p); |
1141 | } | 1155 | } |
1142 | 1156 | ||
@@ -1287,7 +1301,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
1287 | !cpumask_test_cpu(lowest_rq->cpu, | 1301 | !cpumask_test_cpu(lowest_rq->cpu, |
1288 | &task->cpus_allowed) || | 1302 | &task->cpus_allowed) || |
1289 | task_running(rq, task) || | 1303 | task_running(rq, task) || |
1290 | !task->se.on_rq)) { | 1304 | !task->on_rq)) { |
1291 | 1305 | ||
1292 | raw_spin_unlock(&lowest_rq->lock); | 1306 | raw_spin_unlock(&lowest_rq->lock); |
1293 | lowest_rq = NULL; | 1307 | lowest_rq = NULL; |
@@ -1321,7 +1335,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) | |||
1321 | BUG_ON(task_current(rq, p)); | 1335 | BUG_ON(task_current(rq, p)); |
1322 | BUG_ON(p->rt.nr_cpus_allowed <= 1); | 1336 | BUG_ON(p->rt.nr_cpus_allowed <= 1); |
1323 | 1337 | ||
1324 | BUG_ON(!p->se.on_rq); | 1338 | BUG_ON(!p->on_rq); |
1325 | BUG_ON(!rt_task(p)); | 1339 | BUG_ON(!rt_task(p)); |
1326 | 1340 | ||
1327 | return p; | 1341 | return p; |
@@ -1467,7 +1481,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1467 | */ | 1481 | */ |
1468 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { | 1482 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { |
1469 | WARN_ON(p == src_rq->curr); | 1483 | WARN_ON(p == src_rq->curr); |
1470 | WARN_ON(!p->se.on_rq); | 1484 | WARN_ON(!p->on_rq); |
1471 | 1485 | ||
1472 | /* | 1486 | /* |
1473 | * There's a chance that p is higher in priority | 1487 | * There's a chance that p is higher in priority |
@@ -1538,7 +1552,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1538 | * Update the migration status of the RQ if we have an RT task | 1552 | * Update the migration status of the RQ if we have an RT task |
1539 | * which is running AND changing its weight value. | 1553 | * which is running AND changing its weight value. |
1540 | */ | 1554 | */ |
1541 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { | 1555 | if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { |
1542 | struct rq *rq = task_rq(p); | 1556 | struct rq *rq = task_rq(p); |
1543 | 1557 | ||
1544 | if (!task_current(rq, p)) { | 1558 | if (!task_current(rq, p)) { |
@@ -1608,7 +1622,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1608 | * we may need to handle the pulling of RT tasks | 1622 | * we may need to handle the pulling of RT tasks |
1609 | * now. | 1623 | * now. |
1610 | */ | 1624 | */ |
1611 | if (p->se.on_rq && !rq->rt.rt_nr_running) | 1625 | if (p->on_rq && !rq->rt.rt_nr_running) |
1612 | pull_rt_task(rq); | 1626 | pull_rt_task(rq); |
1613 | } | 1627 | } |
1614 | 1628 | ||
@@ -1638,7 +1652,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1638 | * If that current running task is also an RT task | 1652 | * If that current running task is also an RT task |
1639 | * then see if we can move to another run queue. | 1653 | * then see if we can move to another run queue. |
1640 | */ | 1654 | */ |
1641 | if (p->se.on_rq && rq->curr != p) { | 1655 | if (p->on_rq && rq->curr != p) { |
1642 | #ifdef CONFIG_SMP | 1656 | #ifdef CONFIG_SMP |
1643 | if (rq->rt.overloaded && push_rt_task(rq) && | 1657 | if (rq->rt.overloaded && push_rt_task(rq) && |
1644 | /* Don't resched if we changed runqueues */ | 1658 | /* Don't resched if we changed runqueues */ |
@@ -1657,7 +1671,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1657 | static void | 1671 | static void |
1658 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | 1672 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) |
1659 | { | 1673 | { |
1660 | if (!p->se.on_rq) | 1674 | if (!p->on_rq) |
1661 | return; | 1675 | return; |
1662 | 1676 | ||
1663 | if (rq->curr == p) { | 1677 | if (rq->curr == p) { |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 1ba2bd40fdac..6f437632afab 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -9,8 +9,7 @@ | |||
9 | 9 | ||
10 | #ifdef CONFIG_SMP | 10 | #ifdef CONFIG_SMP |
11 | static int | 11 | static int |
12 | select_task_rq_stop(struct rq *rq, struct task_struct *p, | 12 | select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) |
13 | int sd_flag, int flags) | ||
14 | { | 13 | { |
15 | return task_cpu(p); /* stop tasks as never migrate */ | 14 | return task_cpu(p); /* stop tasks as never migrate */ |
16 | } | 15 | } |
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) | |||
26 | { | 25 | { |
27 | struct task_struct *stop = rq->stop; | 26 | struct task_struct *stop = rq->stop; |
28 | 27 | ||
29 | if (stop && stop->se.on_rq) | 28 | if (stop && stop->on_rq) |
30 | return stop; | 29 | return stop; |
31 | 30 | ||
32 | return NULL; | 31 | return NULL; |