diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-04-18 08:53:18 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-04-18 08:53:33 -0400 |
commit | 6ddafdaab3f809b110ada253d2f2d4910ebd3ac5 (patch) | |
tree | 366bb7513511a05b6e11ab89bfe3b2dbd1d62a03 /kernel | |
parent | 3905c54f2bd2c6f937f87307987ca072eabc3e7b (diff) | |
parent | bd8e7dded88a3e1c085c333f19ff31387616f71a (diff) |
Merge branch 'sched/locking' into sched/core
Merge reason: the rq locking changes are stable,
propagate them into the .40 queue.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/mutex-debug.c | 2 | ||||
-rw-r--r-- | kernel/mutex-debug.h | 2 | ||||
-rw-r--r-- | kernel/mutex.c | 2 | ||||
-rw-r--r-- | kernel/mutex.h | 2 | ||||
-rw-r--r-- | kernel/power/Kconfig | 6 | ||||
-rw-r--r-- | kernel/sched.c | 650 | ||||
-rw-r--r-- | kernel/sched_debug.c | 2 | ||||
-rw-r--r-- | kernel/sched_fair.c | 23 | ||||
-rw-r--r-- | kernel/sched_features.h | 6 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 2 | ||||
-rw-r--r-- | kernel/sched_rt.c | 54 | ||||
-rw-r--r-- | kernel/sched_stoptask.c | 5 |
12 files changed, 426 insertions, 330 deletions
diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index ec815a960b5d..73da83aff418 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c | |||
@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock) | |||
75 | return; | 75 | return; |
76 | 76 | ||
77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); | 77 | DEBUG_LOCKS_WARN_ON(lock->magic != lock); |
78 | DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); | 78 | DEBUG_LOCKS_WARN_ON(lock->owner != current); |
79 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | 79 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
80 | mutex_clear_owner(lock); | 80 | mutex_clear_owner(lock); |
81 | } | 81 | } |
diff --git a/kernel/mutex-debug.h b/kernel/mutex-debug.h index 57d527a16f9d..0799fd3e4cfa 100644 --- a/kernel/mutex-debug.h +++ b/kernel/mutex-debug.h | |||
@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name, | |||
29 | 29 | ||
30 | static inline void mutex_set_owner(struct mutex *lock) | 30 | static inline void mutex_set_owner(struct mutex *lock) |
31 | { | 31 | { |
32 | lock->owner = current_thread_info(); | 32 | lock->owner = current; |
33 | } | 33 | } |
34 | 34 | ||
35 | static inline void mutex_clear_owner(struct mutex *lock) | 35 | static inline void mutex_clear_owner(struct mutex *lock) |
diff --git a/kernel/mutex.c b/kernel/mutex.c index c4195fa98900..fe4706cb0c5b 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c | |||
@@ -160,7 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
160 | */ | 160 | */ |
161 | 161 | ||
162 | for (;;) { | 162 | for (;;) { |
163 | struct thread_info *owner; | 163 | struct task_struct *owner; |
164 | 164 | ||
165 | /* | 165 | /* |
166 | * If we own the BKL, then don't spin. The owner of | 166 | * If we own the BKL, then don't spin. The owner of |
diff --git a/kernel/mutex.h b/kernel/mutex.h index 67578ca48f94..4115fbf83b12 100644 --- a/kernel/mutex.h +++ b/kernel/mutex.h | |||
@@ -19,7 +19,7 @@ | |||
19 | #ifdef CONFIG_SMP | 19 | #ifdef CONFIG_SMP |
20 | static inline void mutex_set_owner(struct mutex *lock) | 20 | static inline void mutex_set_owner(struct mutex *lock) |
21 | { | 21 | { |
22 | lock->owner = current_thread_info(); | 22 | lock->owner = current; |
23 | } | 23 | } |
24 | 24 | ||
25 | static inline void mutex_clear_owner(struct mutex *lock) | 25 | static inline void mutex_clear_owner(struct mutex *lock) |
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 4603f08dc47b..6de9a8fc3417 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig | |||
@@ -18,9 +18,13 @@ config SUSPEND_FREEZER | |||
18 | 18 | ||
19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. | 19 | Turning OFF this setting is NOT recommended! If in doubt, say Y. |
20 | 20 | ||
21 | config HIBERNATE_CALLBACKS | ||
22 | bool | ||
23 | |||
21 | config HIBERNATION | 24 | config HIBERNATION |
22 | bool "Hibernation (aka 'suspend to disk')" | 25 | bool "Hibernation (aka 'suspend to disk')" |
23 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE | 26 | depends on SWAP && ARCH_HIBERNATION_POSSIBLE |
27 | select HIBERNATE_CALLBACKS | ||
24 | select LZO_COMPRESS | 28 | select LZO_COMPRESS |
25 | select LZO_DECOMPRESS | 29 | select LZO_DECOMPRESS |
26 | ---help--- | 30 | ---help--- |
@@ -85,7 +89,7 @@ config PM_STD_PARTITION | |||
85 | 89 | ||
86 | config PM_SLEEP | 90 | config PM_SLEEP |
87 | def_bool y | 91 | def_bool y |
88 | depends on SUSPEND || HIBERNATION || XEN_SAVE_RESTORE | 92 | depends on SUSPEND || HIBERNATE_CALLBACKS |
89 | 93 | ||
90 | config PM_SLEEP_SMP | 94 | config PM_SLEEP_SMP |
91 | def_bool y | 95 | def_bool y |
diff --git a/kernel/sched.c b/kernel/sched.c index 506cb8147c70..0cfe0310ed5d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -312,6 +312,9 @@ struct cfs_rq { | |||
312 | 312 | ||
313 | u64 exec_clock; | 313 | u64 exec_clock; |
314 | u64 min_vruntime; | 314 | u64 min_vruntime; |
315 | #ifndef CONFIG_64BIT | ||
316 | u64 min_vruntime_copy; | ||
317 | #endif | ||
315 | 318 | ||
316 | struct rb_root tasks_timeline; | 319 | struct rb_root tasks_timeline; |
317 | struct rb_node *rb_leftmost; | 320 | struct rb_node *rb_leftmost; |
@@ -554,6 +557,10 @@ struct rq { | |||
554 | unsigned int ttwu_count; | 557 | unsigned int ttwu_count; |
555 | unsigned int ttwu_local; | 558 | unsigned int ttwu_local; |
556 | #endif | 559 | #endif |
560 | |||
561 | #ifdef CONFIG_SMP | ||
562 | struct task_struct *wake_list; | ||
563 | #endif | ||
557 | }; | 564 | }; |
558 | 565 | ||
559 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 566 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
@@ -597,7 +604,7 @@ static inline int cpu_of(struct rq *rq) | |||
597 | * Return the group to which this task belongs. | 604 | * Return the group to which this task belongs. |
598 | * | 605 | * |
599 | * We use task_subsys_state_check() and extend the RCU verification | 606 | * We use task_subsys_state_check() and extend the RCU verification |
600 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | 607 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() |
601 | * holds that lock for each task it moves into the cgroup. Therefore | 608 | * holds that lock for each task it moves into the cgroup. Therefore |
602 | * by holding that lock, we pin the task to the current cgroup. | 609 | * by holding that lock, we pin the task to the current cgroup. |
603 | */ | 610 | */ |
@@ -607,7 +614,7 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
607 | struct cgroup_subsys_state *css; | 614 | struct cgroup_subsys_state *css; |
608 | 615 | ||
609 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 616 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
610 | lockdep_is_held(&task_rq(p)->lock)); | 617 | lockdep_is_held(&p->pi_lock)); |
611 | tg = container_of(css, struct task_group, css); | 618 | tg = container_of(css, struct task_group, css); |
612 | 619 | ||
613 | return autogroup_task_group(p, tg); | 620 | return autogroup_task_group(p, tg); |
@@ -839,18 +846,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) | |||
839 | return rq->curr == p; | 846 | return rq->curr == p; |
840 | } | 847 | } |
841 | 848 | ||
842 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
843 | static inline int task_running(struct rq *rq, struct task_struct *p) | 849 | static inline int task_running(struct rq *rq, struct task_struct *p) |
844 | { | 850 | { |
851 | #ifdef CONFIG_SMP | ||
852 | return p->on_cpu; | ||
853 | #else | ||
845 | return task_current(rq, p); | 854 | return task_current(rq, p); |
855 | #endif | ||
846 | } | 856 | } |
847 | 857 | ||
858 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
848 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 859 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
849 | { | 860 | { |
861 | #ifdef CONFIG_SMP | ||
862 | /* | ||
863 | * We can optimise this out completely for !SMP, because the | ||
864 | * SMP rebalancing from interrupt is the only thing that cares | ||
865 | * here. | ||
866 | */ | ||
867 | next->on_cpu = 1; | ||
868 | #endif | ||
850 | } | 869 | } |
851 | 870 | ||
852 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | 871 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
853 | { | 872 | { |
873 | #ifdef CONFIG_SMP | ||
874 | /* | ||
875 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
876 | * We must ensure this doesn't happen until the switch is completely | ||
877 | * finished. | ||
878 | */ | ||
879 | smp_wmb(); | ||
880 | prev->on_cpu = 0; | ||
881 | #endif | ||
854 | #ifdef CONFIG_DEBUG_SPINLOCK | 882 | #ifdef CONFIG_DEBUG_SPINLOCK |
855 | /* this is a valid case when another task releases the spinlock */ | 883 | /* this is a valid case when another task releases the spinlock */ |
856 | rq->lock.owner = current; | 884 | rq->lock.owner = current; |
@@ -866,15 +894,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
866 | } | 894 | } |
867 | 895 | ||
868 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 896 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
869 | static inline int task_running(struct rq *rq, struct task_struct *p) | ||
870 | { | ||
871 | #ifdef CONFIG_SMP | ||
872 | return p->oncpu; | ||
873 | #else | ||
874 | return task_current(rq, p); | ||
875 | #endif | ||
876 | } | ||
877 | |||
878 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 897 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
879 | { | 898 | { |
880 | #ifdef CONFIG_SMP | 899 | #ifdef CONFIG_SMP |
@@ -883,7 +902,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
883 | * SMP rebalancing from interrupt is the only thing that cares | 902 | * SMP rebalancing from interrupt is the only thing that cares |
884 | * here. | 903 | * here. |
885 | */ | 904 | */ |
886 | next->oncpu = 1; | 905 | next->on_cpu = 1; |
887 | #endif | 906 | #endif |
888 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 907 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
889 | raw_spin_unlock_irq(&rq->lock); | 908 | raw_spin_unlock_irq(&rq->lock); |
@@ -896,12 +915,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
896 | { | 915 | { |
897 | #ifdef CONFIG_SMP | 916 | #ifdef CONFIG_SMP |
898 | /* | 917 | /* |
899 | * After ->oncpu is cleared, the task can be moved to a different CPU. | 918 | * After ->on_cpu is cleared, the task can be moved to a different CPU. |
900 | * We must ensure this doesn't happen until the switch is completely | 919 | * We must ensure this doesn't happen until the switch is completely |
901 | * finished. | 920 | * finished. |
902 | */ | 921 | */ |
903 | smp_wmb(); | 922 | smp_wmb(); |
904 | prev->oncpu = 0; | 923 | prev->on_cpu = 0; |
905 | #endif | 924 | #endif |
906 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 925 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
907 | local_irq_enable(); | 926 | local_irq_enable(); |
@@ -910,23 +929,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
910 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | 929 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ |
911 | 930 | ||
912 | /* | 931 | /* |
913 | * Check whether the task is waking, we use this to synchronize ->cpus_allowed | 932 | * __task_rq_lock - lock the rq @p resides on. |
914 | * against ttwu(). | ||
915 | */ | ||
916 | static inline int task_is_waking(struct task_struct *p) | ||
917 | { | ||
918 | return unlikely(p->state == TASK_WAKING); | ||
919 | } | ||
920 | |||
921 | /* | ||
922 | * __task_rq_lock - lock the runqueue a given task resides on. | ||
923 | * Must be called interrupts disabled. | ||
924 | */ | 933 | */ |
925 | static inline struct rq *__task_rq_lock(struct task_struct *p) | 934 | static inline struct rq *__task_rq_lock(struct task_struct *p) |
926 | __acquires(rq->lock) | 935 | __acquires(rq->lock) |
927 | { | 936 | { |
928 | struct rq *rq; | 937 | struct rq *rq; |
929 | 938 | ||
939 | lockdep_assert_held(&p->pi_lock); | ||
940 | |||
930 | for (;;) { | 941 | for (;;) { |
931 | rq = task_rq(p); | 942 | rq = task_rq(p); |
932 | raw_spin_lock(&rq->lock); | 943 | raw_spin_lock(&rq->lock); |
@@ -937,22 +948,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
937 | } | 948 | } |
938 | 949 | ||
939 | /* | 950 | /* |
940 | * task_rq_lock - lock the runqueue a given task resides on and disable | 951 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
941 | * interrupts. Note the ordering: we can safely lookup the task_rq without | ||
942 | * explicitly disabling preemption. | ||
943 | */ | 952 | */ |
944 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | 953 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) |
954 | __acquires(p->pi_lock) | ||
945 | __acquires(rq->lock) | 955 | __acquires(rq->lock) |
946 | { | 956 | { |
947 | struct rq *rq; | 957 | struct rq *rq; |
948 | 958 | ||
949 | for (;;) { | 959 | for (;;) { |
950 | local_irq_save(*flags); | 960 | raw_spin_lock_irqsave(&p->pi_lock, *flags); |
951 | rq = task_rq(p); | 961 | rq = task_rq(p); |
952 | raw_spin_lock(&rq->lock); | 962 | raw_spin_lock(&rq->lock); |
953 | if (likely(rq == task_rq(p))) | 963 | if (likely(rq == task_rq(p))) |
954 | return rq; | 964 | return rq; |
955 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 965 | raw_spin_unlock(&rq->lock); |
966 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
956 | } | 967 | } |
957 | } | 968 | } |
958 | 969 | ||
@@ -962,10 +973,13 @@ static void __task_rq_unlock(struct rq *rq) | |||
962 | raw_spin_unlock(&rq->lock); | 973 | raw_spin_unlock(&rq->lock); |
963 | } | 974 | } |
964 | 975 | ||
965 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 976 | static inline void |
977 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
966 | __releases(rq->lock) | 978 | __releases(rq->lock) |
979 | __releases(p->pi_lock) | ||
967 | { | 980 | { |
968 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 981 | raw_spin_unlock(&rq->lock); |
982 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
969 | } | 983 | } |
970 | 984 | ||
971 | /* | 985 | /* |
@@ -1774,7 +1788,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1774 | update_rq_clock(rq); | 1788 | update_rq_clock(rq); |
1775 | sched_info_queued(p); | 1789 | sched_info_queued(p); |
1776 | p->sched_class->enqueue_task(rq, p, flags); | 1790 | p->sched_class->enqueue_task(rq, p, flags); |
1777 | p->se.on_rq = 1; | ||
1778 | } | 1791 | } |
1779 | 1792 | ||
1780 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | 1793 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
@@ -1782,7 +1795,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1782 | update_rq_clock(rq); | 1795 | update_rq_clock(rq); |
1783 | sched_info_dequeued(p); | 1796 | sched_info_dequeued(p); |
1784 | p->sched_class->dequeue_task(rq, p, flags); | 1797 | p->sched_class->dequeue_task(rq, p, flags); |
1785 | p->se.on_rq = 0; | ||
1786 | } | 1798 | } |
1787 | 1799 | ||
1788 | /* | 1800 | /* |
@@ -2117,7 +2129,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
2117 | * A queue event has occurred, and we're going to schedule. In | 2129 | * A queue event has occurred, and we're going to schedule. In |
2118 | * this case, we can save a useless back to back clock update. | 2130 | * this case, we can save a useless back to back clock update. |
2119 | */ | 2131 | */ |
2120 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | 2132 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) |
2121 | rq->skip_clock_update = 1; | 2133 | rq->skip_clock_update = 1; |
2122 | } | 2134 | } |
2123 | 2135 | ||
@@ -2163,6 +2175,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2163 | */ | 2175 | */ |
2164 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | 2176 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && |
2165 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2177 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2178 | |||
2179 | #ifdef CONFIG_LOCKDEP | ||
2180 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | ||
2181 | lockdep_is_held(&task_rq(p)->lock))); | ||
2182 | #endif | ||
2166 | #endif | 2183 | #endif |
2167 | 2184 | ||
2168 | trace_sched_migrate_task(p, new_cpu); | 2185 | trace_sched_migrate_task(p, new_cpu); |
@@ -2183,19 +2200,6 @@ struct migration_arg { | |||
2183 | static int migration_cpu_stop(void *data); | 2200 | static int migration_cpu_stop(void *data); |
2184 | 2201 | ||
2185 | /* | 2202 | /* |
2186 | * The task's runqueue lock must be held. | ||
2187 | * Returns true if you have to wait for migration thread. | ||
2188 | */ | ||
2189 | static bool migrate_task(struct task_struct *p, struct rq *rq) | ||
2190 | { | ||
2191 | /* | ||
2192 | * If the task is not on a runqueue (and not running), then | ||
2193 | * the next wake-up will properly place the task. | ||
2194 | */ | ||
2195 | return p->se.on_rq || task_running(rq, p); | ||
2196 | } | ||
2197 | |||
2198 | /* | ||
2199 | * wait_task_inactive - wait for a thread to unschedule. | 2203 | * wait_task_inactive - wait for a thread to unschedule. |
2200 | * | 2204 | * |
2201 | * If @match_state is nonzero, it's the @p->state value just checked and | 2205 | * If @match_state is nonzero, it's the @p->state value just checked and |
@@ -2252,11 +2256,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2252 | rq = task_rq_lock(p, &flags); | 2256 | rq = task_rq_lock(p, &flags); |
2253 | trace_sched_wait_task(p); | 2257 | trace_sched_wait_task(p); |
2254 | running = task_running(rq, p); | 2258 | running = task_running(rq, p); |
2255 | on_rq = p->se.on_rq; | 2259 | on_rq = p->on_rq; |
2256 | ncsw = 0; | 2260 | ncsw = 0; |
2257 | if (!match_state || p->state == match_state) | 2261 | if (!match_state || p->state == match_state) |
2258 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | 2262 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
2259 | task_rq_unlock(rq, &flags); | 2263 | task_rq_unlock(rq, p, &flags); |
2260 | 2264 | ||
2261 | /* | 2265 | /* |
2262 | * If it changed from the expected state, bail out now. | 2266 | * If it changed from the expected state, bail out now. |
@@ -2331,7 +2335,7 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
2331 | 2335 | ||
2332 | #ifdef CONFIG_SMP | 2336 | #ifdef CONFIG_SMP |
2333 | /* | 2337 | /* |
2334 | * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. | 2338 | * ->cpus_allowed is protected by both rq->lock and p->pi_lock |
2335 | */ | 2339 | */ |
2336 | static int select_fallback_rq(int cpu, struct task_struct *p) | 2340 | static int select_fallback_rq(int cpu, struct task_struct *p) |
2337 | { | 2341 | { |
@@ -2364,12 +2368,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2364 | } | 2368 | } |
2365 | 2369 | ||
2366 | /* | 2370 | /* |
2367 | * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. | 2371 | * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. |
2368 | */ | 2372 | */ |
2369 | static inline | 2373 | static inline |
2370 | int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) | 2374 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
2371 | { | 2375 | { |
2372 | int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); | 2376 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); |
2373 | 2377 | ||
2374 | /* | 2378 | /* |
2375 | * In order not to call set_task_cpu() on a blocking task we need | 2379 | * In order not to call set_task_cpu() on a blocking task we need |
@@ -2395,27 +2399,60 @@ static void update_avg(u64 *avg, u64 sample) | |||
2395 | } | 2399 | } |
2396 | #endif | 2400 | #endif |
2397 | 2401 | ||
2398 | static inline void ttwu_activate(struct task_struct *p, struct rq *rq, | 2402 | static void |
2399 | bool is_sync, bool is_migrate, bool is_local, | 2403 | ttwu_stat(struct task_struct *p, int cpu, int wake_flags) |
2400 | unsigned long en_flags) | ||
2401 | { | 2404 | { |
2405 | #ifdef CONFIG_SCHEDSTATS | ||
2406 | struct rq *rq = this_rq(); | ||
2407 | |||
2408 | #ifdef CONFIG_SMP | ||
2409 | int this_cpu = smp_processor_id(); | ||
2410 | |||
2411 | if (cpu == this_cpu) { | ||
2412 | schedstat_inc(rq, ttwu_local); | ||
2413 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2414 | } else { | ||
2415 | struct sched_domain *sd; | ||
2416 | |||
2417 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2418 | for_each_domain(this_cpu, sd) { | ||
2419 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2420 | schedstat_inc(sd, ttwu_wake_remote); | ||
2421 | break; | ||
2422 | } | ||
2423 | } | ||
2424 | } | ||
2425 | #endif /* CONFIG_SMP */ | ||
2426 | |||
2427 | schedstat_inc(rq, ttwu_count); | ||
2402 | schedstat_inc(p, se.statistics.nr_wakeups); | 2428 | schedstat_inc(p, se.statistics.nr_wakeups); |
2403 | if (is_sync) | 2429 | |
2430 | if (wake_flags & WF_SYNC) | ||
2404 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2431 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
2405 | if (is_migrate) | 2432 | |
2433 | if (cpu != task_cpu(p)) | ||
2406 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | 2434 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); |
2407 | if (is_local) | ||
2408 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2409 | else | ||
2410 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2411 | 2435 | ||
2436 | #endif /* CONFIG_SCHEDSTATS */ | ||
2437 | } | ||
2438 | |||
2439 | static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) | ||
2440 | { | ||
2412 | activate_task(rq, p, en_flags); | 2441 | activate_task(rq, p, en_flags); |
2442 | p->on_rq = 1; | ||
2443 | |||
2444 | /* if a worker is waking up, notify workqueue */ | ||
2445 | if (p->flags & PF_WQ_WORKER) | ||
2446 | wq_worker_waking_up(p, cpu_of(rq)); | ||
2413 | } | 2447 | } |
2414 | 2448 | ||
2415 | static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | 2449 | /* |
2416 | int wake_flags, bool success) | 2450 | * Mark the task runnable and perform wakeup-preemption. |
2451 | */ | ||
2452 | static void | ||
2453 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2417 | { | 2454 | { |
2418 | trace_sched_wakeup(p, success); | 2455 | trace_sched_wakeup(p, true); |
2419 | check_preempt_curr(rq, p, wake_flags); | 2456 | check_preempt_curr(rq, p, wake_flags); |
2420 | 2457 | ||
2421 | p->state = TASK_RUNNING; | 2458 | p->state = TASK_RUNNING; |
@@ -2434,9 +2471,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2434 | rq->idle_stamp = 0; | 2471 | rq->idle_stamp = 0; |
2435 | } | 2472 | } |
2436 | #endif | 2473 | #endif |
2437 | /* if a worker is waking up, notify workqueue */ | 2474 | } |
2438 | if ((p->flags & PF_WQ_WORKER) && success) | 2475 | |
2439 | wq_worker_waking_up(p, cpu_of(rq)); | 2476 | static void |
2477 | ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2478 | { | ||
2479 | #ifdef CONFIG_SMP | ||
2480 | if (p->sched_contributes_to_load) | ||
2481 | rq->nr_uninterruptible--; | ||
2482 | #endif | ||
2483 | |||
2484 | ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING); | ||
2485 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2486 | } | ||
2487 | |||
2488 | /* | ||
2489 | * Called in case the task @p isn't fully descheduled from its runqueue, | ||
2490 | * in this case we must do a remote wakeup. It's a 'light' wakeup though, | ||
2491 | * since all we need to do is flip p->state to TASK_RUNNING, since | ||
2492 | * the task is still ->on_rq. | ||
2493 | */ | ||
2494 | static int ttwu_remote(struct task_struct *p, int wake_flags) | ||
2495 | { | ||
2496 | struct rq *rq; | ||
2497 | int ret = 0; | ||
2498 | |||
2499 | rq = __task_rq_lock(p); | ||
2500 | if (p->on_rq) { | ||
2501 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2502 | ret = 1; | ||
2503 | } | ||
2504 | __task_rq_unlock(rq); | ||
2505 | |||
2506 | return ret; | ||
2507 | } | ||
2508 | |||
2509 | #ifdef CONFIG_SMP | ||
2510 | static void sched_ttwu_pending(void) | ||
2511 | { | ||
2512 | struct rq *rq = this_rq(); | ||
2513 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
2514 | |||
2515 | if (!list) | ||
2516 | return; | ||
2517 | |||
2518 | raw_spin_lock(&rq->lock); | ||
2519 | |||
2520 | while (list) { | ||
2521 | struct task_struct *p = list; | ||
2522 | list = list->wake_entry; | ||
2523 | ttwu_do_activate(rq, p, 0); | ||
2524 | } | ||
2525 | |||
2526 | raw_spin_unlock(&rq->lock); | ||
2527 | } | ||
2528 | |||
2529 | void scheduler_ipi(void) | ||
2530 | { | ||
2531 | sched_ttwu_pending(); | ||
2532 | } | ||
2533 | |||
2534 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | ||
2535 | { | ||
2536 | struct rq *rq = cpu_rq(cpu); | ||
2537 | struct task_struct *next = rq->wake_list; | ||
2538 | |||
2539 | for (;;) { | ||
2540 | struct task_struct *old = next; | ||
2541 | |||
2542 | p->wake_entry = next; | ||
2543 | next = cmpxchg(&rq->wake_list, old, p); | ||
2544 | if (next == old) | ||
2545 | break; | ||
2546 | } | ||
2547 | |||
2548 | if (!next) | ||
2549 | smp_send_reschedule(cpu); | ||
2550 | } | ||
2551 | #endif | ||
2552 | |||
2553 | static void ttwu_queue(struct task_struct *p, int cpu) | ||
2554 | { | ||
2555 | struct rq *rq = cpu_rq(cpu); | ||
2556 | |||
2557 | #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) | ||
2558 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | ||
2559 | ttwu_queue_remote(p, cpu); | ||
2560 | return; | ||
2561 | } | ||
2562 | #endif | ||
2563 | |||
2564 | raw_spin_lock(&rq->lock); | ||
2565 | ttwu_do_activate(rq, p, 0); | ||
2566 | raw_spin_unlock(&rq->lock); | ||
2440 | } | 2567 | } |
2441 | 2568 | ||
2442 | /** | 2569 | /** |
@@ -2454,92 +2581,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2454 | * Returns %true if @p was woken up, %false if it was already running | 2581 | * Returns %true if @p was woken up, %false if it was already running |
2455 | * or @state didn't match @p's state. | 2582 | * or @state didn't match @p's state. |
2456 | */ | 2583 | */ |
2457 | static int try_to_wake_up(struct task_struct *p, unsigned int state, | 2584 | static int |
2458 | int wake_flags) | 2585 | try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
2459 | { | 2586 | { |
2460 | int cpu, orig_cpu, this_cpu, success = 0; | ||
2461 | unsigned long flags; | 2587 | unsigned long flags; |
2462 | unsigned long en_flags = ENQUEUE_WAKEUP; | 2588 | int cpu, success = 0; |
2463 | struct rq *rq; | ||
2464 | |||
2465 | this_cpu = get_cpu(); | ||
2466 | 2589 | ||
2467 | smp_wmb(); | 2590 | smp_wmb(); |
2468 | rq = task_rq_lock(p, &flags); | 2591 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2469 | if (!(p->state & state)) | 2592 | if (!(p->state & state)) |
2470 | goto out; | 2593 | goto out; |
2471 | 2594 | ||
2472 | if (p->se.on_rq) | 2595 | success = 1; /* we're going to change ->state */ |
2473 | goto out_running; | ||
2474 | |||
2475 | cpu = task_cpu(p); | 2596 | cpu = task_cpu(p); |
2476 | orig_cpu = cpu; | ||
2477 | 2597 | ||
2478 | #ifdef CONFIG_SMP | 2598 | if (p->on_rq && ttwu_remote(p, wake_flags)) |
2479 | if (unlikely(task_running(rq, p))) | 2599 | goto stat; |
2480 | goto out_activate; | ||
2481 | 2600 | ||
2601 | #ifdef CONFIG_SMP | ||
2482 | /* | 2602 | /* |
2483 | * In order to handle concurrent wakeups and release the rq->lock | 2603 | * If the owning (remote) cpu is still in the middle of schedule() with |
2484 | * we put the task in TASK_WAKING state. | 2604 | * this task as prev, wait until it's done referencing the task. |
2485 | * | ||
2486 | * First fix up the nr_uninterruptible count: | ||
2487 | */ | 2605 | */ |
2488 | if (task_contributes_to_load(p)) { | 2606 | while (p->on_cpu) { |
2489 | if (likely(cpu_online(orig_cpu))) | 2607 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2490 | rq->nr_uninterruptible--; | 2608 | /* |
2491 | else | 2609 | * If called from interrupt context we could have landed in the |
2492 | this_rq()->nr_uninterruptible--; | 2610 | * middle of schedule(), in this case we should take care not |
2493 | } | 2611 | * to spin on ->on_cpu if p is current, since that would |
2494 | p->state = TASK_WAKING; | 2612 | * deadlock. |
2495 | 2613 | */ | |
2496 | if (p->sched_class->task_waking) { | 2614 | if (p == current) { |
2497 | p->sched_class->task_waking(rq, p); | 2615 | ttwu_queue(p, cpu); |
2498 | en_flags |= ENQUEUE_WAKING; | 2616 | goto stat; |
2617 | } | ||
2618 | #endif | ||
2619 | cpu_relax(); | ||
2499 | } | 2620 | } |
2500 | |||
2501 | cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); | ||
2502 | if (cpu != orig_cpu) | ||
2503 | set_task_cpu(p, cpu); | ||
2504 | __task_rq_unlock(rq); | ||
2505 | |||
2506 | rq = cpu_rq(cpu); | ||
2507 | raw_spin_lock(&rq->lock); | ||
2508 | |||
2509 | /* | 2621 | /* |
2510 | * We migrated the task without holding either rq->lock, however | 2622 | * Pairs with the smp_wmb() in finish_lock_switch(). |
2511 | * since the task is not on the task list itself, nobody else | ||
2512 | * will try and migrate the task, hence the rq should match the | ||
2513 | * cpu we just moved it to. | ||
2514 | */ | 2623 | */ |
2515 | WARN_ON(task_cpu(p) != cpu); | 2624 | smp_rmb(); |
2516 | WARN_ON(p->state != TASK_WAKING); | ||
2517 | 2625 | ||
2518 | #ifdef CONFIG_SCHEDSTATS | 2626 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
2519 | schedstat_inc(rq, ttwu_count); | 2627 | p->state = TASK_WAKING; |
2520 | if (cpu == this_cpu) | ||
2521 | schedstat_inc(rq, ttwu_local); | ||
2522 | else { | ||
2523 | struct sched_domain *sd; | ||
2524 | for_each_domain(this_cpu, sd) { | ||
2525 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2526 | schedstat_inc(sd, ttwu_wake_remote); | ||
2527 | break; | ||
2528 | } | ||
2529 | } | ||
2530 | } | ||
2531 | #endif /* CONFIG_SCHEDSTATS */ | ||
2532 | 2628 | ||
2533 | out_activate: | 2629 | if (p->sched_class->task_waking) |
2630 | p->sched_class->task_waking(p); | ||
2631 | |||
2632 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
2633 | if (task_cpu(p) != cpu) | ||
2634 | set_task_cpu(p, cpu); | ||
2534 | #endif /* CONFIG_SMP */ | 2635 | #endif /* CONFIG_SMP */ |
2535 | ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, | 2636 | |
2536 | cpu == this_cpu, en_flags); | 2637 | ttwu_queue(p, cpu); |
2537 | success = 1; | 2638 | stat: |
2538 | out_running: | 2639 | ttwu_stat(p, cpu, wake_flags); |
2539 | ttwu_post_activation(p, rq, wake_flags, success); | ||
2540 | out: | 2640 | out: |
2541 | task_rq_unlock(rq, &flags); | 2641 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2542 | put_cpu(); | ||
2543 | 2642 | ||
2544 | return success; | 2643 | return success; |
2545 | } | 2644 | } |
@@ -2548,31 +2647,34 @@ out: | |||
2548 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2647 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2549 | * @p: the thread to be awakened | 2648 | * @p: the thread to be awakened |
2550 | * | 2649 | * |
2551 | * Put @p on the run-queue if it's not already there. The caller must | 2650 | * Put @p on the run-queue if it's not already there. The caller must |
2552 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2651 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2553 | * the current task. this_rq() stays locked over invocation. | 2652 | * the current task. |
2554 | */ | 2653 | */ |
2555 | static void try_to_wake_up_local(struct task_struct *p) | 2654 | static void try_to_wake_up_local(struct task_struct *p) |
2556 | { | 2655 | { |
2557 | struct rq *rq = task_rq(p); | 2656 | struct rq *rq = task_rq(p); |
2558 | bool success = false; | ||
2559 | 2657 | ||
2560 | BUG_ON(rq != this_rq()); | 2658 | BUG_ON(rq != this_rq()); |
2561 | BUG_ON(p == current); | 2659 | BUG_ON(p == current); |
2562 | lockdep_assert_held(&rq->lock); | 2660 | lockdep_assert_held(&rq->lock); |
2563 | 2661 | ||
2662 | if (!raw_spin_trylock(&p->pi_lock)) { | ||
2663 | raw_spin_unlock(&rq->lock); | ||
2664 | raw_spin_lock(&p->pi_lock); | ||
2665 | raw_spin_lock(&rq->lock); | ||
2666 | } | ||
2667 | |||
2564 | if (!(p->state & TASK_NORMAL)) | 2668 | if (!(p->state & TASK_NORMAL)) |
2565 | return; | 2669 | goto out; |
2566 | 2670 | ||
2567 | if (!p->se.on_rq) { | 2671 | if (!p->on_rq) |
2568 | if (likely(!task_running(rq, p))) { | 2672 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
2569 | schedstat_inc(rq, ttwu_count); | 2673 | |
2570 | schedstat_inc(rq, ttwu_local); | 2674 | ttwu_do_wakeup(rq, p, 0); |
2571 | } | 2675 | ttwu_stat(p, smp_processor_id(), 0); |
2572 | ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); | 2676 | out: |
2573 | success = true; | 2677 | raw_spin_unlock(&p->pi_lock); |
2574 | } | ||
2575 | ttwu_post_activation(p, rq, 0, success); | ||
2576 | } | 2678 | } |
2577 | 2679 | ||
2578 | /** | 2680 | /** |
@@ -2605,19 +2707,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) | |||
2605 | */ | 2707 | */ |
2606 | static void __sched_fork(struct task_struct *p) | 2708 | static void __sched_fork(struct task_struct *p) |
2607 | { | 2709 | { |
2710 | p->on_rq = 0; | ||
2711 | |||
2712 | p->se.on_rq = 0; | ||
2608 | p->se.exec_start = 0; | 2713 | p->se.exec_start = 0; |
2609 | p->se.sum_exec_runtime = 0; | 2714 | p->se.sum_exec_runtime = 0; |
2610 | p->se.prev_sum_exec_runtime = 0; | 2715 | p->se.prev_sum_exec_runtime = 0; |
2611 | p->se.nr_migrations = 0; | 2716 | p->se.nr_migrations = 0; |
2612 | p->se.vruntime = 0; | 2717 | p->se.vruntime = 0; |
2718 | INIT_LIST_HEAD(&p->se.group_node); | ||
2613 | 2719 | ||
2614 | #ifdef CONFIG_SCHEDSTATS | 2720 | #ifdef CONFIG_SCHEDSTATS |
2615 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2721 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
2616 | #endif | 2722 | #endif |
2617 | 2723 | ||
2618 | INIT_LIST_HEAD(&p->rt.run_list); | 2724 | INIT_LIST_HEAD(&p->rt.run_list); |
2619 | p->se.on_rq = 0; | ||
2620 | INIT_LIST_HEAD(&p->se.group_node); | ||
2621 | 2725 | ||
2622 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2726 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2623 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2727 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
@@ -2629,6 +2733,7 @@ static void __sched_fork(struct task_struct *p) | |||
2629 | */ | 2733 | */ |
2630 | void sched_fork(struct task_struct *p, int clone_flags) | 2734 | void sched_fork(struct task_struct *p, int clone_flags) |
2631 | { | 2735 | { |
2736 | unsigned long flags; | ||
2632 | int cpu = get_cpu(); | 2737 | int cpu = get_cpu(); |
2633 | 2738 | ||
2634 | __sched_fork(p); | 2739 | __sched_fork(p); |
@@ -2679,16 +2784,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2679 | * | 2784 | * |
2680 | * Silence PROVE_RCU. | 2785 | * Silence PROVE_RCU. |
2681 | */ | 2786 | */ |
2682 | rcu_read_lock(); | 2787 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2683 | set_task_cpu(p, cpu); | 2788 | set_task_cpu(p, cpu); |
2684 | rcu_read_unlock(); | 2789 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2685 | 2790 | ||
2686 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2791 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2687 | if (likely(sched_info_on())) | 2792 | if (likely(sched_info_on())) |
2688 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2793 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
2689 | #endif | 2794 | #endif |
2690 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 2795 | #if defined(CONFIG_SMP) |
2691 | p->oncpu = 0; | 2796 | p->on_cpu = 0; |
2692 | #endif | 2797 | #endif |
2693 | #ifdef CONFIG_PREEMPT | 2798 | #ifdef CONFIG_PREEMPT |
2694 | /* Want to start with kernel preemption disabled. */ | 2799 | /* Want to start with kernel preemption disabled. */ |
@@ -2712,37 +2817,27 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2712 | { | 2817 | { |
2713 | unsigned long flags; | 2818 | unsigned long flags; |
2714 | struct rq *rq; | 2819 | struct rq *rq; |
2715 | int cpu __maybe_unused = get_cpu(); | ||
2716 | 2820 | ||
2821 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
2717 | #ifdef CONFIG_SMP | 2822 | #ifdef CONFIG_SMP |
2718 | rq = task_rq_lock(p, &flags); | ||
2719 | p->state = TASK_WAKING; | ||
2720 | |||
2721 | /* | 2823 | /* |
2722 | * Fork balancing, do it here and not earlier because: | 2824 | * Fork balancing, do it here and not earlier because: |
2723 | * - cpus_allowed can change in the fork path | 2825 | * - cpus_allowed can change in the fork path |
2724 | * - any previously selected cpu might disappear through hotplug | 2826 | * - any previously selected cpu might disappear through hotplug |
2725 | * | ||
2726 | * We set TASK_WAKING so that select_task_rq() can drop rq->lock | ||
2727 | * without people poking at ->cpus_allowed. | ||
2728 | */ | 2827 | */ |
2729 | cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); | 2828 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); |
2730 | set_task_cpu(p, cpu); | ||
2731 | |||
2732 | p->state = TASK_RUNNING; | ||
2733 | task_rq_unlock(rq, &flags); | ||
2734 | #endif | 2829 | #endif |
2735 | 2830 | ||
2736 | rq = task_rq_lock(p, &flags); | 2831 | rq = __task_rq_lock(p); |
2737 | activate_task(rq, p, 0); | 2832 | activate_task(rq, p, 0); |
2738 | trace_sched_wakeup_new(p, 1); | 2833 | p->on_rq = 1; |
2834 | trace_sched_wakeup_new(p, true); | ||
2739 | check_preempt_curr(rq, p, WF_FORK); | 2835 | check_preempt_curr(rq, p, WF_FORK); |
2740 | #ifdef CONFIG_SMP | 2836 | #ifdef CONFIG_SMP |
2741 | if (p->sched_class->task_woken) | 2837 | if (p->sched_class->task_woken) |
2742 | p->sched_class->task_woken(rq, p); | 2838 | p->sched_class->task_woken(rq, p); |
2743 | #endif | 2839 | #endif |
2744 | task_rq_unlock(rq, &flags); | 2840 | task_rq_unlock(rq, p, &flags); |
2745 | put_cpu(); | ||
2746 | } | 2841 | } |
2747 | 2842 | ||
2748 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2843 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -3451,27 +3546,22 @@ void sched_exec(void) | |||
3451 | { | 3546 | { |
3452 | struct task_struct *p = current; | 3547 | struct task_struct *p = current; |
3453 | unsigned long flags; | 3548 | unsigned long flags; |
3454 | struct rq *rq; | ||
3455 | int dest_cpu; | 3549 | int dest_cpu; |
3456 | 3550 | ||
3457 | rq = task_rq_lock(p, &flags); | 3551 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
3458 | dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); | 3552 | dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); |
3459 | if (dest_cpu == smp_processor_id()) | 3553 | if (dest_cpu == smp_processor_id()) |
3460 | goto unlock; | 3554 | goto unlock; |
3461 | 3555 | ||
3462 | /* | 3556 | if (likely(cpu_active(dest_cpu))) { |
3463 | * select_task_rq() can race against ->cpus_allowed | ||
3464 | */ | ||
3465 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | ||
3466 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { | ||
3467 | struct migration_arg arg = { p, dest_cpu }; | 3557 | struct migration_arg arg = { p, dest_cpu }; |
3468 | 3558 | ||
3469 | task_rq_unlock(rq, &flags); | 3559 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3470 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 3560 | stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); |
3471 | return; | 3561 | return; |
3472 | } | 3562 | } |
3473 | unlock: | 3563 | unlock: |
3474 | task_rq_unlock(rq, &flags); | 3564 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3475 | } | 3565 | } |
3476 | 3566 | ||
3477 | #endif | 3567 | #endif |
@@ -3508,7 +3598,7 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
3508 | 3598 | ||
3509 | rq = task_rq_lock(p, &flags); | 3599 | rq = task_rq_lock(p, &flags); |
3510 | ns = do_task_delta_exec(p, rq); | 3600 | ns = do_task_delta_exec(p, rq); |
3511 | task_rq_unlock(rq, &flags); | 3601 | task_rq_unlock(rq, p, &flags); |
3512 | 3602 | ||
3513 | return ns; | 3603 | return ns; |
3514 | } | 3604 | } |
@@ -3526,7 +3616,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
3526 | 3616 | ||
3527 | rq = task_rq_lock(p, &flags); | 3617 | rq = task_rq_lock(p, &flags); |
3528 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 3618 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); |
3529 | task_rq_unlock(rq, &flags); | 3619 | task_rq_unlock(rq, p, &flags); |
3530 | 3620 | ||
3531 | return ns; | 3621 | return ns; |
3532 | } | 3622 | } |
@@ -3550,7 +3640,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p) | |||
3550 | rq = task_rq_lock(p, &flags); | 3640 | rq = task_rq_lock(p, &flags); |
3551 | thread_group_cputime(p, &totals); | 3641 | thread_group_cputime(p, &totals); |
3552 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | 3642 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); |
3553 | task_rq_unlock(rq, &flags); | 3643 | task_rq_unlock(rq, p, &flags); |
3554 | 3644 | ||
3555 | return ns; | 3645 | return ns; |
3556 | } | 3646 | } |
@@ -4036,7 +4126,7 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4036 | 4126 | ||
4037 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 4127 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
4038 | { | 4128 | { |
4039 | if (prev->se.on_rq) | 4129 | if (prev->on_rq) |
4040 | update_rq_clock(rq); | 4130 | update_rq_clock(rq); |
4041 | prev->sched_class->put_prev_task(rq, prev); | 4131 | prev->sched_class->put_prev_task(rq, prev); |
4042 | } | 4132 | } |
@@ -4098,11 +4188,13 @@ need_resched: | |||
4098 | if (unlikely(signal_pending_state(prev->state, prev))) { | 4188 | if (unlikely(signal_pending_state(prev->state, prev))) { |
4099 | prev->state = TASK_RUNNING; | 4189 | prev->state = TASK_RUNNING; |
4100 | } else { | 4190 | } else { |
4191 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4192 | prev->on_rq = 0; | ||
4193 | |||
4101 | /* | 4194 | /* |
4102 | * If a worker is going to sleep, notify and | 4195 | * If a worker went to sleep, notify and ask workqueue |
4103 | * ask workqueue whether it wants to wake up a | 4196 | * whether it wants to wake up a task to maintain |
4104 | * task to maintain concurrency. If so, wake | 4197 | * concurrency. |
4105 | * up the task. | ||
4106 | */ | 4198 | */ |
4107 | if (prev->flags & PF_WQ_WORKER) { | 4199 | if (prev->flags & PF_WQ_WORKER) { |
4108 | struct task_struct *to_wakeup; | 4200 | struct task_struct *to_wakeup; |
@@ -4111,21 +4203,20 @@ need_resched: | |||
4111 | if (to_wakeup) | 4203 | if (to_wakeup) |
4112 | try_to_wake_up_local(to_wakeup); | 4204 | try_to_wake_up_local(to_wakeup); |
4113 | } | 4205 | } |
4114 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | 4206 | |
4207 | /* | ||
4208 | * If we are going to sleep and we have plugged IO | ||
4209 | * queued, make sure to submit it to avoid deadlocks. | ||
4210 | */ | ||
4211 | if (blk_needs_flush_plug(prev)) { | ||
4212 | raw_spin_unlock(&rq->lock); | ||
4213 | blk_flush_plug(prev); | ||
4214 | raw_spin_lock(&rq->lock); | ||
4215 | } | ||
4115 | } | 4216 | } |
4116 | switch_count = &prev->nvcsw; | 4217 | switch_count = &prev->nvcsw; |
4117 | } | 4218 | } |
4118 | 4219 | ||
4119 | /* | ||
4120 | * If we are going to sleep and we have plugged IO queued, make | ||
4121 | * sure to submit it to avoid deadlocks. | ||
4122 | */ | ||
4123 | if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) { | ||
4124 | raw_spin_unlock(&rq->lock); | ||
4125 | blk_flush_plug(prev); | ||
4126 | raw_spin_lock(&rq->lock); | ||
4127 | } | ||
4128 | |||
4129 | pre_schedule(rq, prev); | 4220 | pre_schedule(rq, prev); |
4130 | 4221 | ||
4131 | if (unlikely(!rq->nr_running)) | 4222 | if (unlikely(!rq->nr_running)) |
@@ -4162,70 +4253,53 @@ need_resched: | |||
4162 | EXPORT_SYMBOL(schedule); | 4253 | EXPORT_SYMBOL(schedule); |
4163 | 4254 | ||
4164 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4255 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4165 | /* | ||
4166 | * Look out! "owner" is an entirely speculative pointer | ||
4167 | * access and not reliable. | ||
4168 | */ | ||
4169 | int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | ||
4170 | { | ||
4171 | unsigned int cpu; | ||
4172 | struct rq *rq; | ||
4173 | 4256 | ||
4174 | if (!sched_feat(OWNER_SPIN)) | 4257 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
4175 | return 0; | 4258 | { |
4259 | bool ret = false; | ||
4176 | 4260 | ||
4177 | #ifdef CONFIG_DEBUG_PAGEALLOC | 4261 | rcu_read_lock(); |
4178 | /* | 4262 | if (lock->owner != owner) |
4179 | * Need to access the cpu field knowing that | 4263 | goto fail; |
4180 | * DEBUG_PAGEALLOC could have unmapped it if | ||
4181 | * the mutex owner just released it and exited. | ||
4182 | */ | ||
4183 | if (probe_kernel_address(&owner->cpu, cpu)) | ||
4184 | return 0; | ||
4185 | #else | ||
4186 | cpu = owner->cpu; | ||
4187 | #endif | ||
4188 | 4264 | ||
4189 | /* | 4265 | /* |
4190 | * Even if the access succeeded (likely case), | 4266 | * Ensure we emit the owner->on_cpu, dereference _after_ checking |
4191 | * the cpu field may no longer be valid. | 4267 | * lock->owner still matches owner, if that fails, owner might |
4268 | * point to free()d memory, if it still matches, the rcu_read_lock() | ||
4269 | * ensures the memory stays valid. | ||
4192 | */ | 4270 | */ |
4193 | if (cpu >= nr_cpumask_bits) | 4271 | barrier(); |
4194 | return 0; | ||
4195 | 4272 | ||
4196 | /* | 4273 | ret = owner->on_cpu; |
4197 | * We need to validate that we can do a | 4274 | fail: |
4198 | * get_cpu() and that we have the percpu area. | 4275 | rcu_read_unlock(); |
4199 | */ | ||
4200 | if (!cpu_online(cpu)) | ||
4201 | return 0; | ||
4202 | 4276 | ||
4203 | rq = cpu_rq(cpu); | 4277 | return ret; |
4278 | } | ||
4204 | 4279 | ||
4205 | for (;;) { | 4280 | /* |
4206 | /* | 4281 | * Look out! "owner" is an entirely speculative pointer |
4207 | * Owner changed, break to re-assess state. | 4282 | * access and not reliable. |
4208 | */ | 4283 | */ |
4209 | if (lock->owner != owner) { | 4284 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) |
4210 | /* | 4285 | { |
4211 | * If the lock has switched to a different owner, | 4286 | if (!sched_feat(OWNER_SPIN)) |
4212 | * we likely have heavy contention. Return 0 to quit | 4287 | return 0; |
4213 | * optimistic spinning and not contend further: | ||
4214 | */ | ||
4215 | if (lock->owner) | ||
4216 | return 0; | ||
4217 | break; | ||
4218 | } | ||
4219 | 4288 | ||
4220 | /* | 4289 | while (owner_running(lock, owner)) { |
4221 | * Is that owner really running on that cpu? | 4290 | if (need_resched()) |
4222 | */ | ||
4223 | if (task_thread_info(rq->curr) != owner || need_resched()) | ||
4224 | return 0; | 4291 | return 0; |
4225 | 4292 | ||
4226 | arch_mutex_cpu_relax(); | 4293 | arch_mutex_cpu_relax(); |
4227 | } | 4294 | } |
4228 | 4295 | ||
4296 | /* | ||
4297 | * If the owner changed to another task there is likely | ||
4298 | * heavy contention, stop spinning. | ||
4299 | */ | ||
4300 | if (lock->owner) | ||
4301 | return 0; | ||
4302 | |||
4229 | return 1; | 4303 | return 1; |
4230 | } | 4304 | } |
4231 | #endif | 4305 | #endif |
@@ -4685,19 +4759,18 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
4685 | */ | 4759 | */ |
4686 | void rt_mutex_setprio(struct task_struct *p, int prio) | 4760 | void rt_mutex_setprio(struct task_struct *p, int prio) |
4687 | { | 4761 | { |
4688 | unsigned long flags; | ||
4689 | int oldprio, on_rq, running; | 4762 | int oldprio, on_rq, running; |
4690 | struct rq *rq; | 4763 | struct rq *rq; |
4691 | const struct sched_class *prev_class; | 4764 | const struct sched_class *prev_class; |
4692 | 4765 | ||
4693 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 4766 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
4694 | 4767 | ||
4695 | rq = task_rq_lock(p, &flags); | 4768 | rq = __task_rq_lock(p); |
4696 | 4769 | ||
4697 | trace_sched_pi_setprio(p, prio); | 4770 | trace_sched_pi_setprio(p, prio); |
4698 | oldprio = p->prio; | 4771 | oldprio = p->prio; |
4699 | prev_class = p->sched_class; | 4772 | prev_class = p->sched_class; |
4700 | on_rq = p->se.on_rq; | 4773 | on_rq = p->on_rq; |
4701 | running = task_current(rq, p); | 4774 | running = task_current(rq, p); |
4702 | if (on_rq) | 4775 | if (on_rq) |
4703 | dequeue_task(rq, p, 0); | 4776 | dequeue_task(rq, p, 0); |
@@ -4717,7 +4790,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4717 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4790 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4718 | 4791 | ||
4719 | check_class_changed(rq, p, prev_class, oldprio); | 4792 | check_class_changed(rq, p, prev_class, oldprio); |
4720 | task_rq_unlock(rq, &flags); | 4793 | __task_rq_unlock(rq); |
4721 | } | 4794 | } |
4722 | 4795 | ||
4723 | #endif | 4796 | #endif |
@@ -4745,7 +4818,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4745 | p->static_prio = NICE_TO_PRIO(nice); | 4818 | p->static_prio = NICE_TO_PRIO(nice); |
4746 | goto out_unlock; | 4819 | goto out_unlock; |
4747 | } | 4820 | } |
4748 | on_rq = p->se.on_rq; | 4821 | on_rq = p->on_rq; |
4749 | if (on_rq) | 4822 | if (on_rq) |
4750 | dequeue_task(rq, p, 0); | 4823 | dequeue_task(rq, p, 0); |
4751 | 4824 | ||
@@ -4765,7 +4838,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4765 | resched_task(rq->curr); | 4838 | resched_task(rq->curr); |
4766 | } | 4839 | } |
4767 | out_unlock: | 4840 | out_unlock: |
4768 | task_rq_unlock(rq, &flags); | 4841 | task_rq_unlock(rq, p, &flags); |
4769 | } | 4842 | } |
4770 | EXPORT_SYMBOL(set_user_nice); | 4843 | EXPORT_SYMBOL(set_user_nice); |
4771 | 4844 | ||
@@ -4879,8 +4952,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
4879 | static void | 4952 | static void |
4880 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 4953 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) |
4881 | { | 4954 | { |
4882 | BUG_ON(p->se.on_rq); | ||
4883 | |||
4884 | p->policy = policy; | 4955 | p->policy = policy; |
4885 | p->rt_priority = prio; | 4956 | p->rt_priority = prio; |
4886 | p->normal_prio = normal_prio(p); | 4957 | p->normal_prio = normal_prio(p); |
@@ -4995,20 +5066,17 @@ recheck: | |||
4995 | /* | 5066 | /* |
4996 | * make sure no PI-waiters arrive (or leave) while we are | 5067 | * make sure no PI-waiters arrive (or leave) while we are |
4997 | * changing the priority of the task: | 5068 | * changing the priority of the task: |
4998 | */ | 5069 | * |
4999 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
5000 | /* | ||
5001 | * To be able to change p->policy safely, the appropriate | 5070 | * To be able to change p->policy safely, the appropriate |
5002 | * runqueue lock must be held. | 5071 | * runqueue lock must be held. |
5003 | */ | 5072 | */ |
5004 | rq = __task_rq_lock(p); | 5073 | rq = task_rq_lock(p, &flags); |
5005 | 5074 | ||
5006 | /* | 5075 | /* |
5007 | * Changing the policy of the stop threads is a very bad idea | 5076 | * Changing the policy of the stop threads is a very bad idea |
5008 | */ | 5077 | */ |
5009 | if (p == rq->stop) { | 5078 | if (p == rq->stop) { |
5010 | __task_rq_unlock(rq); | 5079 | task_rq_unlock(rq, p, &flags); |
5011 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5012 | return -EINVAL; | 5080 | return -EINVAL; |
5013 | } | 5081 | } |
5014 | 5082 | ||
@@ -5032,8 +5100,7 @@ recheck: | |||
5032 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 5100 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
5033 | task_group(p)->rt_bandwidth.rt_runtime == 0 && | 5101 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
5034 | !task_group_is_autogroup(task_group(p))) { | 5102 | !task_group_is_autogroup(task_group(p))) { |
5035 | __task_rq_unlock(rq); | 5103 | task_rq_unlock(rq, p, &flags); |
5036 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5037 | return -EPERM; | 5104 | return -EPERM; |
5038 | } | 5105 | } |
5039 | } | 5106 | } |
@@ -5042,11 +5109,10 @@ recheck: | |||
5042 | /* recheck policy now with rq lock held */ | 5109 | /* recheck policy now with rq lock held */ |
5043 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 5110 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
5044 | policy = oldpolicy = -1; | 5111 | policy = oldpolicy = -1; |
5045 | __task_rq_unlock(rq); | 5112 | task_rq_unlock(rq, p, &flags); |
5046 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5047 | goto recheck; | 5113 | goto recheck; |
5048 | } | 5114 | } |
5049 | on_rq = p->se.on_rq; | 5115 | on_rq = p->on_rq; |
5050 | running = task_current(rq, p); | 5116 | running = task_current(rq, p); |
5051 | if (on_rq) | 5117 | if (on_rq) |
5052 | deactivate_task(rq, p, 0); | 5118 | deactivate_task(rq, p, 0); |
@@ -5065,8 +5131,7 @@ recheck: | |||
5065 | activate_task(rq, p, 0); | 5131 | activate_task(rq, p, 0); |
5066 | 5132 | ||
5067 | check_class_changed(rq, p, prev_class, oldprio); | 5133 | check_class_changed(rq, p, prev_class, oldprio); |
5068 | __task_rq_unlock(rq); | 5134 | task_rq_unlock(rq, p, &flags); |
5069 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5070 | 5135 | ||
5071 | rt_mutex_adjust_pi(p); | 5136 | rt_mutex_adjust_pi(p); |
5072 | 5137 | ||
@@ -5317,7 +5382,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5317 | { | 5382 | { |
5318 | struct task_struct *p; | 5383 | struct task_struct *p; |
5319 | unsigned long flags; | 5384 | unsigned long flags; |
5320 | struct rq *rq; | ||
5321 | int retval; | 5385 | int retval; |
5322 | 5386 | ||
5323 | get_online_cpus(); | 5387 | get_online_cpus(); |
@@ -5332,9 +5396,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5332 | if (retval) | 5396 | if (retval) |
5333 | goto out_unlock; | 5397 | goto out_unlock; |
5334 | 5398 | ||
5335 | rq = task_rq_lock(p, &flags); | 5399 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
5336 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 5400 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
5337 | task_rq_unlock(rq, &flags); | 5401 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
5338 | 5402 | ||
5339 | out_unlock: | 5403 | out_unlock: |
5340 | rcu_read_unlock(); | 5404 | rcu_read_unlock(); |
@@ -5659,7 +5723,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
5659 | 5723 | ||
5660 | rq = task_rq_lock(p, &flags); | 5724 | rq = task_rq_lock(p, &flags); |
5661 | time_slice = p->sched_class->get_rr_interval(rq, p); | 5725 | time_slice = p->sched_class->get_rr_interval(rq, p); |
5662 | task_rq_unlock(rq, &flags); | 5726 | task_rq_unlock(rq, p, &flags); |
5663 | 5727 | ||
5664 | rcu_read_unlock(); | 5728 | rcu_read_unlock(); |
5665 | jiffies_to_timespec(time_slice, &t); | 5729 | jiffies_to_timespec(time_slice, &t); |
@@ -5777,8 +5841,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5777 | rcu_read_unlock(); | 5841 | rcu_read_unlock(); |
5778 | 5842 | ||
5779 | rq->curr = rq->idle = idle; | 5843 | rq->curr = rq->idle = idle; |
5780 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5844 | #if defined(CONFIG_SMP) |
5781 | idle->oncpu = 1; | 5845 | idle->on_cpu = 1; |
5782 | #endif | 5846 | #endif |
5783 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5847 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5784 | 5848 | ||
@@ -5882,18 +5946,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
5882 | unsigned int dest_cpu; | 5946 | unsigned int dest_cpu; |
5883 | int ret = 0; | 5947 | int ret = 0; |
5884 | 5948 | ||
5885 | /* | ||
5886 | * Serialize against TASK_WAKING so that ttwu() and wunt() can | ||
5887 | * drop the rq->lock and still rely on ->cpus_allowed. | ||
5888 | */ | ||
5889 | again: | ||
5890 | while (task_is_waking(p)) | ||
5891 | cpu_relax(); | ||
5892 | rq = task_rq_lock(p, &flags); | 5949 | rq = task_rq_lock(p, &flags); |
5893 | if (task_is_waking(p)) { | ||
5894 | task_rq_unlock(rq, &flags); | ||
5895 | goto again; | ||
5896 | } | ||
5897 | 5950 | ||
5898 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 5951 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
5899 | ret = -EINVAL; | 5952 | ret = -EINVAL; |
@@ -5918,16 +5971,16 @@ again: | |||
5918 | goto out; | 5971 | goto out; |
5919 | 5972 | ||
5920 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5973 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5921 | if (migrate_task(p, rq)) { | 5974 | if (p->on_rq) { |
5922 | struct migration_arg arg = { p, dest_cpu }; | 5975 | struct migration_arg arg = { p, dest_cpu }; |
5923 | /* Need help from migration thread: drop lock and wait. */ | 5976 | /* Need help from migration thread: drop lock and wait. */ |
5924 | task_rq_unlock(rq, &flags); | 5977 | task_rq_unlock(rq, p, &flags); |
5925 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 5978 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
5926 | tlb_migrate_finish(p->mm); | 5979 | tlb_migrate_finish(p->mm); |
5927 | return 0; | 5980 | return 0; |
5928 | } | 5981 | } |
5929 | out: | 5982 | out: |
5930 | task_rq_unlock(rq, &flags); | 5983 | task_rq_unlock(rq, p, &flags); |
5931 | 5984 | ||
5932 | return ret; | 5985 | return ret; |
5933 | } | 5986 | } |
@@ -5955,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5955 | rq_src = cpu_rq(src_cpu); | 6008 | rq_src = cpu_rq(src_cpu); |
5956 | rq_dest = cpu_rq(dest_cpu); | 6009 | rq_dest = cpu_rq(dest_cpu); |
5957 | 6010 | ||
6011 | raw_spin_lock(&p->pi_lock); | ||
5958 | double_rq_lock(rq_src, rq_dest); | 6012 | double_rq_lock(rq_src, rq_dest); |
5959 | /* Already moved. */ | 6013 | /* Already moved. */ |
5960 | if (task_cpu(p) != src_cpu) | 6014 | if (task_cpu(p) != src_cpu) |
@@ -5967,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5967 | * If we're not on a rq, the next wake-up will ensure we're | 6021 | * If we're not on a rq, the next wake-up will ensure we're |
5968 | * placed properly. | 6022 | * placed properly. |
5969 | */ | 6023 | */ |
5970 | if (p->se.on_rq) { | 6024 | if (p->on_rq) { |
5971 | deactivate_task(rq_src, p, 0); | 6025 | deactivate_task(rq_src, p, 0); |
5972 | set_task_cpu(p, dest_cpu); | 6026 | set_task_cpu(p, dest_cpu); |
5973 | activate_task(rq_dest, p, 0); | 6027 | activate_task(rq_dest, p, 0); |
@@ -5977,6 +6031,7 @@ done: | |||
5977 | ret = 1; | 6031 | ret = 1; |
5978 | fail: | 6032 | fail: |
5979 | double_rq_unlock(rq_src, rq_dest); | 6033 | double_rq_unlock(rq_src, rq_dest); |
6034 | raw_spin_unlock(&p->pi_lock); | ||
5980 | return ret; | 6035 | return ret; |
5981 | } | 6036 | } |
5982 | 6037 | ||
@@ -6317,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6317 | 6372 | ||
6318 | #ifdef CONFIG_HOTPLUG_CPU | 6373 | #ifdef CONFIG_HOTPLUG_CPU |
6319 | case CPU_DYING: | 6374 | case CPU_DYING: |
6375 | sched_ttwu_pending(); | ||
6320 | /* Update our root-domain */ | 6376 | /* Update our root-domain */ |
6321 | raw_spin_lock_irqsave(&rq->lock, flags); | 6377 | raw_spin_lock_irqsave(&rq->lock, flags); |
6322 | if (rq->rd) { | 6378 | if (rq->rd) { |
@@ -7961,7 +8017,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
7961 | int old_prio = p->prio; | 8017 | int old_prio = p->prio; |
7962 | int on_rq; | 8018 | int on_rq; |
7963 | 8019 | ||
7964 | on_rq = p->se.on_rq; | 8020 | on_rq = p->on_rq; |
7965 | if (on_rq) | 8021 | if (on_rq) |
7966 | deactivate_task(rq, p, 0); | 8022 | deactivate_task(rq, p, 0); |
7967 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 8023 | __setscheduler(rq, p, SCHED_NORMAL, 0); |
@@ -8304,7 +8360,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8304 | rq = task_rq_lock(tsk, &flags); | 8360 | rq = task_rq_lock(tsk, &flags); |
8305 | 8361 | ||
8306 | running = task_current(rq, tsk); | 8362 | running = task_current(rq, tsk); |
8307 | on_rq = tsk->se.on_rq; | 8363 | on_rq = tsk->on_rq; |
8308 | 8364 | ||
8309 | if (on_rq) | 8365 | if (on_rq) |
8310 | dequeue_task(rq, tsk, 0); | 8366 | dequeue_task(rq, tsk, 0); |
@@ -8323,7 +8379,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8323 | if (on_rq) | 8379 | if (on_rq) |
8324 | enqueue_task(rq, tsk, 0); | 8380 | enqueue_task(rq, tsk, 0); |
8325 | 8381 | ||
8326 | task_rq_unlock(rq, &flags); | 8382 | task_rq_unlock(rq, tsk, &flags); |
8327 | } | 8383 | } |
8328 | #endif /* CONFIG_CGROUP_SCHED */ | 8384 | #endif /* CONFIG_CGROUP_SCHED */ |
8329 | 8385 | ||
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 7bacd83a4158..3669bec6e130 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c | |||
@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) | |||
152 | read_lock_irqsave(&tasklist_lock, flags); | 152 | read_lock_irqsave(&tasklist_lock, flags); |
153 | 153 | ||
154 | do_each_thread(g, p) { | 154 | do_each_thread(g, p) { |
155 | if (!p->se.on_rq || task_cpu(p) != rq_cpu) | 155 | if (!p->on_rq || task_cpu(p) != rq_cpu) |
156 | continue; | 156 | continue; |
157 | 157 | ||
158 | print_task(m, rq, p); | 158 | print_task(m, rq, p); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9c5679cfe3b0..87445931a179 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) | |||
358 | } | 358 | } |
359 | 359 | ||
360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); | 360 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); |
361 | #ifndef CONFIG_64BIT | ||
362 | smp_wmb(); | ||
363 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
364 | #endif | ||
361 | } | 365 | } |
362 | 366 | ||
363 | /* | 367 | /* |
@@ -1372,12 +1376,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1372 | 1376 | ||
1373 | #ifdef CONFIG_SMP | 1377 | #ifdef CONFIG_SMP |
1374 | 1378 | ||
1375 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1379 | static void task_waking_fair(struct task_struct *p) |
1376 | { | 1380 | { |
1377 | struct sched_entity *se = &p->se; | 1381 | struct sched_entity *se = &p->se; |
1378 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1382 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1383 | u64 min_vruntime; | ||
1379 | 1384 | ||
1380 | se->vruntime -= cfs_rq->min_vruntime; | 1385 | #ifndef CONFIG_64BIT |
1386 | u64 min_vruntime_copy; | ||
1387 | |||
1388 | do { | ||
1389 | min_vruntime_copy = cfs_rq->min_vruntime_copy; | ||
1390 | smp_rmb(); | ||
1391 | min_vruntime = cfs_rq->min_vruntime; | ||
1392 | } while (min_vruntime != min_vruntime_copy); | ||
1393 | #else | ||
1394 | min_vruntime = cfs_rq->min_vruntime; | ||
1395 | #endif | ||
1396 | |||
1397 | se->vruntime -= min_vruntime; | ||
1381 | } | 1398 | } |
1382 | 1399 | ||
1383 | #ifdef CONFIG_FAIR_GROUP_SCHED | 1400 | #ifdef CONFIG_FAIR_GROUP_SCHED |
@@ -1659,7 +1676,7 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
1659 | * preempt must be disabled. | 1676 | * preempt must be disabled. |
1660 | */ | 1677 | */ |
1661 | static int | 1678 | static int |
1662 | select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags) | 1679 | select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) |
1663 | { | 1680 | { |
1664 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; | 1681 | struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL; |
1665 | int cpu = smp_processor_id(); | 1682 | int cpu = smp_processor_id(); |
diff --git a/kernel/sched_features.h b/kernel/sched_features.h index 68e69acc29b9..be40f7371ee1 100644 --- a/kernel/sched_features.h +++ b/kernel/sched_features.h | |||
@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1) | |||
64 | * Decrement CPU power based on irq activity | 64 | * Decrement CPU power based on irq activity |
65 | */ | 65 | */ |
66 | SCHED_FEAT(NONIRQ_POWER, 1) | 66 | SCHED_FEAT(NONIRQ_POWER, 1) |
67 | |||
68 | /* | ||
69 | * Queue remote wakeups on the target CPU and process them | ||
70 | * using the scheduler IPI. Reduces rq->lock contention/bounces. | ||
71 | */ | ||
72 | SCHED_FEAT(TTWU_QUEUE, 1) | ||
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index a776a6396427..0a51882534ea 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #ifdef CONFIG_SMP | 8 | #ifdef CONFIG_SMP |
9 | static int | 9 | static int |
10 | select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 10 | select_task_rq_idle(struct task_struct *p, int sd_flag, int flags) |
11 | { | 11 | { |
12 | return task_cpu(p); /* IDLE tasks as never migrated */ | 12 | return task_cpu(p); /* IDLE tasks as never migrated */ |
13 | } | 13 | } |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index e7cebdc65f82..19ecb3127379 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -977,13 +977,23 @@ static void yield_task_rt(struct rq *rq) | |||
977 | static int find_lowest_rq(struct task_struct *task); | 977 | static int find_lowest_rq(struct task_struct *task); |
978 | 978 | ||
979 | static int | 979 | static int |
980 | select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | 980 | select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) |
981 | { | 981 | { |
982 | struct task_struct *curr; | ||
983 | struct rq *rq; | ||
984 | int cpu; | ||
985 | |||
982 | if (sd_flag != SD_BALANCE_WAKE) | 986 | if (sd_flag != SD_BALANCE_WAKE) |
983 | return smp_processor_id(); | 987 | return smp_processor_id(); |
984 | 988 | ||
989 | cpu = task_cpu(p); | ||
990 | rq = cpu_rq(cpu); | ||
991 | |||
992 | rcu_read_lock(); | ||
993 | curr = ACCESS_ONCE(rq->curr); /* unlocked access */ | ||
994 | |||
985 | /* | 995 | /* |
986 | * If the current task is an RT task, then | 996 | * If the current task on @p's runqueue is an RT task, then |
987 | * try to see if we can wake this RT task up on another | 997 | * try to see if we can wake this RT task up on another |
988 | * runqueue. Otherwise simply start this RT task | 998 | * runqueue. Otherwise simply start this RT task |
989 | * on its current runqueue. | 999 | * on its current runqueue. |
@@ -997,21 +1007,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags) | |||
997 | * lock? | 1007 | * lock? |
998 | * | 1008 | * |
999 | * For equal prio tasks, we just let the scheduler sort it out. | 1009 | * For equal prio tasks, we just let the scheduler sort it out. |
1010 | * | ||
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | * | ||
1014 | * This test is optimistic, if we get it wrong the load-balancer | ||
1015 | * will have to sort it out. | ||
1000 | */ | 1016 | */ |
1001 | if (unlikely(rt_task(rq->curr)) && | 1017 | if (curr && unlikely(rt_task(curr)) && |
1002 | (rq->curr->rt.nr_cpus_allowed < 2 || | 1018 | (curr->rt.nr_cpus_allowed < 2 || |
1003 | rq->curr->prio < p->prio) && | 1019 | curr->prio < p->prio) && |
1004 | (p->rt.nr_cpus_allowed > 1)) { | 1020 | (p->rt.nr_cpus_allowed > 1)) { |
1005 | int cpu = find_lowest_rq(p); | 1021 | int target = find_lowest_rq(p); |
1006 | 1022 | ||
1007 | return (cpu == -1) ? task_cpu(p) : cpu; | 1023 | if (target != -1) |
1024 | cpu = target; | ||
1008 | } | 1025 | } |
1026 | rcu_read_unlock(); | ||
1009 | 1027 | ||
1010 | /* | 1028 | return cpu; |
1011 | * Otherwise, just let it ride on the affined RQ and the | ||
1012 | * post-schedule router will push the preempted task away | ||
1013 | */ | ||
1014 | return task_cpu(p); | ||
1015 | } | 1029 | } |
1016 | 1030 | ||
1017 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | 1031 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) |
@@ -1136,7 +1150,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
1136 | * The previous task needs to be made eligible for pushing | 1150 | * The previous task needs to be made eligible for pushing |
1137 | * if it is still active | 1151 | * if it is still active |
1138 | */ | 1152 | */ |
1139 | if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) | 1153 | if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) |
1140 | enqueue_pushable_task(rq, p); | 1154 | enqueue_pushable_task(rq, p); |
1141 | } | 1155 | } |
1142 | 1156 | ||
@@ -1287,7 +1301,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
1287 | !cpumask_test_cpu(lowest_rq->cpu, | 1301 | !cpumask_test_cpu(lowest_rq->cpu, |
1288 | &task->cpus_allowed) || | 1302 | &task->cpus_allowed) || |
1289 | task_running(rq, task) || | 1303 | task_running(rq, task) || |
1290 | !task->se.on_rq)) { | 1304 | !task->on_rq)) { |
1291 | 1305 | ||
1292 | raw_spin_unlock(&lowest_rq->lock); | 1306 | raw_spin_unlock(&lowest_rq->lock); |
1293 | lowest_rq = NULL; | 1307 | lowest_rq = NULL; |
@@ -1321,7 +1335,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) | |||
1321 | BUG_ON(task_current(rq, p)); | 1335 | BUG_ON(task_current(rq, p)); |
1322 | BUG_ON(p->rt.nr_cpus_allowed <= 1); | 1336 | BUG_ON(p->rt.nr_cpus_allowed <= 1); |
1323 | 1337 | ||
1324 | BUG_ON(!p->se.on_rq); | 1338 | BUG_ON(!p->on_rq); |
1325 | BUG_ON(!rt_task(p)); | 1339 | BUG_ON(!rt_task(p)); |
1326 | 1340 | ||
1327 | return p; | 1341 | return p; |
@@ -1467,7 +1481,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1467 | */ | 1481 | */ |
1468 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { | 1482 | if (p && (p->prio < this_rq->rt.highest_prio.curr)) { |
1469 | WARN_ON(p == src_rq->curr); | 1483 | WARN_ON(p == src_rq->curr); |
1470 | WARN_ON(!p->se.on_rq); | 1484 | WARN_ON(!p->on_rq); |
1471 | 1485 | ||
1472 | /* | 1486 | /* |
1473 | * There's a chance that p is higher in priority | 1487 | * There's a chance that p is higher in priority |
@@ -1538,7 +1552,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1538 | * Update the migration status of the RQ if we have an RT task | 1552 | * Update the migration status of the RQ if we have an RT task |
1539 | * which is running AND changing its weight value. | 1553 | * which is running AND changing its weight value. |
1540 | */ | 1554 | */ |
1541 | if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { | 1555 | if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) { |
1542 | struct rq *rq = task_rq(p); | 1556 | struct rq *rq = task_rq(p); |
1543 | 1557 | ||
1544 | if (!task_current(rq, p)) { | 1558 | if (!task_current(rq, p)) { |
@@ -1608,7 +1622,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p) | |||
1608 | * we may need to handle the pulling of RT tasks | 1622 | * we may need to handle the pulling of RT tasks |
1609 | * now. | 1623 | * now. |
1610 | */ | 1624 | */ |
1611 | if (p->se.on_rq && !rq->rt.rt_nr_running) | 1625 | if (p->on_rq && !rq->rt.rt_nr_running) |
1612 | pull_rt_task(rq); | 1626 | pull_rt_task(rq); |
1613 | } | 1627 | } |
1614 | 1628 | ||
@@ -1638,7 +1652,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1638 | * If that current running task is also an RT task | 1652 | * If that current running task is also an RT task |
1639 | * then see if we can move to another run queue. | 1653 | * then see if we can move to another run queue. |
1640 | */ | 1654 | */ |
1641 | if (p->se.on_rq && rq->curr != p) { | 1655 | if (p->on_rq && rq->curr != p) { |
1642 | #ifdef CONFIG_SMP | 1656 | #ifdef CONFIG_SMP |
1643 | if (rq->rt.overloaded && push_rt_task(rq) && | 1657 | if (rq->rt.overloaded && push_rt_task(rq) && |
1644 | /* Don't resched if we changed runqueues */ | 1658 | /* Don't resched if we changed runqueues */ |
@@ -1657,7 +1671,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p) | |||
1657 | static void | 1671 | static void |
1658 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) | 1672 | prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) |
1659 | { | 1673 | { |
1660 | if (!p->se.on_rq) | 1674 | if (!p->on_rq) |
1661 | return; | 1675 | return; |
1662 | 1676 | ||
1663 | if (rq->curr == p) { | 1677 | if (rq->curr == p) { |
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c index 1ba2bd40fdac..6f437632afab 100644 --- a/kernel/sched_stoptask.c +++ b/kernel/sched_stoptask.c | |||
@@ -9,8 +9,7 @@ | |||
9 | 9 | ||
10 | #ifdef CONFIG_SMP | 10 | #ifdef CONFIG_SMP |
11 | static int | 11 | static int |
12 | select_task_rq_stop(struct rq *rq, struct task_struct *p, | 12 | select_task_rq_stop(struct task_struct *p, int sd_flag, int flags) |
13 | int sd_flag, int flags) | ||
14 | { | 13 | { |
15 | return task_cpu(p); /* stop tasks as never migrate */ | 14 | return task_cpu(p); /* stop tasks as never migrate */ |
16 | } | 15 | } |
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq) | |||
26 | { | 25 | { |
27 | struct task_struct *stop = rq->stop; | 26 | struct task_struct *stop = rq->stop; |
28 | 27 | ||
29 | if (stop && stop->se.on_rq) | 28 | if (stop && stop->on_rq) |
30 | return stop; | 29 | return stop; |
31 | 30 | ||
32 | return NULL; | 31 | return NULL; |