Diffstat (limited to 'kernel/sched.c')
-rw-r--r--    kernel/sched.c    1658
1 file changed, 667 insertions, 991 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 312f8b95c2d4..c62acf45d3b9 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -231,7 +231,7 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
231 | #endif | 231 | #endif |
232 | 232 | ||
233 | /* | 233 | /* |
234 | * sched_domains_mutex serializes calls to arch_init_sched_domains, | 234 | * sched_domains_mutex serializes calls to init_sched_domains, |
235 | * detach_destroy_domains and partition_sched_domains. | 235 | * detach_destroy_domains and partition_sched_domains. |
236 | */ | 236 | */ |
237 | static DEFINE_MUTEX(sched_domains_mutex); | 237 | static DEFINE_MUTEX(sched_domains_mutex); |
@@ -312,6 +312,9 @@ struct cfs_rq { | |||
312 | 312 | ||
313 | u64 exec_clock; | 313 | u64 exec_clock; |
314 | u64 min_vruntime; | 314 | u64 min_vruntime; |
315 | #ifndef CONFIG_64BIT | ||
316 | u64 min_vruntime_copy; | ||
317 | #endif | ||
315 | 318 | ||
316 | struct rb_root tasks_timeline; | 319 | struct rb_root tasks_timeline; |
317 | struct rb_node *rb_leftmost; | 320 | struct rb_node *rb_leftmost; |
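The new min_vruntime_copy field exists only for !CONFIG_64BIT because a 64-bit load is not atomic on 32-bit machines, so a reader that peeks at min_vruntime without holding rq->lock could observe a torn value. A standalone C11 sketch of the double-copy scheme such a field enables (illustrative only, not the kernel code; the names are invented):

#include <stdatomic.h>
#include <stdint.h>

struct clocked {
	uint64_t val;        /* updated under the writer's lock */
	uint64_t val_copy;   /* mirror, written after a barrier */
};

static void publish(struct clocked *c, uint64_t v)
{
	c->val = v;
	atomic_thread_fence(memory_order_release);     /* plays the role of smp_wmb() */
	c->val_copy = v;
}

static uint64_t read_locklessly(struct clocked *c)
{
	uint64_t copy, v;

	do {
		copy = c->val_copy;
		atomic_thread_fence(memory_order_acquire); /* plays the role of smp_rmb() */
		v = c->val;
	} while (v != copy);                           /* torn or in-flight update: retry */

	return v;
}

On 64-bit a single aligned load is already atomic, so the extra field and barriers are not needed there, which is exactly what the #ifndef expresses.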
@@ -325,7 +328,9 @@ struct cfs_rq { | |||
325 | */ | 328 | */ |
326 | struct sched_entity *curr, *next, *last, *skip; | 329 | struct sched_entity *curr, *next, *last, *skip; |
327 | 330 | ||
331 | #ifdef CONFIG_SCHED_DEBUG | ||
328 | unsigned int nr_spread_over; | 332 | unsigned int nr_spread_over; |
333 | #endif | ||
329 | 334 | ||
330 | #ifdef CONFIG_FAIR_GROUP_SCHED | 335 | #ifdef CONFIG_FAIR_GROUP_SCHED |
331 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ | 336 | struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ |
@@ -417,6 +422,7 @@ struct rt_rq { | |||
417 | */ | 422 | */ |
418 | struct root_domain { | 423 | struct root_domain { |
419 | atomic_t refcount; | 424 | atomic_t refcount; |
425 | struct rcu_head rcu; | ||
420 | cpumask_var_t span; | 426 | cpumask_var_t span; |
421 | cpumask_var_t online; | 427 | cpumask_var_t online; |
422 | 428 | ||
@@ -460,7 +466,7 @@ struct rq { | |||
460 | u64 nohz_stamp; | 466 | u64 nohz_stamp; |
461 | unsigned char nohz_balance_kick; | 467 | unsigned char nohz_balance_kick; |
462 | #endif | 468 | #endif |
463 | unsigned int skip_clock_update; | 469 | int skip_clock_update; |
464 | 470 | ||
465 | /* capture load from *all* tasks on this cpu: */ | 471 | /* capture load from *all* tasks on this cpu: */ |
466 | struct load_weight load; | 472 | struct load_weight load; |
@@ -553,6 +559,10 @@ struct rq { | |||
553 | unsigned int ttwu_count; | 559 | unsigned int ttwu_count; |
554 | unsigned int ttwu_local; | 560 | unsigned int ttwu_local; |
555 | #endif | 561 | #endif |
562 | |||
563 | #ifdef CONFIG_SMP | ||
564 | struct task_struct *wake_list; | ||
565 | #endif | ||
556 | }; | 566 | }; |
557 | 567 | ||
558 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 568 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
@@ -571,7 +581,7 @@ static inline int cpu_of(struct rq *rq) | |||
571 | 581 | ||
572 | #define rcu_dereference_check_sched_domain(p) \ | 582 | #define rcu_dereference_check_sched_domain(p) \ |
573 | rcu_dereference_check((p), \ | 583 | rcu_dereference_check((p), \ |
574 | rcu_read_lock_sched_held() || \ | 584 | rcu_read_lock_held() || \ |
575 | lockdep_is_held(&sched_domains_mutex)) | 585 | lockdep_is_held(&sched_domains_mutex)) |
576 | 586 | ||
577 | /* | 587 | /* |
@@ -596,7 +606,7 @@ static inline int cpu_of(struct rq *rq) | |||
596 | * Return the group to which this tasks belongs. | 606 | * Return the group to which this tasks belongs. |
597 | * | 607 | * |
598 | * We use task_subsys_state_check() and extend the RCU verification | 608 | * We use task_subsys_state_check() and extend the RCU verification |
599 | * with lockdep_is_held(&task_rq(p)->lock) because cpu_cgroup_attach() | 609 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() |
600 | * holds that lock for each task it moves into the cgroup. Therefore | 610 | * holds that lock for each task it moves into the cgroup. Therefore |
601 | * by holding that lock, we pin the task to the current cgroup. | 611 | * by holding that lock, we pin the task to the current cgroup. |
602 | */ | 612 | */ |
@@ -606,7 +616,7 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
606 | struct cgroup_subsys_state *css; | 616 | struct cgroup_subsys_state *css; |
607 | 617 | ||
608 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 618 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
609 | lockdep_is_held(&task_rq(p)->lock)); | 619 | lockdep_is_held(&p->pi_lock)); |
610 | tg = container_of(css, struct task_group, css); | 620 | tg = container_of(css, struct task_group, css); |
611 | 621 | ||
612 | return autogroup_task_group(p, tg); | 622 | return autogroup_task_group(p, tg); |
@@ -642,7 +652,7 @@ static void update_rq_clock(struct rq *rq) | |||
642 | { | 652 | { |
643 | s64 delta; | 653 | s64 delta; |
644 | 654 | ||
645 | if (rq->skip_clock_update) | 655 | if (rq->skip_clock_update > 0) |
646 | return; | 656 | return; |
647 | 657 | ||
648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; | 658 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; |
@@ -838,18 +848,39 @@ static inline int task_current(struct rq *rq, struct task_struct *p) | |||
838 | return rq->curr == p; | 848 | return rq->curr == p; |
839 | } | 849 | } |
840 | 850 | ||
841 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
842 | static inline int task_running(struct rq *rq, struct task_struct *p) | 851 | static inline int task_running(struct rq *rq, struct task_struct *p) |
843 | { | 852 | { |
853 | #ifdef CONFIG_SMP | ||
854 | return p->on_cpu; | ||
855 | #else | ||
844 | return task_current(rq, p); | 856 | return task_current(rq, p); |
857 | #endif | ||
845 | } | 858 | } |
846 | 859 | ||
860 | #ifndef __ARCH_WANT_UNLOCKED_CTXSW | ||
847 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 861 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
848 | { | 862 | { |
863 | #ifdef CONFIG_SMP | ||
864 | /* | ||
865 | * We can optimise this out completely for !SMP, because the | ||
866 | * SMP rebalancing from interrupt is the only thing that cares | ||
867 | * here. | ||
868 | */ | ||
869 | next->on_cpu = 1; | ||
870 | #endif | ||
849 | } | 871 | } |
850 | 872 | ||
851 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | 873 | static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) |
852 | { | 874 | { |
875 | #ifdef CONFIG_SMP | ||
876 | /* | ||
877 | * After ->on_cpu is cleared, the task can be moved to a different CPU. | ||
878 | * We must ensure this doesn't happen until the switch is completely | ||
879 | * finished. | ||
880 | */ | ||
881 | smp_wmb(); | ||
882 | prev->on_cpu = 0; | ||
883 | #endif | ||
853 | #ifdef CONFIG_DEBUG_SPINLOCK | 884 | #ifdef CONFIG_DEBUG_SPINLOCK |
854 | /* this is a valid case when another task releases the spinlock */ | 885 | /* this is a valid case when another task releases the spinlock */ |
855 | rq->lock.owner = current; | 886 | rq->lock.owner = current; |
@@ -865,15 +896,6 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
865 | } | 896 | } |
866 | 897 | ||
867 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 898 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
868 | static inline int task_running(struct rq *rq, struct task_struct *p) | ||
869 | { | ||
870 | #ifdef CONFIG_SMP | ||
871 | return p->oncpu; | ||
872 | #else | ||
873 | return task_current(rq, p); | ||
874 | #endif | ||
875 | } | ||
876 | |||
877 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | 899 | static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) |
878 | { | 900 | { |
879 | #ifdef CONFIG_SMP | 901 | #ifdef CONFIG_SMP |
@@ -882,7 +904,7 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
882 | * SMP rebalancing from interrupt is the only thing that cares | 904 | * SMP rebalancing from interrupt is the only thing that cares |
883 | * here. | 905 | * here. |
884 | */ | 906 | */ |
885 | next->oncpu = 1; | 907 | next->on_cpu = 1; |
886 | #endif | 908 | #endif |
887 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 909 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
888 | raw_spin_unlock_irq(&rq->lock); | 910 | raw_spin_unlock_irq(&rq->lock); |
@@ -895,12 +917,12 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
895 | { | 917 | { |
896 | #ifdef CONFIG_SMP | 918 | #ifdef CONFIG_SMP |
897 | /* | 919 | /* |
898 | * After ->oncpu is cleared, the task can be moved to a different CPU. | 920 | * After ->on_cpu is cleared, the task can be moved to a different CPU. |
899 | * We must ensure this doesn't happen until the switch is completely | 921 | * We must ensure this doesn't happen until the switch is completely |
900 | * finished. | 922 | * finished. |
901 | */ | 923 | */ |
902 | smp_wmb(); | 924 | smp_wmb(); |
903 | prev->oncpu = 0; | 925 | prev->on_cpu = 0; |
904 | #endif | 926 | #endif |
905 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 927 | #ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
906 | local_irq_enable(); | 928 | local_irq_enable(); |
@@ -909,23 +931,15 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
909 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ | 931 | #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ |
910 | 932 | ||
911 | /* | 933 | /* |
912 | * Check whether the task is waking, we use this to synchronize ->cpus_allowed | 934 | * __task_rq_lock - lock the rq @p resides on. |
913 | * against ttwu(). | ||
914 | */ | ||
915 | static inline int task_is_waking(struct task_struct *p) | ||
916 | { | ||
917 | return unlikely(p->state == TASK_WAKING); | ||
918 | } | ||
919 | |||
920 | /* | ||
921 | * __task_rq_lock - lock the runqueue a given task resides on. | ||
922 | * Must be called interrupts disabled. | ||
923 | */ | 935 | */ |
924 | static inline struct rq *__task_rq_lock(struct task_struct *p) | 936 | static inline struct rq *__task_rq_lock(struct task_struct *p) |
925 | __acquires(rq->lock) | 937 | __acquires(rq->lock) |
926 | { | 938 | { |
927 | struct rq *rq; | 939 | struct rq *rq; |
928 | 940 | ||
941 | lockdep_assert_held(&p->pi_lock); | ||
942 | |||
929 | for (;;) { | 943 | for (;;) { |
930 | rq = task_rq(p); | 944 | rq = task_rq(p); |
931 | raw_spin_lock(&rq->lock); | 945 | raw_spin_lock(&rq->lock); |
@@ -936,22 +950,22 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
936 | } | 950 | } |
937 | 951 | ||
938 | /* | 952 | /* |
939 | * task_rq_lock - lock the runqueue a given task resides on and disable | 953 | * task_rq_lock - lock p->pi_lock and lock the rq @p resides on. |
940 | * interrupts. Note the ordering: we can safely lookup the task_rq without | ||
941 | * explicitly disabling preemption. | ||
942 | */ | 954 | */ |
943 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | 955 | static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) |
956 | __acquires(p->pi_lock) | ||
944 | __acquires(rq->lock) | 957 | __acquires(rq->lock) |
945 | { | 958 | { |
946 | struct rq *rq; | 959 | struct rq *rq; |
947 | 960 | ||
948 | for (;;) { | 961 | for (;;) { |
949 | local_irq_save(*flags); | 962 | raw_spin_lock_irqsave(&p->pi_lock, *flags); |
950 | rq = task_rq(p); | 963 | rq = task_rq(p); |
951 | raw_spin_lock(&rq->lock); | 964 | raw_spin_lock(&rq->lock); |
952 | if (likely(rq == task_rq(p))) | 965 | if (likely(rq == task_rq(p))) |
953 | return rq; | 966 | return rq; |
954 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 967 | raw_spin_unlock(&rq->lock); |
968 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
955 | } | 969 | } |
956 | } | 970 | } |
957 | 971 | ||
@@ -961,10 +975,13 @@ static void __task_rq_unlock(struct rq *rq) | |||
961 | raw_spin_unlock(&rq->lock); | 975 | raw_spin_unlock(&rq->lock); |
962 | } | 976 | } |
963 | 977 | ||
964 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 978 | static inline void |
979 | task_rq_unlock(struct rq *rq, struct task_struct *p, unsigned long *flags) | ||
965 | __releases(rq->lock) | 980 | __releases(rq->lock) |
981 | __releases(p->pi_lock) | ||
966 | { | 982 | { |
967 | raw_spin_unlock_irqrestore(&rq->lock, *flags); | 983 | raw_spin_unlock(&rq->lock); |
984 | raw_spin_unlock_irqrestore(&p->pi_lock, *flags); | ||
968 | } | 985 | } |
969 | 986 | ||
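task_rq_lock() now nests p->pi_lock outside rq->lock, and __task_rq_lock() still re-checks task_rq(p) after acquiring the lock because the task may have migrated in the meantime. A standalone C sketch of that lock-then-revalidate retry pattern (illustrative only; the struct and function names here are invented):

#include <pthread.h>
#include <stdatomic.h>

struct queue {
	pthread_mutex_t lock;
};

struct item {
	_Atomic(struct queue *) home;       /* which queue currently owns the item */
};

/* Lock the queue an item lives on, the way __task_rq_lock() locks task_rq(p). */
static struct queue *item_lock(struct item *it)
{
	for (;;) {
		struct queue *q = atomic_load(&it->home);

		pthread_mutex_lock(&q->lock);
		if (atomic_load(&it->home) == q)
			return q;                   /* still ours: the item is now pinned */
		pthread_mutex_unlock(&q->lock); /* it migrated while we slept: retry */
	}
}

Holding p->pi_lock as well, as the new task_rq_lock() does, closes the remaining races, since both the wakeup path and the migration paths in this patch take that lock first.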
970 | /* | 987 | /* |
@@ -1193,11 +1210,17 @@ int get_nohz_timer_target(void) | |||
1193 | int i; | 1210 | int i; |
1194 | struct sched_domain *sd; | 1211 | struct sched_domain *sd; |
1195 | 1212 | ||
1213 | rcu_read_lock(); | ||
1196 | for_each_domain(cpu, sd) { | 1214 | for_each_domain(cpu, sd) { |
1197 | for_each_cpu(i, sched_domain_span(sd)) | 1215 | for_each_cpu(i, sched_domain_span(sd)) { |
1198 | if (!idle_cpu(i)) | 1216 | if (!idle_cpu(i)) { |
1199 | return i; | 1217 | cpu = i; |
1218 | goto unlock; | ||
1219 | } | ||
1220 | } | ||
1200 | } | 1221 | } |
1222 | unlock: | ||
1223 | rcu_read_unlock(); | ||
1201 | return cpu; | 1224 | return cpu; |
1202 | } | 1225 | } |
1203 | /* | 1226 | /* |
@@ -1307,15 +1330,15 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1307 | { | 1330 | { |
1308 | u64 tmp; | 1331 | u64 tmp; |
1309 | 1332 | ||
1333 | tmp = (u64)delta_exec * weight; | ||
1334 | |||
1310 | if (!lw->inv_weight) { | 1335 | if (!lw->inv_weight) { |
1311 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) | 1336 | if (BITS_PER_LONG > 32 && unlikely(lw->weight >= WMULT_CONST)) |
1312 | lw->inv_weight = 1; | 1337 | lw->inv_weight = 1; |
1313 | else | 1338 | else |
1314 | lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2) | 1339 | lw->inv_weight = WMULT_CONST / lw->weight; |
1315 | / (lw->weight+1); | ||
1316 | } | 1340 | } |
1317 | 1341 | ||
1318 | tmp = (u64)delta_exec * weight; | ||
1319 | /* | 1342 | /* |
1320 | * Check whether we'd overflow the 64-bit multiplication: | 1343 | * Check whether we'd overflow the 64-bit multiplication: |
1321 | */ | 1344 | */ |
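The calc_delta_mine() change caches inv_weight as WMULT_CONST / weight so the hot path can replace a division by the load weight with a multiply and a shift. A standalone sketch of that fixed-point trick (illustrative only; the 2^32 scale below is an assumption for the example, the kernel's WMULT_CONST/WMULT_SHIFT definitions vary with word size):

#include <stdint.h>
#include <stdio.h>

#define SCALE_SHIFT	32
#define SCALE_CONST	(1ULL << SCALE_SHIFT)

/* delta * weight / total, computed as delta * weight * inv >> SCALE_SHIFT */
static uint64_t scale_delta(uint64_t delta, uint32_t weight, uint32_t total)
{
	uint64_t inv = SCALE_CONST / total;        /* cached once, like lw->inv_weight */

	return (delta * weight * inv) >> SCALE_SHIFT;
}

int main(void)
{
	/* a 6ms slice, nice-0 weight 1024 out of a total runqueue weight of 3072 */
	printf("%llu\n", (unsigned long long)scale_delta(6000000ULL, 1024, 3072));
	return 0;
}

The intermediate product can overflow 64 bits for large deltas and weights, which is why calc_delta_mine() keeps the overflow check that follows this hunk.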
@@ -1773,7 +1796,6 @@ static void enqueue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1773 | update_rq_clock(rq); | 1796 | update_rq_clock(rq); |
1774 | sched_info_queued(p); | 1797 | sched_info_queued(p); |
1775 | p->sched_class->enqueue_task(rq, p, flags); | 1798 | p->sched_class->enqueue_task(rq, p, flags); |
1776 | p->se.on_rq = 1; | ||
1777 | } | 1799 | } |
1778 | 1800 | ||
1779 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | 1801 | static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) |
@@ -1781,7 +1803,6 @@ static void dequeue_task(struct rq *rq, struct task_struct *p, int flags) | |||
1781 | update_rq_clock(rq); | 1803 | update_rq_clock(rq); |
1782 | sched_info_dequeued(p); | 1804 | sched_info_dequeued(p); |
1783 | p->sched_class->dequeue_task(rq, p, flags); | 1805 | p->sched_class->dequeue_task(rq, p, flags); |
1784 | p->se.on_rq = 0; | ||
1785 | } | 1806 | } |
1786 | 1807 | ||
1787 | /* | 1808 | /* |
@@ -2116,7 +2137,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | |||
2116 | * A queue event has occurred, and we're going to schedule. In | 2137 | * A queue event has occurred, and we're going to schedule. In |
2117 | * this case, we can save a useless back to back clock update. | 2138 | * this case, we can save a useless back to back clock update. |
2118 | */ | 2139 | */ |
2119 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | 2140 | if (rq->curr->on_rq && test_tsk_need_resched(rq->curr)) |
2120 | rq->skip_clock_update = 1; | 2141 | rq->skip_clock_update = 1; |
2121 | } | 2142 | } |
2122 | 2143 | ||
@@ -2162,6 +2183,11 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
2162 | */ | 2183 | */ |
2163 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | 2184 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && |
2164 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2185 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2186 | |||
2187 | #ifdef CONFIG_LOCKDEP | ||
2188 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | ||
2189 | lockdep_is_held(&task_rq(p)->lock))); | ||
2190 | #endif | ||
2165 | #endif | 2191 | #endif |
2166 | 2192 | ||
2167 | trace_sched_migrate_task(p, new_cpu); | 2193 | trace_sched_migrate_task(p, new_cpu); |
@@ -2182,19 +2208,6 @@ struct migration_arg { | |||
2182 | static int migration_cpu_stop(void *data); | 2208 | static int migration_cpu_stop(void *data); |
2183 | 2209 | ||
2184 | /* | 2210 | /* |
2185 | * The task's runqueue lock must be held. | ||
2186 | * Returns true if you have to wait for migration thread. | ||
2187 | */ | ||
2188 | static bool migrate_task(struct task_struct *p, struct rq *rq) | ||
2189 | { | ||
2190 | /* | ||
2191 | * If the task is not on a runqueue (and not running), then | ||
2192 | * the next wake-up will properly place the task. | ||
2193 | */ | ||
2194 | return p->se.on_rq || task_running(rq, p); | ||
2195 | } | ||
2196 | |||
2197 | /* | ||
2198 | * wait_task_inactive - wait for a thread to unschedule. | 2211 | * wait_task_inactive - wait for a thread to unschedule. |
2199 | * | 2212 | * |
2200 | * If @match_state is nonzero, it's the @p->state value just checked and | 2213 | * If @match_state is nonzero, it's the @p->state value just checked and |
@@ -2251,11 +2264,11 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state) | |||
2251 | rq = task_rq_lock(p, &flags); | 2264 | rq = task_rq_lock(p, &flags); |
2252 | trace_sched_wait_task(p); | 2265 | trace_sched_wait_task(p); |
2253 | running = task_running(rq, p); | 2266 | running = task_running(rq, p); |
2254 | on_rq = p->se.on_rq; | 2267 | on_rq = p->on_rq; |
2255 | ncsw = 0; | 2268 | ncsw = 0; |
2256 | if (!match_state || p->state == match_state) | 2269 | if (!match_state || p->state == match_state) |
2257 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ | 2270 | ncsw = p->nvcsw | LONG_MIN; /* sets MSB */ |
2258 | task_rq_unlock(rq, &flags); | 2271 | task_rq_unlock(rq, p, &flags); |
2259 | 2272 | ||
2260 | /* | 2273 | /* |
2261 | * If it changed from the expected state, bail out now. | 2274 | * If it changed from the expected state, bail out now. |
@@ -2330,7 +2343,7 @@ EXPORT_SYMBOL_GPL(kick_process); | |||
2330 | 2343 | ||
2331 | #ifdef CONFIG_SMP | 2344 | #ifdef CONFIG_SMP |
2332 | /* | 2345 | /* |
2333 | * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held. | 2346 | * ->cpus_allowed is protected by both rq->lock and p->pi_lock |
2334 | */ | 2347 | */ |
2335 | static int select_fallback_rq(int cpu, struct task_struct *p) | 2348 | static int select_fallback_rq(int cpu, struct task_struct *p) |
2336 | { | 2349 | { |
@@ -2363,12 +2376,12 @@ static int select_fallback_rq(int cpu, struct task_struct *p) | |||
2363 | } | 2376 | } |
2364 | 2377 | ||
2365 | /* | 2378 | /* |
2366 | * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable. | 2379 | * The caller (fork, wakeup) owns p->pi_lock, ->cpus_allowed is stable. |
2367 | */ | 2380 | */ |
2368 | static inline | 2381 | static inline |
2369 | int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags) | 2382 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) |
2370 | { | 2383 | { |
2371 | int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags); | 2384 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); |
2372 | 2385 | ||
2373 | /* | 2386 | /* |
2374 | * In order not to call set_task_cpu() on a blocking task we need | 2387 | * In order not to call set_task_cpu() on a blocking task we need |
@@ -2394,27 +2407,62 @@ static void update_avg(u64 *avg, u64 sample) | |||
2394 | } | 2407 | } |
2395 | #endif | 2408 | #endif |
2396 | 2409 | ||
2397 | static inline void ttwu_activate(struct task_struct *p, struct rq *rq, | 2410 | static void |
2398 | bool is_sync, bool is_migrate, bool is_local, | 2411 | ttwu_stat(struct task_struct *p, int cpu, int wake_flags) |
2399 | unsigned long en_flags) | ||
2400 | { | 2412 | { |
2413 | #ifdef CONFIG_SCHEDSTATS | ||
2414 | struct rq *rq = this_rq(); | ||
2415 | |||
2416 | #ifdef CONFIG_SMP | ||
2417 | int this_cpu = smp_processor_id(); | ||
2418 | |||
2419 | if (cpu == this_cpu) { | ||
2420 | schedstat_inc(rq, ttwu_local); | ||
2421 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2422 | } else { | ||
2423 | struct sched_domain *sd; | ||
2424 | |||
2425 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2426 | rcu_read_lock(); | ||
2427 | for_each_domain(this_cpu, sd) { | ||
2428 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2429 | schedstat_inc(sd, ttwu_wake_remote); | ||
2430 | break; | ||
2431 | } | ||
2432 | } | ||
2433 | rcu_read_unlock(); | ||
2434 | } | ||
2435 | #endif /* CONFIG_SMP */ | ||
2436 | |||
2437 | schedstat_inc(rq, ttwu_count); | ||
2401 | schedstat_inc(p, se.statistics.nr_wakeups); | 2438 | schedstat_inc(p, se.statistics.nr_wakeups); |
2402 | if (is_sync) | 2439 | |
2440 | if (wake_flags & WF_SYNC) | ||
2403 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2441 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
2404 | if (is_migrate) | 2442 | |
2443 | if (cpu != task_cpu(p)) | ||
2405 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | 2444 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); |
2406 | if (is_local) | ||
2407 | schedstat_inc(p, se.statistics.nr_wakeups_local); | ||
2408 | else | ||
2409 | schedstat_inc(p, se.statistics.nr_wakeups_remote); | ||
2410 | 2445 | ||
2446 | #endif /* CONFIG_SCHEDSTATS */ | ||
2447 | } | ||
2448 | |||
2449 | static void ttwu_activate(struct rq *rq, struct task_struct *p, int en_flags) | ||
2450 | { | ||
2411 | activate_task(rq, p, en_flags); | 2451 | activate_task(rq, p, en_flags); |
2452 | p->on_rq = 1; | ||
2453 | |||
2454 | /* if a worker is waking up, notify workqueue */ | ||
2455 | if (p->flags & PF_WQ_WORKER) | ||
2456 | wq_worker_waking_up(p, cpu_of(rq)); | ||
2412 | } | 2457 | } |
2413 | 2458 | ||
2414 | static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | 2459 | /* |
2415 | int wake_flags, bool success) | 2460 | * Mark the task runnable and perform wakeup-preemption. |
2461 | */ | ||
2462 | static void | ||
2463 | ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2416 | { | 2464 | { |
2417 | trace_sched_wakeup(p, success); | 2465 | trace_sched_wakeup(p, true); |
2418 | check_preempt_curr(rq, p, wake_flags); | 2466 | check_preempt_curr(rq, p, wake_flags); |
2419 | 2467 | ||
2420 | p->state = TASK_RUNNING; | 2468 | p->state = TASK_RUNNING; |
@@ -2433,9 +2481,99 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2433 | rq->idle_stamp = 0; | 2481 | rq->idle_stamp = 0; |
2434 | } | 2482 | } |
2435 | #endif | 2483 | #endif |
2436 | /* if a worker is waking up, notify workqueue */ | 2484 | } |
2437 | if ((p->flags & PF_WQ_WORKER) && success) | 2485 | |
2438 | wq_worker_waking_up(p, cpu_of(rq)); | 2486 | static void |
2487 | ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) | ||
2488 | { | ||
2489 | #ifdef CONFIG_SMP | ||
2490 | if (p->sched_contributes_to_load) | ||
2491 | rq->nr_uninterruptible--; | ||
2492 | #endif | ||
2493 | |||
2494 | ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING); | ||
2495 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2496 | } | ||
2497 | |||
2498 | /* | ||
2499 | * Called in case the task @p isn't fully descheduled from its runqueue, | ||
2500 | * in this case we must do a remote wakeup. Its a 'light' wakeup though, | ||
2501 | * since all we need to do is flip p->state to TASK_RUNNING, since | ||
2502 | * the task is still ->on_rq. | ||
2503 | */ | ||
2504 | static int ttwu_remote(struct task_struct *p, int wake_flags) | ||
2505 | { | ||
2506 | struct rq *rq; | ||
2507 | int ret = 0; | ||
2508 | |||
2509 | rq = __task_rq_lock(p); | ||
2510 | if (p->on_rq) { | ||
2511 | ttwu_do_wakeup(rq, p, wake_flags); | ||
2512 | ret = 1; | ||
2513 | } | ||
2514 | __task_rq_unlock(rq); | ||
2515 | |||
2516 | return ret; | ||
2517 | } | ||
2518 | |||
2519 | #ifdef CONFIG_SMP | ||
2520 | static void sched_ttwu_pending(void) | ||
2521 | { | ||
2522 | struct rq *rq = this_rq(); | ||
2523 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
2524 | |||
2525 | if (!list) | ||
2526 | return; | ||
2527 | |||
2528 | raw_spin_lock(&rq->lock); | ||
2529 | |||
2530 | while (list) { | ||
2531 | struct task_struct *p = list; | ||
2532 | list = list->wake_entry; | ||
2533 | ttwu_do_activate(rq, p, 0); | ||
2534 | } | ||
2535 | |||
2536 | raw_spin_unlock(&rq->lock); | ||
2537 | } | ||
2538 | |||
2539 | void scheduler_ipi(void) | ||
2540 | { | ||
2541 | sched_ttwu_pending(); | ||
2542 | } | ||
2543 | |||
2544 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | ||
2545 | { | ||
2546 | struct rq *rq = cpu_rq(cpu); | ||
2547 | struct task_struct *next = rq->wake_list; | ||
2548 | |||
2549 | for (;;) { | ||
2550 | struct task_struct *old = next; | ||
2551 | |||
2552 | p->wake_entry = next; | ||
2553 | next = cmpxchg(&rq->wake_list, old, p); | ||
2554 | if (next == old) | ||
2555 | break; | ||
2556 | } | ||
2557 | |||
2558 | if (!next) | ||
2559 | smp_send_reschedule(cpu); | ||
2560 | } | ||
2561 | #endif | ||
2562 | |||
2563 | static void ttwu_queue(struct task_struct *p, int cpu) | ||
2564 | { | ||
2565 | struct rq *rq = cpu_rq(cpu); | ||
2566 | |||
2567 | #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_TTWU_QUEUE) | ||
2568 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | ||
2569 | ttwu_queue_remote(p, cpu); | ||
2570 | return; | ||
2571 | } | ||
2572 | #endif | ||
2573 | |||
2574 | raw_spin_lock(&rq->lock); | ||
2575 | ttwu_do_activate(rq, p, 0); | ||
2576 | raw_spin_unlock(&rq->lock); | ||
2439 | } | 2577 | } |
2440 | 2578 | ||
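ttwu_queue_remote() and sched_ttwu_pending() above form a lock-free producer/consumer pair: wakers push the task onto the target CPU's wake_list with cmpxchg and only send the rescheduling IPI when the list was previously empty, while the IPI handler detaches the whole list with xchg and activates each entry under its own rq->lock. A standalone C11 sketch of the same list discipline (illustrative only, not the kernel code):

#include <stdatomic.h>
#include <stddef.h>

struct wake_node {
	struct wake_node *next;
};

static _Atomic(struct wake_node *) wake_list;

/* Producer: push one node; returns 1 if the list was empty, i.e. the moment
 * the kernel would send the IPI. */
static int wake_list_push(struct wake_node *n)
{
	struct wake_node *old = atomic_load(&wake_list);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&wake_list, &old, n));

	return old == NULL;
}

/* Consumer: detach everything at once and process it. */
static void wake_list_drain(void (*activate)(struct wake_node *))
{
	struct wake_node *n = atomic_exchange(&wake_list, NULL);

	while (n) {
		struct wake_node *next = n->next;

		activate(n);    /* ttwu_do_activate() under rq->lock in the patch */
		n = next;
	}
}

Because only the empty-to-nonempty transition triggers smp_send_reschedule(), a burst of remote wakeups aimed at one CPU costs a single interrupt.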
2441 | /** | 2579 | /** |
@@ -2453,92 +2591,64 @@ static inline void ttwu_post_activation(struct task_struct *p, struct rq *rq, | |||
2453 | * Returns %true if @p was woken up, %false if it was already running | 2591 | * Returns %true if @p was woken up, %false if it was already running |
2454 | * or @state didn't match @p's state. | 2592 | * or @state didn't match @p's state. |
2455 | */ | 2593 | */ |
2456 | static int try_to_wake_up(struct task_struct *p, unsigned int state, | 2594 | static int |
2457 | int wake_flags) | 2595 | try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) |
2458 | { | 2596 | { |
2459 | int cpu, orig_cpu, this_cpu, success = 0; | ||
2460 | unsigned long flags; | 2597 | unsigned long flags; |
2461 | unsigned long en_flags = ENQUEUE_WAKEUP; | 2598 | int cpu, success = 0; |
2462 | struct rq *rq; | ||
2463 | |||
2464 | this_cpu = get_cpu(); | ||
2465 | 2599 | ||
2466 | smp_wmb(); | 2600 | smp_wmb(); |
2467 | rq = task_rq_lock(p, &flags); | 2601 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2468 | if (!(p->state & state)) | 2602 | if (!(p->state & state)) |
2469 | goto out; | 2603 | goto out; |
2470 | 2604 | ||
2471 | if (p->se.on_rq) | 2605 | success = 1; /* we're going to change ->state */ |
2472 | goto out_running; | ||
2473 | |||
2474 | cpu = task_cpu(p); | 2606 | cpu = task_cpu(p); |
2475 | orig_cpu = cpu; | ||
2476 | 2607 | ||
2477 | #ifdef CONFIG_SMP | 2608 | if (p->on_rq && ttwu_remote(p, wake_flags)) |
2478 | if (unlikely(task_running(rq, p))) | 2609 | goto stat; |
2479 | goto out_activate; | ||
2480 | 2610 | ||
2611 | #ifdef CONFIG_SMP | ||
2481 | /* | 2612 | /* |
2482 | * In order to handle concurrent wakeups and release the rq->lock | 2613 | * If the owning (remote) cpu is still in the middle of schedule() with |
2483 | * we put the task in TASK_WAKING state. | 2614 | * this task as prev, wait until its done referencing the task. |
2484 | * | ||
2485 | * First fix up the nr_uninterruptible count: | ||
2486 | */ | 2615 | */ |
2487 | if (task_contributes_to_load(p)) { | 2616 | while (p->on_cpu) { |
2488 | if (likely(cpu_online(orig_cpu))) | 2617 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
2489 | rq->nr_uninterruptible--; | 2618 | /* |
2490 | else | 2619 | * If called from interrupt context we could have landed in the |
2491 | this_rq()->nr_uninterruptible--; | 2620 | * middle of schedule(), in this case we should take care not |
2492 | } | 2621 | * to spin on ->on_cpu if p is current, since that would |
2493 | p->state = TASK_WAKING; | 2622 | * deadlock. |
2494 | 2623 | */ | |
2495 | if (p->sched_class->task_waking) { | 2624 | if (p == current) { |
2496 | p->sched_class->task_waking(rq, p); | 2625 | ttwu_queue(p, cpu); |
2497 | en_flags |= ENQUEUE_WAKING; | 2626 | goto stat; |
2627 | } | ||
2628 | #endif | ||
2629 | cpu_relax(); | ||
2498 | } | 2630 | } |
2499 | |||
2500 | cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags); | ||
2501 | if (cpu != orig_cpu) | ||
2502 | set_task_cpu(p, cpu); | ||
2503 | __task_rq_unlock(rq); | ||
2504 | |||
2505 | rq = cpu_rq(cpu); | ||
2506 | raw_spin_lock(&rq->lock); | ||
2507 | |||
2508 | /* | 2631 | /* |
2509 | * We migrated the task without holding either rq->lock, however | 2632 | * Pairs with the smp_wmb() in finish_lock_switch(). |
2510 | * since the task is not on the task list itself, nobody else | ||
2511 | * will try and migrate the task, hence the rq should match the | ||
2512 | * cpu we just moved it to. | ||
2513 | */ | 2633 | */ |
2514 | WARN_ON(task_cpu(p) != cpu); | 2634 | smp_rmb(); |
2515 | WARN_ON(p->state != TASK_WAKING); | ||
2516 | 2635 | ||
2517 | #ifdef CONFIG_SCHEDSTATS | 2636 | p->sched_contributes_to_load = !!task_contributes_to_load(p); |
2518 | schedstat_inc(rq, ttwu_count); | 2637 | p->state = TASK_WAKING; |
2519 | if (cpu == this_cpu) | 2638 | |
2520 | schedstat_inc(rq, ttwu_local); | 2639 | if (p->sched_class->task_waking) |
2521 | else { | 2640 | p->sched_class->task_waking(p); |
2522 | struct sched_domain *sd; | ||
2523 | for_each_domain(this_cpu, sd) { | ||
2524 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
2525 | schedstat_inc(sd, ttwu_wake_remote); | ||
2526 | break; | ||
2527 | } | ||
2528 | } | ||
2529 | } | ||
2530 | #endif /* CONFIG_SCHEDSTATS */ | ||
2531 | 2641 | ||
2532 | out_activate: | 2642 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2643 | if (task_cpu(p) != cpu) | ||
2644 | set_task_cpu(p, cpu); | ||
2533 | #endif /* CONFIG_SMP */ | 2645 | #endif /* CONFIG_SMP */ |
2534 | ttwu_activate(p, rq, wake_flags & WF_SYNC, orig_cpu != cpu, | 2646 | |
2535 | cpu == this_cpu, en_flags); | 2647 | ttwu_queue(p, cpu); |
2536 | success = 1; | 2648 | stat: |
2537 | out_running: | 2649 | ttwu_stat(p, cpu, wake_flags); |
2538 | ttwu_post_activation(p, rq, wake_flags, success); | ||
2539 | out: | 2650 | out: |
2540 | task_rq_unlock(rq, &flags); | 2651 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2541 | put_cpu(); | ||
2542 | 2652 | ||
2543 | return success; | 2653 | return success; |
2544 | } | 2654 | } |
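The rewritten try_to_wake_up() no longer takes the old runqueue's lock at all: it spins until p->on_cpu drops to zero, relying on the smp_wmb() in finish_lock_switch() so that everything the previous CPU did to the task is visible before the waker proceeds. A standalone C11 sketch of that handshake, with acquire/release fences standing in for smp_rmb()/smp_wmb() (illustrative only; the names are invented):

#include <stdatomic.h>
#include <sched.h>

struct task {
	_Atomic int on_cpu;
	int cpu;                /* only touched once the handshake is done */
};

/* Descheduling CPU: finish using the task, then let go of it. */
static void finish_switch(struct task *prev)
{
	atomic_thread_fence(memory_order_release);    /* the smp_wmb() in the patch */
	atomic_store_explicit(&prev->on_cpu, 0, memory_order_relaxed);
}

/* Waker: wait until the old CPU is done, then it is safe to move the task. */
static void wake(struct task *p, int new_cpu)
{
	while (atomic_load_explicit(&p->on_cpu, memory_order_relaxed))
		sched_yield();                            /* cpu_relax() in the patch */

	atomic_thread_fence(memory_order_acquire);    /* the smp_rmb() in the patch */
	p->cpu = new_cpu;
}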
@@ -2547,31 +2657,34 @@ out: | |||
2547 | * try_to_wake_up_local - try to wake up a local task with rq lock held | 2657 | * try_to_wake_up_local - try to wake up a local task with rq lock held |
2548 | * @p: the thread to be awakened | 2658 | * @p: the thread to be awakened |
2549 | * | 2659 | * |
2550 | * Put @p on the run-queue if it's not already there. The caller must | 2660 | * Put @p on the run-queue if it's not already there. The caller must |
2551 | * ensure that this_rq() is locked, @p is bound to this_rq() and not | 2661 | * ensure that this_rq() is locked, @p is bound to this_rq() and not |
2552 | * the current task. this_rq() stays locked over invocation. | 2662 | * the current task. |
2553 | */ | 2663 | */ |
2554 | static void try_to_wake_up_local(struct task_struct *p) | 2664 | static void try_to_wake_up_local(struct task_struct *p) |
2555 | { | 2665 | { |
2556 | struct rq *rq = task_rq(p); | 2666 | struct rq *rq = task_rq(p); |
2557 | bool success = false; | ||
2558 | 2667 | ||
2559 | BUG_ON(rq != this_rq()); | 2668 | BUG_ON(rq != this_rq()); |
2560 | BUG_ON(p == current); | 2669 | BUG_ON(p == current); |
2561 | lockdep_assert_held(&rq->lock); | 2670 | lockdep_assert_held(&rq->lock); |
2562 | 2671 | ||
2672 | if (!raw_spin_trylock(&p->pi_lock)) { | ||
2673 | raw_spin_unlock(&rq->lock); | ||
2674 | raw_spin_lock(&p->pi_lock); | ||
2675 | raw_spin_lock(&rq->lock); | ||
2676 | } | ||
2677 | |||
2563 | if (!(p->state & TASK_NORMAL)) | 2678 | if (!(p->state & TASK_NORMAL)) |
2564 | return; | 2679 | goto out; |
2565 | 2680 | ||
2566 | if (!p->se.on_rq) { | 2681 | if (!p->on_rq) |
2567 | if (likely(!task_running(rq, p))) { | 2682 | ttwu_activate(rq, p, ENQUEUE_WAKEUP); |
2568 | schedstat_inc(rq, ttwu_count); | 2683 | |
2569 | schedstat_inc(rq, ttwu_local); | 2684 | ttwu_do_wakeup(rq, p, 0); |
2570 | } | 2685 | ttwu_stat(p, smp_processor_id(), 0); |
2571 | ttwu_activate(p, rq, false, false, true, ENQUEUE_WAKEUP); | 2686 | out: |
2572 | success = true; | 2687 | raw_spin_unlock(&p->pi_lock); |
2573 | } | ||
2574 | ttwu_post_activation(p, rq, 0, success); | ||
2575 | } | 2688 | } |
2576 | 2689 | ||
2577 | /** | 2690 | /** |
@@ -2604,19 +2717,21 @@ int wake_up_state(struct task_struct *p, unsigned int state) | |||
2604 | */ | 2717 | */ |
2605 | static void __sched_fork(struct task_struct *p) | 2718 | static void __sched_fork(struct task_struct *p) |
2606 | { | 2719 | { |
2720 | p->on_rq = 0; | ||
2721 | |||
2722 | p->se.on_rq = 0; | ||
2607 | p->se.exec_start = 0; | 2723 | p->se.exec_start = 0; |
2608 | p->se.sum_exec_runtime = 0; | 2724 | p->se.sum_exec_runtime = 0; |
2609 | p->se.prev_sum_exec_runtime = 0; | 2725 | p->se.prev_sum_exec_runtime = 0; |
2610 | p->se.nr_migrations = 0; | 2726 | p->se.nr_migrations = 0; |
2611 | p->se.vruntime = 0; | 2727 | p->se.vruntime = 0; |
2728 | INIT_LIST_HEAD(&p->se.group_node); | ||
2612 | 2729 | ||
2613 | #ifdef CONFIG_SCHEDSTATS | 2730 | #ifdef CONFIG_SCHEDSTATS |
2614 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); | 2731 | memset(&p->se.statistics, 0, sizeof(p->se.statistics)); |
2615 | #endif | 2732 | #endif |
2616 | 2733 | ||
2617 | INIT_LIST_HEAD(&p->rt.run_list); | 2734 | INIT_LIST_HEAD(&p->rt.run_list); |
2618 | p->se.on_rq = 0; | ||
2619 | INIT_LIST_HEAD(&p->se.group_node); | ||
2620 | 2735 | ||
2621 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2736 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2622 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2737 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
@@ -2626,8 +2741,9 @@ static void __sched_fork(struct task_struct *p) | |||
2626 | /* | 2741 | /* |
2627 | * fork()/clone()-time setup: | 2742 | * fork()/clone()-time setup: |
2628 | */ | 2743 | */ |
2629 | void sched_fork(struct task_struct *p, int clone_flags) | 2744 | void sched_fork(struct task_struct *p) |
2630 | { | 2745 | { |
2746 | unsigned long flags; | ||
2631 | int cpu = get_cpu(); | 2747 | int cpu = get_cpu(); |
2632 | 2748 | ||
2633 | __sched_fork(p); | 2749 | __sched_fork(p); |
@@ -2678,16 +2794,16 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2678 | * | 2794 | * |
2679 | * Silence PROVE_RCU. | 2795 | * Silence PROVE_RCU. |
2680 | */ | 2796 | */ |
2681 | rcu_read_lock(); | 2797 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
2682 | set_task_cpu(p, cpu); | 2798 | set_task_cpu(p, cpu); |
2683 | rcu_read_unlock(); | 2799 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
2684 | 2800 | ||
2685 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2801 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
2686 | if (likely(sched_info_on())) | 2802 | if (likely(sched_info_on())) |
2687 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2803 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
2688 | #endif | 2804 | #endif |
2689 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 2805 | #if defined(CONFIG_SMP) |
2690 | p->oncpu = 0; | 2806 | p->on_cpu = 0; |
2691 | #endif | 2807 | #endif |
2692 | #ifdef CONFIG_PREEMPT | 2808 | #ifdef CONFIG_PREEMPT |
2693 | /* Want to start with kernel preemption disabled. */ | 2809 | /* Want to start with kernel preemption disabled. */ |
@@ -2707,41 +2823,31 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2707 | * that must be done for every newly created context, then puts the task | 2823 | * that must be done for every newly created context, then puts the task |
2708 | * on the runqueue and wakes it. | 2824 | * on the runqueue and wakes it. |
2709 | */ | 2825 | */ |
2710 | void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | 2826 | void wake_up_new_task(struct task_struct *p) |
2711 | { | 2827 | { |
2712 | unsigned long flags; | 2828 | unsigned long flags; |
2713 | struct rq *rq; | 2829 | struct rq *rq; |
2714 | int cpu __maybe_unused = get_cpu(); | ||
2715 | 2830 | ||
2831 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
2716 | #ifdef CONFIG_SMP | 2832 | #ifdef CONFIG_SMP |
2717 | rq = task_rq_lock(p, &flags); | ||
2718 | p->state = TASK_WAKING; | ||
2719 | |||
2720 | /* | 2833 | /* |
2721 | * Fork balancing, do it here and not earlier because: | 2834 | * Fork balancing, do it here and not earlier because: |
2722 | * - cpus_allowed can change in the fork path | 2835 | * - cpus_allowed can change in the fork path |
2723 | * - any previously selected cpu might disappear through hotplug | 2836 | * - any previously selected cpu might disappear through hotplug |
2724 | * | ||
2725 | * We set TASK_WAKING so that select_task_rq() can drop rq->lock | ||
2726 | * without people poking at ->cpus_allowed. | ||
2727 | */ | 2837 | */ |
2728 | cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0); | 2838 | set_task_cpu(p, select_task_rq(p, SD_BALANCE_FORK, 0)); |
2729 | set_task_cpu(p, cpu); | ||
2730 | |||
2731 | p->state = TASK_RUNNING; | ||
2732 | task_rq_unlock(rq, &flags); | ||
2733 | #endif | 2839 | #endif |
2734 | 2840 | ||
2735 | rq = task_rq_lock(p, &flags); | 2841 | rq = __task_rq_lock(p); |
2736 | activate_task(rq, p, 0); | 2842 | activate_task(rq, p, 0); |
2737 | trace_sched_wakeup_new(p, 1); | 2843 | p->on_rq = 1; |
2844 | trace_sched_wakeup_new(p, true); | ||
2738 | check_preempt_curr(rq, p, WF_FORK); | 2845 | check_preempt_curr(rq, p, WF_FORK); |
2739 | #ifdef CONFIG_SMP | 2846 | #ifdef CONFIG_SMP |
2740 | if (p->sched_class->task_woken) | 2847 | if (p->sched_class->task_woken) |
2741 | p->sched_class->task_woken(rq, p); | 2848 | p->sched_class->task_woken(rq, p); |
2742 | #endif | 2849 | #endif |
2743 | task_rq_unlock(rq, &flags); | 2850 | task_rq_unlock(rq, p, &flags); |
2744 | put_cpu(); | ||
2745 | } | 2851 | } |
2746 | 2852 | ||
2747 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2853 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
@@ -3450,27 +3556,22 @@ void sched_exec(void) | |||
3450 | { | 3556 | { |
3451 | struct task_struct *p = current; | 3557 | struct task_struct *p = current; |
3452 | unsigned long flags; | 3558 | unsigned long flags; |
3453 | struct rq *rq; | ||
3454 | int dest_cpu; | 3559 | int dest_cpu; |
3455 | 3560 | ||
3456 | rq = task_rq_lock(p, &flags); | 3561 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
3457 | dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0); | 3562 | dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0); |
3458 | if (dest_cpu == smp_processor_id()) | 3563 | if (dest_cpu == smp_processor_id()) |
3459 | goto unlock; | 3564 | goto unlock; |
3460 | 3565 | ||
3461 | /* | 3566 | if (likely(cpu_active(dest_cpu))) { |
3462 | * select_task_rq() can race against ->cpus_allowed | ||
3463 | */ | ||
3464 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) && | ||
3465 | likely(cpu_active(dest_cpu)) && migrate_task(p, rq)) { | ||
3466 | struct migration_arg arg = { p, dest_cpu }; | 3567 | struct migration_arg arg = { p, dest_cpu }; |
3467 | 3568 | ||
3468 | task_rq_unlock(rq, &flags); | 3569 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3469 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 3570 | stop_one_cpu(task_cpu(p), migration_cpu_stop, &arg); |
3470 | return; | 3571 | return; |
3471 | } | 3572 | } |
3472 | unlock: | 3573 | unlock: |
3473 | task_rq_unlock(rq, &flags); | 3574 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
3474 | } | 3575 | } |
3475 | 3576 | ||
3476 | #endif | 3577 | #endif |
@@ -3507,7 +3608,7 @@ unsigned long long task_delta_exec(struct task_struct *p) | |||
3507 | 3608 | ||
3508 | rq = task_rq_lock(p, &flags); | 3609 | rq = task_rq_lock(p, &flags); |
3509 | ns = do_task_delta_exec(p, rq); | 3610 | ns = do_task_delta_exec(p, rq); |
3510 | task_rq_unlock(rq, &flags); | 3611 | task_rq_unlock(rq, p, &flags); |
3511 | 3612 | ||
3512 | return ns; | 3613 | return ns; |
3513 | } | 3614 | } |
@@ -3525,7 +3626,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) | |||
3525 | 3626 | ||
3526 | rq = task_rq_lock(p, &flags); | 3627 | rq = task_rq_lock(p, &flags); |
3527 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); | 3628 | ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq); |
3528 | task_rq_unlock(rq, &flags); | 3629 | task_rq_unlock(rq, p, &flags); |
3529 | 3630 | ||
3530 | return ns; | 3631 | return ns; |
3531 | } | 3632 | } |
@@ -3549,7 +3650,7 @@ unsigned long long thread_group_sched_runtime(struct task_struct *p) | |||
3549 | rq = task_rq_lock(p, &flags); | 3650 | rq = task_rq_lock(p, &flags); |
3550 | thread_group_cputime(p, &totals); | 3651 | thread_group_cputime(p, &totals); |
3551 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); | 3652 | ns = totals.sum_exec_runtime + do_task_delta_exec(p, rq); |
3552 | task_rq_unlock(rq, &flags); | 3653 | task_rq_unlock(rq, p, &flags); |
3553 | 3654 | ||
3554 | return ns; | 3655 | return ns; |
3555 | } | 3656 | } |
@@ -3903,9 +4004,6 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | |||
3903 | /* | 4004 | /* |
3904 | * This function gets called by the timer code, with HZ frequency. | 4005 | * This function gets called by the timer code, with HZ frequency. |
3905 | * We call it with interrupts disabled. | 4006 | * We call it with interrupts disabled. |
3906 | * | ||
3907 | * It also gets called by the fork code, when changing the parent's | ||
3908 | * timeslices. | ||
3909 | */ | 4007 | */ |
3910 | void scheduler_tick(void) | 4008 | void scheduler_tick(void) |
3911 | { | 4009 | { |
@@ -4025,17 +4123,11 @@ static inline void schedule_debug(struct task_struct *prev) | |||
4025 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); | 4123 | profile_hit(SCHED_PROFILING, __builtin_return_address(0)); |
4026 | 4124 | ||
4027 | schedstat_inc(this_rq(), sched_count); | 4125 | schedstat_inc(this_rq(), sched_count); |
4028 | #ifdef CONFIG_SCHEDSTATS | ||
4029 | if (unlikely(prev->lock_depth >= 0)) { | ||
4030 | schedstat_inc(this_rq(), rq_sched_info.bkl_count); | ||
4031 | schedstat_inc(prev, sched_info.bkl_count); | ||
4032 | } | ||
4033 | #endif | ||
4034 | } | 4126 | } |
4035 | 4127 | ||
4036 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 4128 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
4037 | { | 4129 | { |
4038 | if (prev->se.on_rq) | 4130 | if (prev->on_rq || rq->skip_clock_update < 0) |
4039 | update_rq_clock(rq); | 4131 | update_rq_clock(rq); |
4040 | prev->sched_class->put_prev_task(rq, prev); | 4132 | prev->sched_class->put_prev_task(rq, prev); |
4041 | } | 4133 | } |
@@ -4097,11 +4189,13 @@ need_resched: | |||
4097 | if (unlikely(signal_pending_state(prev->state, prev))) { | 4189 | if (unlikely(signal_pending_state(prev->state, prev))) { |
4098 | prev->state = TASK_RUNNING; | 4190 | prev->state = TASK_RUNNING; |
4099 | } else { | 4191 | } else { |
4192 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4193 | prev->on_rq = 0; | ||
4194 | |||
4100 | /* | 4195 | /* |
4101 | * If a worker is going to sleep, notify and | 4196 | * If a worker went to sleep, notify and ask workqueue |
4102 | * ask workqueue whether it wants to wake up a | 4197 | * whether it wants to wake up a task to maintain |
4103 | * task to maintain concurrency. If so, wake | 4198 | * concurrency. |
4104 | * up the task. | ||
4105 | */ | 4199 | */ |
4106 | if (prev->flags & PF_WQ_WORKER) { | 4200 | if (prev->flags & PF_WQ_WORKER) { |
4107 | struct task_struct *to_wakeup; | 4201 | struct task_struct *to_wakeup; |
@@ -4110,11 +4204,10 @@ need_resched: | |||
4110 | if (to_wakeup) | 4204 | if (to_wakeup) |
4111 | try_to_wake_up_local(to_wakeup); | 4205 | try_to_wake_up_local(to_wakeup); |
4112 | } | 4206 | } |
4113 | deactivate_task(rq, prev, DEQUEUE_SLEEP); | ||
4114 | 4207 | ||
4115 | /* | 4208 | /* |
4116 | * If we are going to sleep and we have plugged IO queued, make | 4209 | * If we are going to sleep and we have plugged IO |
4117 | * sure to submit it to avoid deadlocks. | 4210 | * queued, make sure to submit it to avoid deadlocks. |
4118 | */ | 4211 | */ |
4119 | if (blk_needs_flush_plug(prev)) { | 4212 | if (blk_needs_flush_plug(prev)) { |
4120 | raw_spin_unlock(&rq->lock); | 4213 | raw_spin_unlock(&rq->lock); |
@@ -4161,70 +4254,53 @@ need_resched: | |||
4161 | EXPORT_SYMBOL(schedule); | 4254 | EXPORT_SYMBOL(schedule); |
4162 | 4255 | ||
4163 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 4256 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
4164 | /* | ||
4165 | * Look out! "owner" is an entirely speculative pointer | ||
4166 | * access and not reliable. | ||
4167 | */ | ||
4168 | int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) | ||
4169 | { | ||
4170 | unsigned int cpu; | ||
4171 | struct rq *rq; | ||
4172 | 4257 | ||
4173 | if (!sched_feat(OWNER_SPIN)) | 4258 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
4174 | return 0; | 4259 | { |
4260 | bool ret = false; | ||
4175 | 4261 | ||
4176 | #ifdef CONFIG_DEBUG_PAGEALLOC | 4262 | rcu_read_lock(); |
4177 | /* | 4263 | if (lock->owner != owner) |
4178 | * Need to access the cpu field knowing that | 4264 | goto fail; |
4179 | * DEBUG_PAGEALLOC could have unmapped it if | ||
4180 | * the mutex owner just released it and exited. | ||
4181 | */ | ||
4182 | if (probe_kernel_address(&owner->cpu, cpu)) | ||
4183 | return 0; | ||
4184 | #else | ||
4185 | cpu = owner->cpu; | ||
4186 | #endif | ||
4187 | 4265 | ||
4188 | /* | 4266 | /* |
4189 | * Even if the access succeeded (likely case), | 4267 | * Ensure we emit the owner->on_cpu, dereference _after_ checking |
4190 | * the cpu field may no longer be valid. | 4268 | * lock->owner still matches owner, if that fails, owner might |
4269 | * point to free()d memory, if it still matches, the rcu_read_lock() | ||
4270 | * ensures the memory stays valid. | ||
4191 | */ | 4271 | */ |
4192 | if (cpu >= nr_cpumask_bits) | 4272 | barrier(); |
4193 | return 0; | ||
4194 | 4273 | ||
4195 | /* | 4274 | ret = owner->on_cpu; |
4196 | * We need to validate that we can do a | 4275 | fail: |
4197 | * get_cpu() and that we have the percpu area. | 4276 | rcu_read_unlock(); |
4198 | */ | ||
4199 | if (!cpu_online(cpu)) | ||
4200 | return 0; | ||
4201 | 4277 | ||
4202 | rq = cpu_rq(cpu); | 4278 | return ret; |
4279 | } | ||
4203 | 4280 | ||
4204 | for (;;) { | 4281 | /* |
4205 | /* | 4282 | * Look out! "owner" is an entirely speculative pointer |
4206 | * Owner changed, break to re-assess state. | 4283 | * access and not reliable. |
4207 | */ | 4284 | */ |
4208 | if (lock->owner != owner) { | 4285 | int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) |
4209 | /* | 4286 | { |
4210 | * If the lock has switched to a different owner, | 4287 | if (!sched_feat(OWNER_SPIN)) |
4211 | * we likely have heavy contention. Return 0 to quit | 4288 | return 0; |
4212 | * optimistic spinning and not contend further: | ||
4213 | */ | ||
4214 | if (lock->owner) | ||
4215 | return 0; | ||
4216 | break; | ||
4217 | } | ||
4218 | 4289 | ||
4219 | /* | 4290 | while (owner_running(lock, owner)) { |
4220 | * Is that owner really running on that cpu? | 4291 | if (need_resched()) |
4221 | */ | ||
4222 | if (task_thread_info(rq->curr) != owner || need_resched()) | ||
4223 | return 0; | 4292 | return 0; |
4224 | 4293 | ||
4225 | arch_mutex_cpu_relax(); | 4294 | arch_mutex_cpu_relax(); |
4226 | } | 4295 | } |
4227 | 4296 | ||
4297 | /* | ||
4298 | * If the owner changed to another task there is likely | ||
4299 | * heavy contention, stop spinning. | ||
4300 | */ | ||
4301 | if (lock->owner) | ||
4302 | return 0; | ||
4303 | |||
4228 | return 1; | 4304 | return 1; |
4229 | } | 4305 | } |
4230 | #endif | 4306 | #endif |
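The rewritten mutex_spin_on_owner() keeps spinning only while lock->owner is unchanged and that owner is actually on a CPU, and it uses rcu_read_lock() so dereferencing a possibly-exiting owner's on_cpu field stays safe. A standalone sketch of the spin policy itself, with C11 atomics and without the RCU lifetime handling (illustrative only; the types are invented):

#include <stdatomic.h>
#include <stdbool.h>
#include <sched.h>

struct owner {
	_Atomic bool on_cpu;
};

struct lock {
	_Atomic(struct owner *) owner;
};

/* Returns true if the caller should keep trying the lock's fast path,
 * false if it should give up and block. */
static bool spin_on_owner(struct lock *l, struct owner *owner)
{
	/* spin while the same owner still holds the lock and is running */
	while (atomic_load(&l->owner) == owner && atomic_load(&owner->on_cpu))
		sched_yield();          /* arch_mutex_cpu_relax() in the patch */

	/*
	 * If the lock changed hands instead of being released, contention is
	 * likely heavy: stop the optimistic spin.
	 */
	return atomic_load(&l->owner) == NULL;
}

The kernel version additionally bails out of the loop when need_resched() fires, so a spinner never delays a pending reschedule.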
@@ -4684,19 +4760,18 @@ EXPORT_SYMBOL(sleep_on_timeout); | |||
4684 | */ | 4760 | */ |
4685 | void rt_mutex_setprio(struct task_struct *p, int prio) | 4761 | void rt_mutex_setprio(struct task_struct *p, int prio) |
4686 | { | 4762 | { |
4687 | unsigned long flags; | ||
4688 | int oldprio, on_rq, running; | 4763 | int oldprio, on_rq, running; |
4689 | struct rq *rq; | 4764 | struct rq *rq; |
4690 | const struct sched_class *prev_class; | 4765 | const struct sched_class *prev_class; |
4691 | 4766 | ||
4692 | BUG_ON(prio < 0 || prio > MAX_PRIO); | 4767 | BUG_ON(prio < 0 || prio > MAX_PRIO); |
4693 | 4768 | ||
4694 | rq = task_rq_lock(p, &flags); | 4769 | rq = __task_rq_lock(p); |
4695 | 4770 | ||
4696 | trace_sched_pi_setprio(p, prio); | 4771 | trace_sched_pi_setprio(p, prio); |
4697 | oldprio = p->prio; | 4772 | oldprio = p->prio; |
4698 | prev_class = p->sched_class; | 4773 | prev_class = p->sched_class; |
4699 | on_rq = p->se.on_rq; | 4774 | on_rq = p->on_rq; |
4700 | running = task_current(rq, p); | 4775 | running = task_current(rq, p); |
4701 | if (on_rq) | 4776 | if (on_rq) |
4702 | dequeue_task(rq, p, 0); | 4777 | dequeue_task(rq, p, 0); |
@@ -4716,7 +4791,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4716 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); | 4791 | enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0); |
4717 | 4792 | ||
4718 | check_class_changed(rq, p, prev_class, oldprio); | 4793 | check_class_changed(rq, p, prev_class, oldprio); |
4719 | task_rq_unlock(rq, &flags); | 4794 | __task_rq_unlock(rq); |
4720 | } | 4795 | } |
4721 | 4796 | ||
4722 | #endif | 4797 | #endif |
@@ -4744,7 +4819,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4744 | p->static_prio = NICE_TO_PRIO(nice); | 4819 | p->static_prio = NICE_TO_PRIO(nice); |
4745 | goto out_unlock; | 4820 | goto out_unlock; |
4746 | } | 4821 | } |
4747 | on_rq = p->se.on_rq; | 4822 | on_rq = p->on_rq; |
4748 | if (on_rq) | 4823 | if (on_rq) |
4749 | dequeue_task(rq, p, 0); | 4824 | dequeue_task(rq, p, 0); |
4750 | 4825 | ||
@@ -4764,7 +4839,7 @@ void set_user_nice(struct task_struct *p, long nice) | |||
4764 | resched_task(rq->curr); | 4839 | resched_task(rq->curr); |
4765 | } | 4840 | } |
4766 | out_unlock: | 4841 | out_unlock: |
4767 | task_rq_unlock(rq, &flags); | 4842 | task_rq_unlock(rq, p, &flags); |
4768 | } | 4843 | } |
4769 | EXPORT_SYMBOL(set_user_nice); | 4844 | EXPORT_SYMBOL(set_user_nice); |
4770 | 4845 | ||
@@ -4878,8 +4953,6 @@ static struct task_struct *find_process_by_pid(pid_t pid) | |||
4878 | static void | 4953 | static void |
4879 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | 4954 | __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) |
4880 | { | 4955 | { |
4881 | BUG_ON(p->se.on_rq); | ||
4882 | |||
4883 | p->policy = policy; | 4956 | p->policy = policy; |
4884 | p->rt_priority = prio; | 4957 | p->rt_priority = prio; |
4885 | p->normal_prio = normal_prio(p); | 4958 | p->normal_prio = normal_prio(p); |
@@ -4994,20 +5067,17 @@ recheck: | |||
4994 | /* | 5067 | /* |
4995 | * make sure no PI-waiters arrive (or leave) while we are | 5068 | * make sure no PI-waiters arrive (or leave) while we are |
4996 | * changing the priority of the task: | 5069 | * changing the priority of the task: |
4997 | */ | 5070 | * |
4998 | raw_spin_lock_irqsave(&p->pi_lock, flags); | ||
4999 | /* | ||
5000 | * To be able to change p->policy safely, the appropriate | 5071 | * To be able to change p->policy safely, the appropriate |
5001 | * runqueue lock must be held. | 5072 | * runqueue lock must be held. |
5002 | */ | 5073 | */ |
5003 | rq = __task_rq_lock(p); | 5074 | rq = task_rq_lock(p, &flags); |
5004 | 5075 | ||
5005 | /* | 5076 | /* |
5006 | * Changing the policy of the stop threads its a very bad idea | 5077 | * Changing the policy of the stop threads its a very bad idea |
5007 | */ | 5078 | */ |
5008 | if (p == rq->stop) { | 5079 | if (p == rq->stop) { |
5009 | __task_rq_unlock(rq); | 5080 | task_rq_unlock(rq, p, &flags); |
5010 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5011 | return -EINVAL; | 5081 | return -EINVAL; |
5012 | } | 5082 | } |
5013 | 5083 | ||
@@ -5031,8 +5101,7 @@ recheck: | |||
5031 | if (rt_bandwidth_enabled() && rt_policy(policy) && | 5101 | if (rt_bandwidth_enabled() && rt_policy(policy) && |
5032 | task_group(p)->rt_bandwidth.rt_runtime == 0 && | 5102 | task_group(p)->rt_bandwidth.rt_runtime == 0 && |
5033 | !task_group_is_autogroup(task_group(p))) { | 5103 | !task_group_is_autogroup(task_group(p))) { |
5034 | __task_rq_unlock(rq); | 5104 | task_rq_unlock(rq, p, &flags); |
5035 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5036 | return -EPERM; | 5105 | return -EPERM; |
5037 | } | 5106 | } |
5038 | } | 5107 | } |
@@ -5041,11 +5110,10 @@ recheck: | |||
5041 | /* recheck policy now with rq lock held */ | 5110 | /* recheck policy now with rq lock held */ |
5042 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 5111 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
5043 | policy = oldpolicy = -1; | 5112 | policy = oldpolicy = -1; |
5044 | __task_rq_unlock(rq); | 5113 | task_rq_unlock(rq, p, &flags); |
5045 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5046 | goto recheck; | 5114 | goto recheck; |
5047 | } | 5115 | } |
5048 | on_rq = p->se.on_rq; | 5116 | on_rq = p->on_rq; |
5049 | running = task_current(rq, p); | 5117 | running = task_current(rq, p); |
5050 | if (on_rq) | 5118 | if (on_rq) |
5051 | deactivate_task(rq, p, 0); | 5119 | deactivate_task(rq, p, 0); |
@@ -5064,8 +5132,7 @@ recheck: | |||
5064 | activate_task(rq, p, 0); | 5132 | activate_task(rq, p, 0); |
5065 | 5133 | ||
5066 | check_class_changed(rq, p, prev_class, oldprio); | 5134 | check_class_changed(rq, p, prev_class, oldprio); |
5067 | __task_rq_unlock(rq); | 5135 | task_rq_unlock(rq, p, &flags); |
5068 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
5069 | 5136 | ||
5070 | rt_mutex_adjust_pi(p); | 5137 | rt_mutex_adjust_pi(p); |
5071 | 5138 | ||
@@ -5316,7 +5383,6 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5316 | { | 5383 | { |
5317 | struct task_struct *p; | 5384 | struct task_struct *p; |
5318 | unsigned long flags; | 5385 | unsigned long flags; |
5319 | struct rq *rq; | ||
5320 | int retval; | 5386 | int retval; |
5321 | 5387 | ||
5322 | get_online_cpus(); | 5388 | get_online_cpus(); |
@@ -5331,9 +5397,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
5331 | if (retval) | 5397 | if (retval) |
5332 | goto out_unlock; | 5398 | goto out_unlock; |
5333 | 5399 | ||
5334 | rq = task_rq_lock(p, &flags); | 5400 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
5335 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 5401 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
5336 | task_rq_unlock(rq, &flags); | 5402 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
5337 | 5403 | ||
5338 | out_unlock: | 5404 | out_unlock: |
5339 | rcu_read_unlock(); | 5405 | rcu_read_unlock(); |
@@ -5658,7 +5724,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
5658 | 5724 | ||
5659 | rq = task_rq_lock(p, &flags); | 5725 | rq = task_rq_lock(p, &flags); |
5660 | time_slice = p->sched_class->get_rr_interval(rq, p); | 5726 | time_slice = p->sched_class->get_rr_interval(rq, p); |
5661 | task_rq_unlock(rq, &flags); | 5727 | task_rq_unlock(rq, p, &flags); |
5662 | 5728 | ||
5663 | rcu_read_unlock(); | 5729 | rcu_read_unlock(); |
5664 | jiffies_to_timespec(time_slice, &t); | 5730 | jiffies_to_timespec(time_slice, &t); |
@@ -5776,17 +5842,14 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
5776 | rcu_read_unlock(); | 5842 | rcu_read_unlock(); |
5777 | 5843 | ||
5778 | rq->curr = rq->idle = idle; | 5844 | rq->curr = rq->idle = idle; |
5779 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 5845 | #if defined(CONFIG_SMP) |
5780 | idle->oncpu = 1; | 5846 | idle->on_cpu = 1; |
5781 | #endif | 5847 | #endif |
5782 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 5848 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
5783 | 5849 | ||
5784 | /* Set the preempt count _outside_ the spinlocks! */ | 5850 | /* Set the preempt count _outside_ the spinlocks! */ |
5785 | #if defined(CONFIG_PREEMPT) | ||
5786 | task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); | ||
5787 | #else | ||
5788 | task_thread_info(idle)->preempt_count = 0; | 5851 | task_thread_info(idle)->preempt_count = 0; |
5789 | #endif | 5852 | |
5790 | /* | 5853 | /* |
5791 | * The idle tasks have their own, simple scheduling class: | 5854 | * The idle tasks have their own, simple scheduling class: |
5792 | */ | 5855 | */ |
@@ -5881,26 +5944,17 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
5881 | unsigned int dest_cpu; | 5944 | unsigned int dest_cpu; |
5882 | int ret = 0; | 5945 | int ret = 0; |
5883 | 5946 | ||
5884 | /* | ||
5885 | * Serialize against TASK_WAKING so that ttwu() and wunt() can | ||
5886 | * drop the rq->lock and still rely on ->cpus_allowed. | ||
5887 | */ | ||
5888 | again: | ||
5889 | while (task_is_waking(p)) | ||
5890 | cpu_relax(); | ||
5891 | rq = task_rq_lock(p, &flags); | 5947 | rq = task_rq_lock(p, &flags); |
5892 | if (task_is_waking(p)) { | 5948 | |
5893 | task_rq_unlock(rq, &flags); | 5949 | if (cpumask_equal(&p->cpus_allowed, new_mask)) |
5894 | goto again; | 5950 | goto out; |
5895 | } | ||
5896 | 5951 | ||
5897 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | 5952 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
5898 | ret = -EINVAL; | 5953 | ret = -EINVAL; |
5899 | goto out; | 5954 | goto out; |
5900 | } | 5955 | } |
5901 | 5956 | ||
5902 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current && | 5957 | if (unlikely((p->flags & PF_THREAD_BOUND) && p != current)) { |
5903 | !cpumask_equal(&p->cpus_allowed, new_mask))) { | ||
5904 | ret = -EINVAL; | 5958 | ret = -EINVAL; |
5905 | goto out; | 5959 | goto out; |
5906 | } | 5960 | } |
@@ -5917,16 +5971,16 @@ again: | |||
5917 | goto out; | 5971 | goto out; |
5918 | 5972 | ||
5919 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); | 5973 | dest_cpu = cpumask_any_and(cpu_active_mask, new_mask); |
5920 | if (migrate_task(p, rq)) { | 5974 | if (p->on_rq) { |
5921 | struct migration_arg arg = { p, dest_cpu }; | 5975 | struct migration_arg arg = { p, dest_cpu }; |
5922 | /* Need help from migration thread: drop lock and wait. */ | 5976 | /* Need help from migration thread: drop lock and wait. */ |
5923 | task_rq_unlock(rq, &flags); | 5977 | task_rq_unlock(rq, p, &flags); |
5924 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); | 5978 | stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); |
5925 | tlb_migrate_finish(p->mm); | 5979 | tlb_migrate_finish(p->mm); |
5926 | return 0; | 5980 | return 0; |
5927 | } | 5981 | } |
5928 | out: | 5982 | out: |
5929 | task_rq_unlock(rq, &flags); | 5983 | task_rq_unlock(rq, p, &flags); |
5930 | 5984 | ||
5931 | return ret; | 5985 | return ret; |
5932 | } | 5986 | } |
@@ -5954,6 +6008,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5954 | rq_src = cpu_rq(src_cpu); | 6008 | rq_src = cpu_rq(src_cpu); |
5955 | rq_dest = cpu_rq(dest_cpu); | 6009 | rq_dest = cpu_rq(dest_cpu); |
5956 | 6010 | ||
6011 | raw_spin_lock(&p->pi_lock); | ||
5957 | double_rq_lock(rq_src, rq_dest); | 6012 | double_rq_lock(rq_src, rq_dest); |
5958 | /* Already moved. */ | 6013 | /* Already moved. */ |
5959 | if (task_cpu(p) != src_cpu) | 6014 | if (task_cpu(p) != src_cpu) |
@@ -5966,7 +6021,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
5966 | * If we're not on a rq, the next wake-up will ensure we're | 6021 | * If we're not on a rq, the next wake-up will ensure we're |
5967 | * placed properly. | 6022 | * placed properly. |
5968 | */ | 6023 | */ |
5969 | if (p->se.on_rq) { | 6024 | if (p->on_rq) { |
5970 | deactivate_task(rq_src, p, 0); | 6025 | deactivate_task(rq_src, p, 0); |
5971 | set_task_cpu(p, dest_cpu); | 6026 | set_task_cpu(p, dest_cpu); |
5972 | activate_task(rq_dest, p, 0); | 6027 | activate_task(rq_dest, p, 0); |
@@ -5976,6 +6031,7 @@ done: | |||
5976 | ret = 1; | 6031 | ret = 1; |
5977 | fail: | 6032 | fail: |
5978 | double_rq_unlock(rq_src, rq_dest); | 6033 | double_rq_unlock(rq_src, rq_dest); |
6034 | raw_spin_unlock(&p->pi_lock); | ||
5979 | return ret; | 6035 | return ret; |
5980 | } | 6036 | } |
5981 | 6037 | ||
@@ -6316,6 +6372,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
6316 | 6372 | ||
6317 | #ifdef CONFIG_HOTPLUG_CPU | 6373 | #ifdef CONFIG_HOTPLUG_CPU |
6318 | case CPU_DYING: | 6374 | case CPU_DYING: |
6375 | sched_ttwu_pending(); | ||
6319 | /* Update our root-domain */ | 6376 | /* Update our root-domain */ |
6320 | raw_spin_lock_irqsave(&rq->lock, flags); | 6377 | raw_spin_lock_irqsave(&rq->lock, flags); |
6321 | if (rq->rd) { | 6378 | if (rq->rd) { |
@@ -6394,6 +6451,8 @@ early_initcall(migration_init); | |||
6394 | 6451 | ||
6395 | #ifdef CONFIG_SMP | 6452 | #ifdef CONFIG_SMP |
6396 | 6453 | ||
6454 | static cpumask_var_t sched_domains_tmpmask; /* sched_domains_mutex */ | ||
6455 | |||
6397 | #ifdef CONFIG_SCHED_DEBUG | 6456 | #ifdef CONFIG_SCHED_DEBUG |
6398 | 6457 | ||
6399 | static __read_mostly int sched_domain_debug_enabled; | 6458 | static __read_mostly int sched_domain_debug_enabled; |
@@ -6489,7 +6548,6 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
6489 | 6548 | ||
6490 | static void sched_domain_debug(struct sched_domain *sd, int cpu) | 6549 | static void sched_domain_debug(struct sched_domain *sd, int cpu) |
6491 | { | 6550 | { |
6492 | cpumask_var_t groupmask; | ||
6493 | int level = 0; | 6551 | int level = 0; |
6494 | 6552 | ||
6495 | if (!sched_domain_debug_enabled) | 6553 | if (!sched_domain_debug_enabled) |
@@ -6502,20 +6560,14 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
6502 | 6560 | ||
6503 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); | 6561 | printk(KERN_DEBUG "CPU%d attaching sched-domain:\n", cpu); |
6504 | 6562 | ||
6505 | if (!alloc_cpumask_var(&groupmask, GFP_KERNEL)) { | ||
6506 | printk(KERN_DEBUG "Cannot load-balance (out of memory)\n"); | ||
6507 | return; | ||
6508 | } | ||
6509 | |||
6510 | for (;;) { | 6563 | for (;;) { |
6511 | if (sched_domain_debug_one(sd, cpu, level, groupmask)) | 6564 | if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask)) |
6512 | break; | 6565 | break; |
6513 | level++; | 6566 | level++; |
6514 | sd = sd->parent; | 6567 | sd = sd->parent; |
6515 | if (!sd) | 6568 | if (!sd) |
6516 | break; | 6569 | break; |
6517 | } | 6570 | } |
6518 | free_cpumask_var(groupmask); | ||
6519 | } | 6571 | } |
6520 | #else /* !CONFIG_SCHED_DEBUG */ | 6572 | #else /* !CONFIG_SCHED_DEBUG */ |
6521 | # define sched_domain_debug(sd, cpu) do { } while (0) | 6573 | # define sched_domain_debug(sd, cpu) do { } while (0) |
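[Editorial sketch] The hunk above switches sched_domain_debug() from a per-call cpumask allocation to the single static sched_domains_tmpmask, which the declaration further up annotates as protected by sched_domains_mutex. The idea is a shared scratch buffer that is only valid while the serializing lock is held. A minimal user-space sketch of that pattern, assuming a pthread mutex in place of sched_domains_mutex and an invented tmpmask buffer:

#include <pthread.h>
#include <stdio.h>

/*
 * One static scratch buffer, valid only while the mutex is held --
 * the same trade-off as sched_domains_tmpmask under sched_domains_mutex.
 */
static pthread_mutex_t domains_mutex = PTHREAD_MUTEX_INITIALIZER;
static char tmpmask[128];

static void debug_one_level(int level)
{
        /* Caller must hold domains_mutex; tmpmask is ours to scribble on. */
        snprintf(tmpmask, sizeof(tmpmask), "level %d scratch", level);
        printf("%s\n", tmpmask);
}

static void domain_debug(void)
{
        pthread_mutex_lock(&domains_mutex);
        for (int level = 0; level < 3; level++)
                debug_one_level(level);
        pthread_mutex_unlock(&domains_mutex);
}

int main(void)
{
        domain_debug();
        return 0;
}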
@@ -6572,12 +6624,11 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
6572 | return 1; | 6624 | return 1; |
6573 | } | 6625 | } |
6574 | 6626 | ||
6575 | static void free_rootdomain(struct root_domain *rd) | 6627 | static void free_rootdomain(struct rcu_head *rcu) |
6576 | { | 6628 | { |
6577 | synchronize_sched(); | 6629 | struct root_domain *rd = container_of(rcu, struct root_domain, rcu); |
6578 | 6630 | ||
6579 | cpupri_cleanup(&rd->cpupri); | 6631 | cpupri_cleanup(&rd->cpupri); |
6580 | |||
6581 | free_cpumask_var(rd->rto_mask); | 6632 | free_cpumask_var(rd->rto_mask); |
6582 | free_cpumask_var(rd->online); | 6633 | free_cpumask_var(rd->online); |
6583 | free_cpumask_var(rd->span); | 6634 | free_cpumask_var(rd->span); |
@@ -6618,7 +6669,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
6618 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 6669 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6619 | 6670 | ||
6620 | if (old_rd) | 6671 | if (old_rd) |
6621 | free_rootdomain(old_rd); | 6672 | call_rcu_sched(&old_rd->rcu, free_rootdomain); |
6622 | } | 6673 | } |
6623 | 6674 | ||
6624 | static int init_rootdomain(struct root_domain *rd) | 6675 | static int init_rootdomain(struct root_domain *rd) |
@@ -6669,6 +6720,25 @@ static struct root_domain *alloc_rootdomain(void) | |||
6669 | return rd; | 6720 | return rd; |
6670 | } | 6721 | } |
6671 | 6722 | ||
6723 | static void free_sched_domain(struct rcu_head *rcu) | ||
6724 | { | ||
6725 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); | ||
6726 | if (atomic_dec_and_test(&sd->groups->ref)) | ||
6727 | kfree(sd->groups); | ||
6728 | kfree(sd); | ||
6729 | } | ||
6730 | |||
6731 | static void destroy_sched_domain(struct sched_domain *sd, int cpu) | ||
6732 | { | ||
6733 | call_rcu(&sd->rcu, free_sched_domain); | ||
6734 | } | ||
6735 | |||
6736 | static void destroy_sched_domains(struct sched_domain *sd, int cpu) | ||
6737 | { | ||
6738 | for (; sd; sd = sd->parent) | ||
6739 | destroy_sched_domain(sd, cpu); | ||
6740 | } | ||
6741 | |||
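[Editorial sketch] free_rootdomain() and free_sched_domain() above are now deferred-free callbacks: each receives only the embedded rcu_head and recovers the enclosing object with container_of() before freeing it (call_rcu_sched() for the root domain, call_rcu() for each domain in the ->parent chain). The recovery step is plain pointer arithmetic. A user-space sketch of that step, with a hand-rolled container_of and an invented defer_free() standing in for call_rcu(), which here just runs the callback immediately:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* Same layout trick as the kernel macro: subtract the member offset. */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct cb_head {                        /* stand-in for struct rcu_head */
        void (*func)(struct cb_head *);
};

struct root_domain {                    /* reduced to what the sketch needs */
        int refcount;
        struct cb_head rcu;             /* embedded callback head */
};

/* The callback only sees the embedded head; recover the outer object. */
static void free_rootdomain(struct cb_head *head)
{
        struct root_domain *rd = container_of(head, struct root_domain, rcu);

        printf("freeing root_domain with refcount %d\n", rd->refcount);
        free(rd);
}

/* In the kernel this would be call_rcu_sched(); here we just run it. */
static void defer_free(struct cb_head *head, void (*func)(struct cb_head *))
{
        head->func = func;
        head->func(head);               /* pretend the grace period elapsed */
}

int main(void)
{
        struct root_domain *rd = calloc(1, sizeof(*rd));

        if (!rd)
                return 1;
        rd->refcount = 1;
        defer_free(&rd->rcu, free_rootdomain);
        return 0;
}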
6672 | /* | 6742 | /* |
6673 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must | 6743 | * Attach the domain 'sd' to 'cpu' as its base domain. Callers must |
6674 | * hold the hotplug lock. | 6744 | * hold the hotplug lock. |
@@ -6679,9 +6749,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
6679 | struct rq *rq = cpu_rq(cpu); | 6749 | struct rq *rq = cpu_rq(cpu); |
6680 | struct sched_domain *tmp; | 6750 | struct sched_domain *tmp; |
6681 | 6751 | ||
6682 | for (tmp = sd; tmp; tmp = tmp->parent) | ||
6683 | tmp->span_weight = cpumask_weight(sched_domain_span(tmp)); | ||
6684 | |||
6685 | /* Remove the sched domains which do not contribute to scheduling. */ | 6752 | /* Remove the sched domains which do not contribute to scheduling. */ |
6686 | for (tmp = sd; tmp; ) { | 6753 | for (tmp = sd; tmp; ) { |
6687 | struct sched_domain *parent = tmp->parent; | 6754 | struct sched_domain *parent = tmp->parent; |
@@ -6692,12 +6759,15 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
6692 | tmp->parent = parent->parent; | 6759 | tmp->parent = parent->parent; |
6693 | if (parent->parent) | 6760 | if (parent->parent) |
6694 | parent->parent->child = tmp; | 6761 | parent->parent->child = tmp; |
6762 | destroy_sched_domain(parent, cpu); | ||
6695 | } else | 6763 | } else |
6696 | tmp = tmp->parent; | 6764 | tmp = tmp->parent; |
6697 | } | 6765 | } |
6698 | 6766 | ||
6699 | if (sd && sd_degenerate(sd)) { | 6767 | if (sd && sd_degenerate(sd)) { |
6768 | tmp = sd; | ||
6700 | sd = sd->parent; | 6769 | sd = sd->parent; |
6770 | destroy_sched_domain(tmp, cpu); | ||
6701 | if (sd) | 6771 | if (sd) |
6702 | sd->child = NULL; | 6772 | sd->child = NULL; |
6703 | } | 6773 | } |
@@ -6705,7 +6775,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) | |||
6705 | sched_domain_debug(sd, cpu); | 6775 | sched_domain_debug(sd, cpu); |
6706 | 6776 | ||
6707 | rq_attach_root(rq, rd); | 6777 | rq_attach_root(rq, rd); |
6778 | tmp = rq->sd; | ||
6708 | rcu_assign_pointer(rq->sd, sd); | 6779 | rcu_assign_pointer(rq->sd, sd); |
6780 | destroy_sched_domains(tmp, cpu); | ||
6709 | } | 6781 | } |
6710 | 6782 | ||
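[Editorial sketch] cpu_attach_domain() above now frees every level it splices out of the ->parent chain (the degenerate parent, the degenerate base domain, and the old rq->sd tree after rcu_assign_pointer). The splice itself is ordinary linked-list surgery. A small user-space sketch of dropping degenerate parents from such a chain, where the struct name and the is-degenerate flag are invented for illustration and free() stands in for the RCU-deferred destroy:

#include <stdio.h>
#include <stdlib.h>

struct domain {
        const char *name;
        int degenerate;                 /* would be sd_parent_degenerate() */
        struct domain *parent;
        struct domain *child;
};

/* Drop every degenerate parent, keeping child/parent links consistent. */
static void trim_degenerate(struct domain *sd)
{
        struct domain *tmp;

        for (tmp = sd; tmp; ) {
                struct domain *parent = tmp->parent;

                if (!parent)
                        break;

                if (parent->degenerate) {
                        tmp->parent = parent->parent;
                        if (parent->parent)
                                parent->parent->child = tmp;
                        free(parent);   /* the kernel defers this via RCU */
                } else {
                        tmp = tmp->parent;
                }
        }
}

int main(void)
{
        /* Build SMT -> MC -> NODE, with MC marked degenerate. */
        struct domain *node = calloc(1, sizeof(*node));
        struct domain *mc   = calloc(1, sizeof(*mc));
        struct domain *smt  = calloc(1, sizeof(*smt));

        if (!node || !mc || !smt)
                return 1;
        node->name = "NODE";
        mc->name = "MC";    mc->degenerate = 1;
        smt->name = "SMT";
        smt->parent = mc;   mc->child = smt;
        mc->parent = node;  node->child = mc;

        trim_degenerate(smt);

        for (struct domain *d = smt; d; d = d->parent)
                printf("%s\n", d->name);        /* prints SMT then NODE */
        return 0;
}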
6711 | /* cpus with isolated domains */ | 6783 | /* cpus with isolated domains */ |
@@ -6721,56 +6793,6 @@ static int __init isolated_cpu_setup(char *str) | |||
6721 | 6793 | ||
6722 | __setup("isolcpus=", isolated_cpu_setup); | 6794 | __setup("isolcpus=", isolated_cpu_setup); |
6723 | 6795 | ||
6724 | /* | ||
6725 | * init_sched_build_groups takes the cpumask we wish to span, and a pointer | ||
6726 | * to a function which identifies what group(along with sched group) a CPU | ||
6727 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids | ||
6728 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
6729 | * | ||
6730 | * init_sched_build_groups will build a circular linked list of the groups | ||
6731 | * covered by the given span, and will set each group's ->cpumask correctly, | ||
6732 | * and ->cpu_power to 0. | ||
6733 | */ | ||
6734 | static void | ||
6735 | init_sched_build_groups(const struct cpumask *span, | ||
6736 | const struct cpumask *cpu_map, | ||
6737 | int (*group_fn)(int cpu, const struct cpumask *cpu_map, | ||
6738 | struct sched_group **sg, | ||
6739 | struct cpumask *tmpmask), | ||
6740 | struct cpumask *covered, struct cpumask *tmpmask) | ||
6741 | { | ||
6742 | struct sched_group *first = NULL, *last = NULL; | ||
6743 | int i; | ||
6744 | |||
6745 | cpumask_clear(covered); | ||
6746 | |||
6747 | for_each_cpu(i, span) { | ||
6748 | struct sched_group *sg; | ||
6749 | int group = group_fn(i, cpu_map, &sg, tmpmask); | ||
6750 | int j; | ||
6751 | |||
6752 | if (cpumask_test_cpu(i, covered)) | ||
6753 | continue; | ||
6754 | |||
6755 | cpumask_clear(sched_group_cpus(sg)); | ||
6756 | sg->cpu_power = 0; | ||
6757 | |||
6758 | for_each_cpu(j, span) { | ||
6759 | if (group_fn(j, cpu_map, NULL, tmpmask) != group) | ||
6760 | continue; | ||
6761 | |||
6762 | cpumask_set_cpu(j, covered); | ||
6763 | cpumask_set_cpu(j, sched_group_cpus(sg)); | ||
6764 | } | ||
6765 | if (!first) | ||
6766 | first = sg; | ||
6767 | if (last) | ||
6768 | last->next = sg; | ||
6769 | last = sg; | ||
6770 | } | ||
6771 | last->next = first; | ||
6772 | } | ||
6773 | |||
6774 | #define SD_NODES_PER_DOMAIN 16 | 6796 | #define SD_NODES_PER_DOMAIN 16 |
6775 | 6797 | ||
6776 | #ifdef CONFIG_NUMA | 6798 | #ifdef CONFIG_NUMA |
@@ -6787,7 +6809,7 @@ init_sched_build_groups(const struct cpumask *span, | |||
6787 | */ | 6809 | */ |
6788 | static int find_next_best_node(int node, nodemask_t *used_nodes) | 6810 | static int find_next_best_node(int node, nodemask_t *used_nodes) |
6789 | { | 6811 | { |
6790 | int i, n, val, min_val, best_node = 0; | 6812 | int i, n, val, min_val, best_node = -1; |
6791 | 6813 | ||
6792 | min_val = INT_MAX; | 6814 | min_val = INT_MAX; |
6793 | 6815 | ||
@@ -6811,7 +6833,8 @@ static int find_next_best_node(int node, nodemask_t *used_nodes) | |||
6811 | } | 6833 | } |
6812 | } | 6834 | } |
6813 | 6835 | ||
6814 | node_set(best_node, *used_nodes); | 6836 | if (best_node != -1) |
6837 | node_set(best_node, *used_nodes); | ||
6815 | return best_node; | 6838 | return best_node; |
6816 | } | 6839 | } |
6817 | 6840 | ||
@@ -6837,315 +6860,130 @@ static void sched_domain_node_span(int node, struct cpumask *span) | |||
6837 | 6860 | ||
6838 | for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { | 6861 | for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { |
6839 | int next_node = find_next_best_node(node, &used_nodes); | 6862 | int next_node = find_next_best_node(node, &used_nodes); |
6840 | 6863 | if (next_node < 0) | |
6864 | break; | ||
6841 | cpumask_or(span, span, cpumask_of_node(next_node)); | 6865 | cpumask_or(span, span, cpumask_of_node(next_node)); |
6842 | } | 6866 | } |
6843 | } | 6867 | } |
6868 | |||
6869 | static const struct cpumask *cpu_node_mask(int cpu) | ||
6870 | { | ||
6871 | lockdep_assert_held(&sched_domains_mutex); | ||
6872 | |||
6873 | sched_domain_node_span(cpu_to_node(cpu), sched_domains_tmpmask); | ||
6874 | |||
6875 | return sched_domains_tmpmask; | ||
6876 | } | ||
6877 | |||
6878 | static const struct cpumask *cpu_allnodes_mask(int cpu) | ||
6879 | { | ||
6880 | return cpu_possible_mask; | ||
6881 | } | ||
6844 | #endif /* CONFIG_NUMA */ | 6882 | #endif /* CONFIG_NUMA */ |
6845 | 6883 | ||
6846 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | 6884 | static const struct cpumask *cpu_cpu_mask(int cpu) |
6885 | { | ||
6886 | return cpumask_of_node(cpu_to_node(cpu)); | ||
6887 | } | ||
6847 | 6888 | ||
6848 | /* | 6889 | int sched_smt_power_savings = 0, sched_mc_power_savings = 0; |
6849 | * The cpus mask in sched_group and sched_domain hangs off the end. | ||
6850 | * | ||
6851 | * ( See the the comments in include/linux/sched.h:struct sched_group | ||
6852 | * and struct sched_domain. ) | ||
6853 | */ | ||
6854 | struct static_sched_group { | ||
6855 | struct sched_group sg; | ||
6856 | DECLARE_BITMAP(cpus, CONFIG_NR_CPUS); | ||
6857 | }; | ||
6858 | 6890 | ||
6859 | struct static_sched_domain { | 6891 | struct sd_data { |
6860 | struct sched_domain sd; | 6892 | struct sched_domain **__percpu sd; |
6861 | DECLARE_BITMAP(span, CONFIG_NR_CPUS); | 6893 | struct sched_group **__percpu sg; |
6862 | }; | 6894 | }; |
6863 | 6895 | ||
6864 | struct s_data { | 6896 | struct s_data { |
6865 | #ifdef CONFIG_NUMA | 6897 | struct sched_domain ** __percpu sd; |
6866 | int sd_allnodes; | ||
6867 | cpumask_var_t domainspan; | ||
6868 | cpumask_var_t covered; | ||
6869 | cpumask_var_t notcovered; | ||
6870 | #endif | ||
6871 | cpumask_var_t nodemask; | ||
6872 | cpumask_var_t this_sibling_map; | ||
6873 | cpumask_var_t this_core_map; | ||
6874 | cpumask_var_t this_book_map; | ||
6875 | cpumask_var_t send_covered; | ||
6876 | cpumask_var_t tmpmask; | ||
6877 | struct sched_group **sched_group_nodes; | ||
6878 | struct root_domain *rd; | 6898 | struct root_domain *rd; |
6879 | }; | 6899 | }; |
6880 | 6900 | ||
6881 | enum s_alloc { | 6901 | enum s_alloc { |
6882 | sa_sched_groups = 0, | ||
6883 | sa_rootdomain, | 6902 | sa_rootdomain, |
6884 | sa_tmpmask, | 6903 | sa_sd, |
6885 | sa_send_covered, | 6904 | sa_sd_storage, |
6886 | sa_this_book_map, | ||
6887 | sa_this_core_map, | ||
6888 | sa_this_sibling_map, | ||
6889 | sa_nodemask, | ||
6890 | sa_sched_group_nodes, | ||
6891 | #ifdef CONFIG_NUMA | ||
6892 | sa_notcovered, | ||
6893 | sa_covered, | ||
6894 | sa_domainspan, | ||
6895 | #endif | ||
6896 | sa_none, | 6905 | sa_none, |
6897 | }; | 6906 | }; |
6898 | 6907 | ||
6899 | /* | 6908 | struct sched_domain_topology_level; |
6900 | * SMT sched-domains: | ||
6901 | */ | ||
6902 | #ifdef CONFIG_SCHED_SMT | ||
6903 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | ||
6904 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); | ||
6905 | 6909 | ||
6906 | static int | 6910 | typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); |
6907 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | 6911 | typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); |
6908 | struct sched_group **sg, struct cpumask *unused) | ||
6909 | { | ||
6910 | if (sg) | ||
6911 | *sg = &per_cpu(sched_groups, cpu).sg; | ||
6912 | return cpu; | ||
6913 | } | ||
6914 | #endif /* CONFIG_SCHED_SMT */ | ||
6915 | 6912 | ||
6916 | /* | 6913 | struct sched_domain_topology_level { |
6917 | * multi-core sched-domains: | 6914 | sched_domain_init_f init; |
6918 | */ | 6915 | sched_domain_mask_f mask; |
6919 | #ifdef CONFIG_SCHED_MC | 6916 | struct sd_data data; |
6920 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); | 6917 | }; |
6921 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); | ||
6922 | |||
6923 | static int | ||
6924 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | ||
6925 | struct sched_group **sg, struct cpumask *mask) | ||
6926 | { | ||
6927 | int group; | ||
6928 | #ifdef CONFIG_SCHED_SMT | ||
6929 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6930 | group = cpumask_first(mask); | ||
6931 | #else | ||
6932 | group = cpu; | ||
6933 | #endif | ||
6934 | if (sg) | ||
6935 | *sg = &per_cpu(sched_group_core, group).sg; | ||
6936 | return group; | ||
6937 | } | ||
6938 | #endif /* CONFIG_SCHED_MC */ | ||
6939 | 6918 | ||
6940 | /* | 6919 | /* |
6941 | * book sched-domains: | 6920 | * Assumes the sched_domain tree is fully constructed |
6942 | */ | 6921 | */ |
6943 | #ifdef CONFIG_SCHED_BOOK | 6922 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) |
6944 | static DEFINE_PER_CPU(struct static_sched_domain, book_domains); | ||
6945 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); | ||
6946 | |||
6947 | static int | ||
6948 | cpu_to_book_group(int cpu, const struct cpumask *cpu_map, | ||
6949 | struct sched_group **sg, struct cpumask *mask) | ||
6950 | { | 6923 | { |
6951 | int group = cpu; | 6924 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
6952 | #ifdef CONFIG_SCHED_MC | 6925 | struct sched_domain *child = sd->child; |
6953 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
6954 | group = cpumask_first(mask); | ||
6955 | #elif defined(CONFIG_SCHED_SMT) | ||
6956 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6957 | group = cpumask_first(mask); | ||
6958 | #endif | ||
6959 | if (sg) | ||
6960 | *sg = &per_cpu(sched_group_book, group).sg; | ||
6961 | return group; | ||
6962 | } | ||
6963 | #endif /* CONFIG_SCHED_BOOK */ | ||
6964 | 6926 | ||
6965 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); | 6927 | if (child) |
6966 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); | 6928 | cpu = cpumask_first(sched_domain_span(child)); |
6967 | 6929 | ||
6968 | static int | ||
6969 | cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, | ||
6970 | struct sched_group **sg, struct cpumask *mask) | ||
6971 | { | ||
6972 | int group; | ||
6973 | #ifdef CONFIG_SCHED_BOOK | ||
6974 | cpumask_and(mask, cpu_book_mask(cpu), cpu_map); | ||
6975 | group = cpumask_first(mask); | ||
6976 | #elif defined(CONFIG_SCHED_MC) | ||
6977 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
6978 | group = cpumask_first(mask); | ||
6979 | #elif defined(CONFIG_SCHED_SMT) | ||
6980 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6981 | group = cpumask_first(mask); | ||
6982 | #else | ||
6983 | group = cpu; | ||
6984 | #endif | ||
6985 | if (sg) | 6930 | if (sg) |
6986 | *sg = &per_cpu(sched_group_phys, group).sg; | 6931 | *sg = *per_cpu_ptr(sdd->sg, cpu); |
6987 | return group; | 6932 | |
6933 | return cpu; | ||
6988 | } | 6934 | } |
6989 | 6935 | ||
6990 | #ifdef CONFIG_NUMA | ||
6991 | /* | 6936 | /* |
6992 | * The init_sched_build_groups can't handle what we want to do with node | 6937 | * build_sched_groups takes the cpumask we wish to span, and a pointer |
6993 | * groups, so roll our own. Now each node has its own list of groups which | 6938 | * to a function which identifies what group(along with sched group) a CPU |
6994 | * gets dynamically allocated. | 6939 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids |
6940 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
6941 | * | ||
6942 | * build_sched_groups will build a circular linked list of the groups | ||
6943 | * covered by the given span, and will set each group's ->cpumask correctly, | ||
6944 | * and ->cpu_power to 0. | ||
6995 | */ | 6945 | */ |
6996 | static DEFINE_PER_CPU(struct static_sched_domain, node_domains); | 6946 | static void |
6997 | static struct sched_group ***sched_group_nodes_bycpu; | 6947 | build_sched_groups(struct sched_domain *sd) |
6998 | |||
6999 | static DEFINE_PER_CPU(struct static_sched_domain, allnodes_domains); | ||
7000 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_allnodes); | ||
7001 | |||
7002 | static int cpu_to_allnodes_group(int cpu, const struct cpumask *cpu_map, | ||
7003 | struct sched_group **sg, | ||
7004 | struct cpumask *nodemask) | ||
7005 | { | ||
7006 | int group; | ||
7007 | |||
7008 | cpumask_and(nodemask, cpumask_of_node(cpu_to_node(cpu)), cpu_map); | ||
7009 | group = cpumask_first(nodemask); | ||
7010 | |||
7011 | if (sg) | ||
7012 | *sg = &per_cpu(sched_group_allnodes, group).sg; | ||
7013 | return group; | ||
7014 | } | ||
7015 | |||
7016 | static void init_numa_sched_groups_power(struct sched_group *group_head) | ||
7017 | { | ||
7018 | struct sched_group *sg = group_head; | ||
7019 | int j; | ||
7020 | |||
7021 | if (!sg) | ||
7022 | return; | ||
7023 | do { | ||
7024 | for_each_cpu(j, sched_group_cpus(sg)) { | ||
7025 | struct sched_domain *sd; | ||
7026 | |||
7027 | sd = &per_cpu(phys_domains, j).sd; | ||
7028 | if (j != group_first_cpu(sd->groups)) { | ||
7029 | /* | ||
7030 | * Only add "power" once for each | ||
7031 | * physical package. | ||
7032 | */ | ||
7033 | continue; | ||
7034 | } | ||
7035 | |||
7036 | sg->cpu_power += sd->groups->cpu_power; | ||
7037 | } | ||
7038 | sg = sg->next; | ||
7039 | } while (sg != group_head); | ||
7040 | } | ||
7041 | |||
7042 | static int build_numa_sched_groups(struct s_data *d, | ||
7043 | const struct cpumask *cpu_map, int num) | ||
7044 | { | 6948 | { |
7045 | struct sched_domain *sd; | 6949 | struct sched_group *first = NULL, *last = NULL; |
7046 | struct sched_group *sg, *prev; | 6950 | struct sd_data *sdd = sd->private; |
7047 | int n, j; | 6951 | const struct cpumask *span = sched_domain_span(sd); |
7048 | 6952 | struct cpumask *covered; | |
7049 | cpumask_clear(d->covered); | 6953 | int i; |
7050 | cpumask_and(d->nodemask, cpumask_of_node(num), cpu_map); | ||
7051 | if (cpumask_empty(d->nodemask)) { | ||
7052 | d->sched_group_nodes[num] = NULL; | ||
7053 | goto out; | ||
7054 | } | ||
7055 | |||
7056 | sched_domain_node_span(num, d->domainspan); | ||
7057 | cpumask_and(d->domainspan, d->domainspan, cpu_map); | ||
7058 | |||
7059 | sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
7060 | GFP_KERNEL, num); | ||
7061 | if (!sg) { | ||
7062 | printk(KERN_WARNING "Can not alloc domain group for node %d\n", | ||
7063 | num); | ||
7064 | return -ENOMEM; | ||
7065 | } | ||
7066 | d->sched_group_nodes[num] = sg; | ||
7067 | |||
7068 | for_each_cpu(j, d->nodemask) { | ||
7069 | sd = &per_cpu(node_domains, j).sd; | ||
7070 | sd->groups = sg; | ||
7071 | } | ||
7072 | |||
7073 | sg->cpu_power = 0; | ||
7074 | cpumask_copy(sched_group_cpus(sg), d->nodemask); | ||
7075 | sg->next = sg; | ||
7076 | cpumask_or(d->covered, d->covered, d->nodemask); | ||
7077 | 6954 | ||
7078 | prev = sg; | 6955 | lockdep_assert_held(&sched_domains_mutex); |
7079 | for (j = 0; j < nr_node_ids; j++) { | 6956 | covered = sched_domains_tmpmask; |
7080 | n = (num + j) % nr_node_ids; | ||
7081 | cpumask_complement(d->notcovered, d->covered); | ||
7082 | cpumask_and(d->tmpmask, d->notcovered, cpu_map); | ||
7083 | cpumask_and(d->tmpmask, d->tmpmask, d->domainspan); | ||
7084 | if (cpumask_empty(d->tmpmask)) | ||
7085 | break; | ||
7086 | cpumask_and(d->tmpmask, d->tmpmask, cpumask_of_node(n)); | ||
7087 | if (cpumask_empty(d->tmpmask)) | ||
7088 | continue; | ||
7089 | sg = kmalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
7090 | GFP_KERNEL, num); | ||
7091 | if (!sg) { | ||
7092 | printk(KERN_WARNING | ||
7093 | "Can not alloc domain group for node %d\n", j); | ||
7094 | return -ENOMEM; | ||
7095 | } | ||
7096 | sg->cpu_power = 0; | ||
7097 | cpumask_copy(sched_group_cpus(sg), d->tmpmask); | ||
7098 | sg->next = prev->next; | ||
7099 | cpumask_or(d->covered, d->covered, d->tmpmask); | ||
7100 | prev->next = sg; | ||
7101 | prev = sg; | ||
7102 | } | ||
7103 | out: | ||
7104 | return 0; | ||
7105 | } | ||
7106 | #endif /* CONFIG_NUMA */ | ||
7107 | 6957 | ||
7108 | #ifdef CONFIG_NUMA | 6958 | cpumask_clear(covered); |
7109 | /* Free memory allocated for various sched_group structures */ | ||
7110 | static void free_sched_groups(const struct cpumask *cpu_map, | ||
7111 | struct cpumask *nodemask) | ||
7112 | { | ||
7113 | int cpu, i; | ||
7114 | 6959 | ||
7115 | for_each_cpu(cpu, cpu_map) { | 6960 | for_each_cpu(i, span) { |
7116 | struct sched_group **sched_group_nodes | 6961 | struct sched_group *sg; |
7117 | = sched_group_nodes_bycpu[cpu]; | 6962 | int group = get_group(i, sdd, &sg); |
6963 | int j; | ||
7118 | 6964 | ||
7119 | if (!sched_group_nodes) | 6965 | if (cpumask_test_cpu(i, covered)) |
7120 | continue; | 6966 | continue; |
7121 | 6967 | ||
7122 | for (i = 0; i < nr_node_ids; i++) { | 6968 | cpumask_clear(sched_group_cpus(sg)); |
7123 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | 6969 | sg->cpu_power = 0; |
7124 | 6970 | ||
7125 | cpumask_and(nodemask, cpumask_of_node(i), cpu_map); | 6971 | for_each_cpu(j, span) { |
7126 | if (cpumask_empty(nodemask)) | 6972 | if (get_group(j, sdd, NULL) != group) |
7127 | continue; | 6973 | continue; |
7128 | 6974 | ||
7129 | if (sg == NULL) | 6975 | cpumask_set_cpu(j, covered); |
7130 | continue; | 6976 | cpumask_set_cpu(j, sched_group_cpus(sg)); |
7131 | sg = sg->next; | ||
7132 | next_sg: | ||
7133 | oldsg = sg; | ||
7134 | sg = sg->next; | ||
7135 | kfree(oldsg); | ||
7136 | if (oldsg != sched_group_nodes[i]) | ||
7137 | goto next_sg; | ||
7138 | } | 6977 | } |
7139 | kfree(sched_group_nodes); | 6978 | |
7140 | sched_group_nodes_bycpu[cpu] = NULL; | 6979 | if (!first) |
6980 | first = sg; | ||
6981 | if (last) | ||
6982 | last->next = sg; | ||
6983 | last = sg; | ||
7141 | } | 6984 | } |
6985 | last->next = first; | ||
7142 | } | 6986 | } |
7143 | #else /* !CONFIG_NUMA */ | ||
7144 | static void free_sched_groups(const struct cpumask *cpu_map, | ||
7145 | struct cpumask *nodemask) | ||
7146 | { | ||
7147 | } | ||
7148 | #endif /* CONFIG_NUMA */ | ||
7149 | 6987 | ||
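[Editorial sketch] As its comment says, build_sched_groups() above walks the domain's span once, uses get_group() to pick one representative per group (the first CPU of the child domain's span), fills each group's cpumask, and links the groups into a circular list so the balancer can rotate through them. A compact user-space sketch of the same covered-mask walk, using cpu/2 as an invented grouping function (sibling pairs) in place of get_group():

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define NCPUS 8

struct group {
        unsigned long mask;             /* CPUs in this group, as a bitmask */
        struct group *next;             /* circular list, like sg->next */
};

/* Stand-in for get_group(): siblings 0/1, 2/3, ... share a group. */
static int group_of(int cpu) { return cpu / 2; }

static struct group *build_groups(void)
{
        bool covered[NCPUS] = { false };
        struct group *first = NULL, *last = NULL;

        for (int i = 0; i < NCPUS; i++) {
                struct group *sg;
                int id = group_of(i);

                if (covered[i])
                        continue;

                sg = calloc(1, sizeof(*sg));
                if (!sg)
                        exit(1);
                for (int j = 0; j < NCPUS; j++) {
                        if (group_of(j) != id)
                                continue;
                        covered[j] = true;
                        sg->mask |= 1UL << j;
                }
                if (!first)
                        first = sg;
                if (last)
                        last->next = sg;
                last = sg;
        }
        last->next = first;             /* close the circle */
        return first;
}

int main(void)
{
        struct group *first = build_groups(), *sg = first;

        do {
                printf("group mask: 0x%lx\n", sg->mask);
                sg = sg->next;
        } while (sg != first);
        return 0;
}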
7150 | /* | 6988 | /* |
7151 | * Initialize sched groups cpu_power. | 6989 | * Initialize sched groups cpu_power. |
@@ -7159,11 +6997,6 @@ static void free_sched_groups(const struct cpumask *cpu_map, | |||
7159 | */ | 6997 | */ |
7160 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) | 6998 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) |
7161 | { | 6999 | { |
7162 | struct sched_domain *child; | ||
7163 | struct sched_group *group; | ||
7164 | long power; | ||
7165 | int weight; | ||
7166 | |||
7167 | WARN_ON(!sd || !sd->groups); | 7000 | WARN_ON(!sd || !sd->groups); |
7168 | 7001 | ||
7169 | if (cpu != group_first_cpu(sd->groups)) | 7002 | if (cpu != group_first_cpu(sd->groups)) |
@@ -7171,36 +7004,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
7171 | 7004 | ||
7172 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | 7005 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); |
7173 | 7006 | ||
7174 | child = sd->child; | 7007 | update_group_power(sd, cpu); |
7175 | |||
7176 | sd->groups->cpu_power = 0; | ||
7177 | |||
7178 | if (!child) { | ||
7179 | power = SCHED_LOAD_SCALE; | ||
7180 | weight = cpumask_weight(sched_domain_span(sd)); | ||
7181 | /* | ||
7182 | * SMT siblings share the power of a single core. | ||
7183 | * Usually multiple threads get a better yield out of | ||
7184 | * that one core than a single thread would have, | ||
7185 | * reflect that in sd->smt_gain. | ||
7186 | */ | ||
7187 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | ||
7188 | power *= sd->smt_gain; | ||
7189 | power /= weight; | ||
7190 | power >>= SCHED_LOAD_SHIFT; | ||
7191 | } | ||
7192 | sd->groups->cpu_power += power; | ||
7193 | return; | ||
7194 | } | ||
7195 | |||
7196 | /* | ||
7197 | * Add cpu_power of each child group to this groups cpu_power. | ||
7198 | */ | ||
7199 | group = child->groups; | ||
7200 | do { | ||
7201 | sd->groups->cpu_power += group->cpu_power; | ||
7202 | group = group->next; | ||
7203 | } while (group != child->groups); | ||
7204 | } | 7008 | } |
7205 | 7009 | ||
7206 | /* | 7010 | /* |
@@ -7214,15 +7018,15 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
7214 | # define SD_INIT_NAME(sd, type) do { } while (0) | 7018 | # define SD_INIT_NAME(sd, type) do { } while (0) |
7215 | #endif | 7019 | #endif |
7216 | 7020 | ||
7217 | #define SD_INIT(sd, type) sd_init_##type(sd) | 7021 | #define SD_INIT_FUNC(type) \ |
7218 | 7022 | static noinline struct sched_domain * \ | |
7219 | #define SD_INIT_FUNC(type) \ | 7023 | sd_init_##type(struct sched_domain_topology_level *tl, int cpu) \ |
7220 | static noinline void sd_init_##type(struct sched_domain *sd) \ | 7024 | { \ |
7221 | { \ | 7025 | struct sched_domain *sd = *per_cpu_ptr(tl->data.sd, cpu); \ |
7222 | memset(sd, 0, sizeof(*sd)); \ | 7026 | *sd = SD_##type##_INIT; \ |
7223 | *sd = SD_##type##_INIT; \ | 7027 | SD_INIT_NAME(sd, type); \ |
7224 | sd->level = SD_LV_##type; \ | 7028 | sd->private = &tl->data; \ |
7225 | SD_INIT_NAME(sd, type); \ | 7029 | return sd; \ |
7226 | } | 7030 | } |
7227 | 7031 | ||
7228 | SD_INIT_FUNC(CPU) | 7032 | SD_INIT_FUNC(CPU) |
@@ -7241,13 +7045,14 @@ SD_INIT_FUNC(CPU) | |||
7241 | #endif | 7045 | #endif |
7242 | 7046 | ||
7243 | static int default_relax_domain_level = -1; | 7047 | static int default_relax_domain_level = -1; |
7048 | int sched_domain_level_max; | ||
7244 | 7049 | ||
7245 | static int __init setup_relax_domain_level(char *str) | 7050 | static int __init setup_relax_domain_level(char *str) |
7246 | { | 7051 | { |
7247 | unsigned long val; | 7052 | unsigned long val; |
7248 | 7053 | ||
7249 | val = simple_strtoul(str, NULL, 0); | 7054 | val = simple_strtoul(str, NULL, 0); |
7250 | if (val < SD_LV_MAX) | 7055 | if (val < sched_domain_level_max) |
7251 | default_relax_domain_level = val; | 7056 | default_relax_domain_level = val; |
7252 | 7057 | ||
7253 | return 1; | 7058 | return 1; |
@@ -7275,37 +7080,20 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
7275 | } | 7080 | } |
7276 | } | 7081 | } |
7277 | 7082 | ||
7083 | static void __sdt_free(const struct cpumask *cpu_map); | ||
7084 | static int __sdt_alloc(const struct cpumask *cpu_map); | ||
7085 | |||
7278 | static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | 7086 | static void __free_domain_allocs(struct s_data *d, enum s_alloc what, |
7279 | const struct cpumask *cpu_map) | 7087 | const struct cpumask *cpu_map) |
7280 | { | 7088 | { |
7281 | switch (what) { | 7089 | switch (what) { |
7282 | case sa_sched_groups: | ||
7283 | free_sched_groups(cpu_map, d->tmpmask); /* fall through */ | ||
7284 | d->sched_group_nodes = NULL; | ||
7285 | case sa_rootdomain: | 7090 | case sa_rootdomain: |
7286 | free_rootdomain(d->rd); /* fall through */ | 7091 | if (!atomic_read(&d->rd->refcount)) |
7287 | case sa_tmpmask: | 7092 | free_rootdomain(&d->rd->rcu); /* fall through */ |
7288 | free_cpumask_var(d->tmpmask); /* fall through */ | 7093 | case sa_sd: |
7289 | case sa_send_covered: | 7094 | free_percpu(d->sd); /* fall through */ |
7290 | free_cpumask_var(d->send_covered); /* fall through */ | 7095 | case sa_sd_storage: |
7291 | case sa_this_book_map: | 7096 | __sdt_free(cpu_map); /* fall through */ |
7292 | free_cpumask_var(d->this_book_map); /* fall through */ | ||
7293 | case sa_this_core_map: | ||
7294 | free_cpumask_var(d->this_core_map); /* fall through */ | ||
7295 | case sa_this_sibling_map: | ||
7296 | free_cpumask_var(d->this_sibling_map); /* fall through */ | ||
7297 | case sa_nodemask: | ||
7298 | free_cpumask_var(d->nodemask); /* fall through */ | ||
7299 | case sa_sched_group_nodes: | ||
7300 | #ifdef CONFIG_NUMA | ||
7301 | kfree(d->sched_group_nodes); /* fall through */ | ||
7302 | case sa_notcovered: | ||
7303 | free_cpumask_var(d->notcovered); /* fall through */ | ||
7304 | case sa_covered: | ||
7305 | free_cpumask_var(d->covered); /* fall through */ | ||
7306 | case sa_domainspan: | ||
7307 | free_cpumask_var(d->domainspan); /* fall through */ | ||
7308 | #endif | ||
7309 | case sa_none: | 7097 | case sa_none: |
7310 | break; | 7098 | break; |
7311 | } | 7099 | } |
@@ -7314,308 +7102,212 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | |||
7314 | static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | 7102 | static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, |
7315 | const struct cpumask *cpu_map) | 7103 | const struct cpumask *cpu_map) |
7316 | { | 7104 | { |
7317 | #ifdef CONFIG_NUMA | 7105 | memset(d, 0, sizeof(*d)); |
7318 | if (!alloc_cpumask_var(&d->domainspan, GFP_KERNEL)) | 7106 | |
7319 | return sa_none; | 7107 | if (__sdt_alloc(cpu_map)) |
7320 | if (!alloc_cpumask_var(&d->covered, GFP_KERNEL)) | 7108 | return sa_sd_storage; |
7321 | return sa_domainspan; | 7109 | d->sd = alloc_percpu(struct sched_domain *); |
7322 | if (!alloc_cpumask_var(&d->notcovered, GFP_KERNEL)) | 7110 | if (!d->sd) |
7323 | return sa_covered; | 7111 | return sa_sd_storage; |
7324 | /* Allocate the per-node list of sched groups */ | ||
7325 | d->sched_group_nodes = kcalloc(nr_node_ids, | ||
7326 | sizeof(struct sched_group *), GFP_KERNEL); | ||
7327 | if (!d->sched_group_nodes) { | ||
7328 | printk(KERN_WARNING "Can not alloc sched group node list\n"); | ||
7329 | return sa_notcovered; | ||
7330 | } | ||
7331 | sched_group_nodes_bycpu[cpumask_first(cpu_map)] = d->sched_group_nodes; | ||
7332 | #endif | ||
7333 | if (!alloc_cpumask_var(&d->nodemask, GFP_KERNEL)) | ||
7334 | return sa_sched_group_nodes; | ||
7335 | if (!alloc_cpumask_var(&d->this_sibling_map, GFP_KERNEL)) | ||
7336 | return sa_nodemask; | ||
7337 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) | ||
7338 | return sa_this_sibling_map; | ||
7339 | if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) | ||
7340 | return sa_this_core_map; | ||
7341 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | ||
7342 | return sa_this_book_map; | ||
7343 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) | ||
7344 | return sa_send_covered; | ||
7345 | d->rd = alloc_rootdomain(); | 7112 | d->rd = alloc_rootdomain(); |
7346 | if (!d->rd) { | 7113 | if (!d->rd) |
7347 | printk(KERN_WARNING "Cannot alloc root domain\n"); | 7114 | return sa_sd; |
7348 | return sa_tmpmask; | ||
7349 | } | ||
7350 | return sa_rootdomain; | 7115 | return sa_rootdomain; |
7351 | } | 7116 | } |
7352 | 7117 | ||
7353 | static struct sched_domain *__build_numa_sched_domains(struct s_data *d, | 7118 | /* |
7354 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, int i) | 7119 | * NULL the sd_data elements we've used to build the sched_domain and |
7120 | * sched_group structure so that the subsequent __free_domain_allocs() | ||
7121 | * will not free the data we're using. | ||
7122 | */ | ||
7123 | static void claim_allocations(int cpu, struct sched_domain *sd) | ||
7355 | { | 7124 | { |
7356 | struct sched_domain *sd = NULL; | 7125 | struct sd_data *sdd = sd->private; |
7357 | #ifdef CONFIG_NUMA | 7126 | struct sched_group *sg = sd->groups; |
7358 | struct sched_domain *parent; | ||
7359 | |||
7360 | d->sd_allnodes = 0; | ||
7361 | if (cpumask_weight(cpu_map) > | ||
7362 | SD_NODES_PER_DOMAIN * cpumask_weight(d->nodemask)) { | ||
7363 | sd = &per_cpu(allnodes_domains, i).sd; | ||
7364 | SD_INIT(sd, ALLNODES); | ||
7365 | set_domain_attribute(sd, attr); | ||
7366 | cpumask_copy(sched_domain_span(sd), cpu_map); | ||
7367 | cpu_to_allnodes_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7368 | d->sd_allnodes = 1; | ||
7369 | } | ||
7370 | parent = sd; | ||
7371 | |||
7372 | sd = &per_cpu(node_domains, i).sd; | ||
7373 | SD_INIT(sd, NODE); | ||
7374 | set_domain_attribute(sd, attr); | ||
7375 | sched_domain_node_span(cpu_to_node(i), sched_domain_span(sd)); | ||
7376 | sd->parent = parent; | ||
7377 | if (parent) | ||
7378 | parent->child = sd; | ||
7379 | cpumask_and(sched_domain_span(sd), sched_domain_span(sd), cpu_map); | ||
7380 | #endif | ||
7381 | return sd; | ||
7382 | } | ||
7383 | 7127 | ||
7384 | static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, | 7128 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); |
7385 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7129 | *per_cpu_ptr(sdd->sd, cpu) = NULL; |
7386 | struct sched_domain *parent, int i) | ||
7387 | { | ||
7388 | struct sched_domain *sd; | ||
7389 | sd = &per_cpu(phys_domains, i).sd; | ||
7390 | SD_INIT(sd, CPU); | ||
7391 | set_domain_attribute(sd, attr); | ||
7392 | cpumask_copy(sched_domain_span(sd), d->nodemask); | ||
7393 | sd->parent = parent; | ||
7394 | if (parent) | ||
7395 | parent->child = sd; | ||
7396 | cpu_to_phys_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7397 | return sd; | ||
7398 | } | ||
7399 | 7130 | ||
7400 | static struct sched_domain *__build_book_sched_domain(struct s_data *d, | 7131 | if (cpu == cpumask_first(sched_group_cpus(sg))) { |
7401 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7132 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); |
7402 | struct sched_domain *parent, int i) | 7133 | *per_cpu_ptr(sdd->sg, cpu) = NULL; |
7403 | { | 7134 | } |
7404 | struct sched_domain *sd = parent; | ||
7405 | #ifdef CONFIG_SCHED_BOOK | ||
7406 | sd = &per_cpu(book_domains, i).sd; | ||
7407 | SD_INIT(sd, BOOK); | ||
7408 | set_domain_attribute(sd, attr); | ||
7409 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); | ||
7410 | sd->parent = parent; | ||
7411 | parent->child = sd; | ||
7412 | cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7413 | #endif | ||
7414 | return sd; | ||
7415 | } | 7135 | } |
7416 | 7136 | ||
7417 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, | 7137 | #ifdef CONFIG_SCHED_SMT |
7418 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7138 | static const struct cpumask *cpu_smt_mask(int cpu) |
7419 | struct sched_domain *parent, int i) | ||
7420 | { | 7139 | { |
7421 | struct sched_domain *sd = parent; | 7140 | return topology_thread_cpumask(cpu); |
7422 | #ifdef CONFIG_SCHED_MC | ||
7423 | sd = &per_cpu(core_domains, i).sd; | ||
7424 | SD_INIT(sd, MC); | ||
7425 | set_domain_attribute(sd, attr); | ||
7426 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_coregroup_mask(i)); | ||
7427 | sd->parent = parent; | ||
7428 | parent->child = sd; | ||
7429 | cpu_to_core_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7430 | #endif | ||
7431 | return sd; | ||
7432 | } | 7141 | } |
7433 | |||
7434 | static struct sched_domain *__build_smt_sched_domain(struct s_data *d, | ||
7435 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | ||
7436 | struct sched_domain *parent, int i) | ||
7437 | { | ||
7438 | struct sched_domain *sd = parent; | ||
7439 | #ifdef CONFIG_SCHED_SMT | ||
7440 | sd = &per_cpu(cpu_domains, i).sd; | ||
7441 | SD_INIT(sd, SIBLING); | ||
7442 | set_domain_attribute(sd, attr); | ||
7443 | cpumask_and(sched_domain_span(sd), cpu_map, topology_thread_cpumask(i)); | ||
7444 | sd->parent = parent; | ||
7445 | parent->child = sd; | ||
7446 | cpu_to_cpu_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7447 | #endif | 7142 | #endif |
7448 | return sd; | ||
7449 | } | ||
7450 | 7143 | ||
7451 | static void build_sched_groups(struct s_data *d, enum sched_domain_level l, | 7144 | /* |
7452 | const struct cpumask *cpu_map, int cpu) | 7145 | * Topology list, bottom-up. |
7453 | { | 7146 | */ |
7454 | switch (l) { | 7147 | static struct sched_domain_topology_level default_topology[] = { |
7455 | #ifdef CONFIG_SCHED_SMT | 7148 | #ifdef CONFIG_SCHED_SMT |
7456 | case SD_LV_SIBLING: /* set up CPU (sibling) groups */ | 7149 | { sd_init_SIBLING, cpu_smt_mask, }, |
7457 | cpumask_and(d->this_sibling_map, cpu_map, | ||
7458 | topology_thread_cpumask(cpu)); | ||
7459 | if (cpu == cpumask_first(d->this_sibling_map)) | ||
7460 | init_sched_build_groups(d->this_sibling_map, cpu_map, | ||
7461 | &cpu_to_cpu_group, | ||
7462 | d->send_covered, d->tmpmask); | ||
7463 | break; | ||
7464 | #endif | 7150 | #endif |
7465 | #ifdef CONFIG_SCHED_MC | 7151 | #ifdef CONFIG_SCHED_MC |
7466 | case SD_LV_MC: /* set up multi-core groups */ | 7152 | { sd_init_MC, cpu_coregroup_mask, }, |
7467 | cpumask_and(d->this_core_map, cpu_map, cpu_coregroup_mask(cpu)); | ||
7468 | if (cpu == cpumask_first(d->this_core_map)) | ||
7469 | init_sched_build_groups(d->this_core_map, cpu_map, | ||
7470 | &cpu_to_core_group, | ||
7471 | d->send_covered, d->tmpmask); | ||
7472 | break; | ||
7473 | #endif | 7153 | #endif |
7474 | #ifdef CONFIG_SCHED_BOOK | 7154 | #ifdef CONFIG_SCHED_BOOK |
7475 | case SD_LV_BOOK: /* set up book groups */ | 7155 | { sd_init_BOOK, cpu_book_mask, }, |
7476 | cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); | ||
7477 | if (cpu == cpumask_first(d->this_book_map)) | ||
7478 | init_sched_build_groups(d->this_book_map, cpu_map, | ||
7479 | &cpu_to_book_group, | ||
7480 | d->send_covered, d->tmpmask); | ||
7481 | break; | ||
7482 | #endif | 7156 | #endif |
7483 | case SD_LV_CPU: /* set up physical groups */ | 7157 | { sd_init_CPU, cpu_cpu_mask, }, |
7484 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); | ||
7485 | if (!cpumask_empty(d->nodemask)) | ||
7486 | init_sched_build_groups(d->nodemask, cpu_map, | ||
7487 | &cpu_to_phys_group, | ||
7488 | d->send_covered, d->tmpmask); | ||
7489 | break; | ||
7490 | #ifdef CONFIG_NUMA | 7158 | #ifdef CONFIG_NUMA |
7491 | case SD_LV_ALLNODES: | 7159 | { sd_init_NODE, cpu_node_mask, }, |
7492 | init_sched_build_groups(cpu_map, cpu_map, &cpu_to_allnodes_group, | 7160 | { sd_init_ALLNODES, cpu_allnodes_mask, }, |
7493 | d->send_covered, d->tmpmask); | ||
7494 | break; | ||
7495 | #endif | 7161 | #endif |
7496 | default: | 7162 | { NULL, }, |
7497 | break; | 7163 | }; |
7164 | |||
7165 | static struct sched_domain_topology_level *sched_domain_topology = default_topology; | ||
7166 | |||
7167 | static int __sdt_alloc(const struct cpumask *cpu_map) | ||
7168 | { | ||
7169 | struct sched_domain_topology_level *tl; | ||
7170 | int j; | ||
7171 | |||
7172 | for (tl = sched_domain_topology; tl->init; tl++) { | ||
7173 | struct sd_data *sdd = &tl->data; | ||
7174 | |||
7175 | sdd->sd = alloc_percpu(struct sched_domain *); | ||
7176 | if (!sdd->sd) | ||
7177 | return -ENOMEM; | ||
7178 | |||
7179 | sdd->sg = alloc_percpu(struct sched_group *); | ||
7180 | if (!sdd->sg) | ||
7181 | return -ENOMEM; | ||
7182 | |||
7183 | for_each_cpu(j, cpu_map) { | ||
7184 | struct sched_domain *sd; | ||
7185 | struct sched_group *sg; | ||
7186 | |||
7187 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | ||
7188 | GFP_KERNEL, cpu_to_node(j)); | ||
7189 | if (!sd) | ||
7190 | return -ENOMEM; | ||
7191 | |||
7192 | *per_cpu_ptr(sdd->sd, j) = sd; | ||
7193 | |||
7194 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
7195 | GFP_KERNEL, cpu_to_node(j)); | ||
7196 | if (!sg) | ||
7197 | return -ENOMEM; | ||
7198 | |||
7199 | *per_cpu_ptr(sdd->sg, j) = sg; | ||
7200 | } | ||
7201 | } | ||
7202 | |||
7203 | return 0; | ||
7204 | } | ||
7205 | |||
7206 | static void __sdt_free(const struct cpumask *cpu_map) | ||
7207 | { | ||
7208 | struct sched_domain_topology_level *tl; | ||
7209 | int j; | ||
7210 | |||
7211 | for (tl = sched_domain_topology; tl->init; tl++) { | ||
7212 | struct sd_data *sdd = &tl->data; | ||
7213 | |||
7214 | for_each_cpu(j, cpu_map) { | ||
7215 | kfree(*per_cpu_ptr(sdd->sd, j)); | ||
7216 | kfree(*per_cpu_ptr(sdd->sg, j)); | ||
7217 | } | ||
7218 | free_percpu(sdd->sd); | ||
7219 | free_percpu(sdd->sg); | ||
7220 | } | ||
7221 | } | ||
7222 | |||
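[Editorial sketch] __sdt_alloc()/__sdt_free() above pre-allocate one sched_domain and one sched_group per CPU per topology level behind per-CPU pointer tables, and claim_allocations() later NULLs the entries that ended up in use so the teardown only releases the leftovers. A user-space approximation of that alloc/claim/free symmetry, with plain pointer arrays in place of alloc_percpu() and arbitrary 64-byte objects standing in for the real structures:

#include <stdio.h>
#include <stdlib.h>

#define NCPUS   4
#define NLEVELS 3                       /* e.g. SMT, MC, CPU */

struct sd_data {
        void *sd[NCPUS];                /* per-CPU domain slots */
        void *sg[NCPUS];                /* per-CPU group slots */
};

static struct sd_data levels[NLEVELS];

static int sdt_alloc(void)
{
        for (int l = 0; l < NLEVELS; l++) {
                for (int cpu = 0; cpu < NCPUS; cpu++) {
                        levels[l].sd[cpu] = calloc(1, 64);
                        levels[l].sg[cpu] = calloc(1, 64);
                        if (!levels[l].sd[cpu] || !levels[l].sg[cpu])
                                return -1;
                }
        }
        return 0;
}

/* Mirror of the allocation: anything still non-NULL is an unused leftover. */
static void sdt_free(void)
{
        for (int l = 0; l < NLEVELS; l++) {
                for (int cpu = 0; cpu < NCPUS; cpu++) {
                        free(levels[l].sd[cpu]);
                        free(levels[l].sg[cpu]);
                        levels[l].sd[cpu] = levels[l].sg[cpu] = NULL;
                }
        }
}

/* Equivalent of claim_allocations(): detach a slot that is now in use. */
static void *claim(int level, int cpu)
{
        void *p = levels[level].sd[cpu];

        levels[level].sd[cpu] = NULL;
        return p;
}

int main(void)
{
        if (sdt_alloc()) {
                sdt_free();
                return 1;
        }
        void *in_use = claim(0, 0);     /* survives the cleanup below */
        sdt_free();
        printf("claimed slot still valid at %p\n", in_use);
        free(in_use);
        return 0;
}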
7223 | struct sched_domain *build_sched_domain(struct sched_domain_topology_level *tl, | ||
7224 | struct s_data *d, const struct cpumask *cpu_map, | ||
7225 | struct sched_domain_attr *attr, struct sched_domain *child, | ||
7226 | int cpu) | ||
7227 | { | ||
7228 | struct sched_domain *sd = tl->init(tl, cpu); | ||
7229 | if (!sd) | ||
7230 | return child; | ||
7231 | |||
7232 | set_domain_attribute(sd, attr); | ||
7233 | cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu)); | ||
7234 | if (child) { | ||
7235 | sd->level = child->level + 1; | ||
7236 | sched_domain_level_max = max(sched_domain_level_max, sd->level); | ||
7237 | child->parent = sd; | ||
7498 | } | 7238 | } |
7239 | sd->child = child; | ||
7240 | |||
7241 | return sd; | ||
7499 | } | 7242 | } |
7500 | 7243 | ||
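[Editorial sketch] build_sched_domain() above is driven by default_topology[], a NULL-terminated table walked bottom-up (SMT, MC, BOOK, CPU, then the NUMA levels); each iteration hangs the new domain above the previous one and bumps the level. A small user-space sketch of that table-driven chaining, with invented level names and a reduced struct, ignoring the cpumask and attribute handling:

#include <stdio.h>
#include <stdlib.h>

struct domain {
        const char *name;
        int level;                      /* 0 at the bottom, grows upward */
        struct domain *parent;
        struct domain *child;
};

struct topology_level {
        const char *name;               /* NULL name terminates the table */
};

/* Bottom-up, like default_topology[]: narrowest level first. */
static const struct topology_level topology[] = {
        { "SMT" }, { "MC" }, { "CPU" }, { "NODE" }, { NULL },
};

static struct domain *build_one(const struct topology_level *tl,
                                struct domain *child)
{
        struct domain *sd = calloc(1, sizeof(*sd));

        if (!sd)
                exit(1);
        sd->name = tl->name;
        if (child) {
                sd->level = child->level + 1;
                child->parent = sd;
        }
        sd->child = child;
        return sd;
}

int main(void)
{
        struct domain *sd = NULL;

        for (const struct topology_level *tl = topology; tl->name; tl++)
                sd = build_one(tl, sd);

        /* Descend to the base domain, as build_sched_domains() does. */
        while (sd->child)
                sd = sd->child;

        for (struct domain *d = sd; d; d = d->parent)
                printf("level %d: %s\n", d->level, d->name);
        return 0;
}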
7501 | /* | 7244 | /* |
7502 | * Build sched domains for a given set of cpus and attach the sched domains | 7245 | * Build sched domains for a given set of cpus and attach the sched domains |
7503 | * to the individual cpus | 7246 | * to the individual cpus |
7504 | */ | 7247 | */ |
7505 | static int __build_sched_domains(const struct cpumask *cpu_map, | 7248 | static int build_sched_domains(const struct cpumask *cpu_map, |
7506 | struct sched_domain_attr *attr) | 7249 | struct sched_domain_attr *attr) |
7507 | { | 7250 | { |
7508 | enum s_alloc alloc_state = sa_none; | 7251 | enum s_alloc alloc_state = sa_none; |
7509 | struct s_data d; | ||
7510 | struct sched_domain *sd; | 7252 | struct sched_domain *sd; |
7511 | int i; | 7253 | struct s_data d; |
7512 | #ifdef CONFIG_NUMA | 7254 | int i, ret = -ENOMEM; |
7513 | d.sd_allnodes = 0; | ||
7514 | #endif | ||
7515 | 7255 | ||
7516 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); | 7256 | alloc_state = __visit_domain_allocation_hell(&d, cpu_map); |
7517 | if (alloc_state != sa_rootdomain) | 7257 | if (alloc_state != sa_rootdomain) |
7518 | goto error; | 7258 | goto error; |
7519 | alloc_state = sa_sched_groups; | ||
7520 | 7259 | ||
7521 | /* | 7260 | /* Set up domains for cpus specified by the cpu_map. */ |
7522 | * Set up domains for cpus specified by the cpu_map. | ||
7523 | */ | ||
7524 | for_each_cpu(i, cpu_map) { | 7261 | for_each_cpu(i, cpu_map) { |
7525 | cpumask_and(d.nodemask, cpumask_of_node(cpu_to_node(i)), | 7262 | struct sched_domain_topology_level *tl; |
7526 | cpu_map); | ||
7527 | 7263 | ||
7528 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); | 7264 | sd = NULL; |
7529 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); | 7265 | for (tl = sched_domain_topology; tl->init; tl++) |
7530 | sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); | 7266 | sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); |
7531 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); | ||
7532 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); | ||
7533 | } | ||
7534 | |||
7535 | for_each_cpu(i, cpu_map) { | ||
7536 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); | ||
7537 | build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); | ||
7538 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); | ||
7539 | } | ||
7540 | |||
7541 | /* Set up physical groups */ | ||
7542 | for (i = 0; i < nr_node_ids; i++) | ||
7543 | build_sched_groups(&d, SD_LV_CPU, cpu_map, i); | ||
7544 | 7267 | ||
7545 | #ifdef CONFIG_NUMA | 7268 | while (sd->child) |
7546 | /* Set up node groups */ | 7269 | sd = sd->child; |
7547 | if (d.sd_allnodes) | ||
7548 | build_sched_groups(&d, SD_LV_ALLNODES, cpu_map, 0); | ||
7549 | |||
7550 | for (i = 0; i < nr_node_ids; i++) | ||
7551 | if (build_numa_sched_groups(&d, cpu_map, i)) | ||
7552 | goto error; | ||
7553 | #endif | ||
7554 | 7270 | ||
7555 | /* Calculate CPU power for physical packages and nodes */ | 7271 | *per_cpu_ptr(d.sd, i) = sd; |
7556 | #ifdef CONFIG_SCHED_SMT | ||
7557 | for_each_cpu(i, cpu_map) { | ||
7558 | sd = &per_cpu(cpu_domains, i).sd; | ||
7559 | init_sched_groups_power(i, sd); | ||
7560 | } | ||
7561 | #endif | ||
7562 | #ifdef CONFIG_SCHED_MC | ||
7563 | for_each_cpu(i, cpu_map) { | ||
7564 | sd = &per_cpu(core_domains, i).sd; | ||
7565 | init_sched_groups_power(i, sd); | ||
7566 | } | 7272 | } |
7567 | #endif | ||
7568 | #ifdef CONFIG_SCHED_BOOK | ||
7569 | for_each_cpu(i, cpu_map) { | ||
7570 | sd = &per_cpu(book_domains, i).sd; | ||
7571 | init_sched_groups_power(i, sd); | ||
7572 | } | ||
7573 | #endif | ||
7574 | 7273 | ||
7274 | /* Build the groups for the domains */ | ||
7575 | for_each_cpu(i, cpu_map) { | 7275 | for_each_cpu(i, cpu_map) { |
7576 | sd = &per_cpu(phys_domains, i).sd; | 7276 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { |
7577 | init_sched_groups_power(i, sd); | 7277 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); |
7578 | } | 7278 | get_group(i, sd->private, &sd->groups); |
7279 | atomic_inc(&sd->groups->ref); | ||
7579 | 7280 | ||
7580 | #ifdef CONFIG_NUMA | 7281 | if (i != cpumask_first(sched_domain_span(sd))) |
7581 | for (i = 0; i < nr_node_ids; i++) | 7282 | continue; |
7582 | init_numa_sched_groups_power(d.sched_group_nodes[i]); | ||
7583 | 7283 | ||
7584 | if (d.sd_allnodes) { | 7284 | build_sched_groups(sd); |
7585 | struct sched_group *sg; | 7285 | } |
7286 | } | ||
7586 | 7287 | ||
7587 | cpu_to_allnodes_group(cpumask_first(cpu_map), cpu_map, &sg, | 7288 | /* Calculate CPU power for physical packages and nodes */ |
7588 | d.tmpmask); | 7289 | for (i = nr_cpumask_bits-1; i >= 0; i--) { |
7589 | init_numa_sched_groups_power(sg); | 7290 | if (!cpumask_test_cpu(i, cpu_map)) |
7291 | continue; | ||
7292 | |||
7293 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { | ||
7294 | claim_allocations(i, sd); | ||
7295 | init_sched_groups_power(i, sd); | ||
7296 | } | ||
7590 | } | 7297 | } |
7591 | #endif | ||
7592 | 7298 | ||
7593 | /* Attach the domains */ | 7299 | /* Attach the domains */ |
7300 | rcu_read_lock(); | ||
7594 | for_each_cpu(i, cpu_map) { | 7301 | for_each_cpu(i, cpu_map) { |
7595 | #ifdef CONFIG_SCHED_SMT | 7302 | sd = *per_cpu_ptr(d.sd, i); |
7596 | sd = &per_cpu(cpu_domains, i).sd; | ||
7597 | #elif defined(CONFIG_SCHED_MC) | ||
7598 | sd = &per_cpu(core_domains, i).sd; | ||
7599 | #elif defined(CONFIG_SCHED_BOOK) | ||
7600 | sd = &per_cpu(book_domains, i).sd; | ||
7601 | #else | ||
7602 | sd = &per_cpu(phys_domains, i).sd; | ||
7603 | #endif | ||
7604 | cpu_attach_domain(sd, d.rd, i); | 7303 | cpu_attach_domain(sd, d.rd, i); |
7605 | } | 7304 | } |
7305 | rcu_read_unlock(); | ||
7606 | 7306 | ||
7607 | d.sched_group_nodes = NULL; /* don't free this we still need it */ | 7307 | ret = 0; |
7608 | __free_domain_allocs(&d, sa_tmpmask, cpu_map); | ||
7609 | return 0; | ||
7610 | |||
7611 | error: | 7308 | error: |
7612 | __free_domain_allocs(&d, alloc_state, cpu_map); | 7309 | __free_domain_allocs(&d, alloc_state, cpu_map); |
7613 | return -ENOMEM; | 7310 | return ret; |
7614 | } | ||
7615 | |||
7616 | static int build_sched_domains(const struct cpumask *cpu_map) | ||
7617 | { | ||
7618 | return __build_sched_domains(cpu_map, NULL); | ||
7619 | } | 7311 | } |
7620 | 7312 | ||
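[Editorial sketch] The error path of build_sched_domains() still leans on the enum s_alloc staging used by __free_domain_allocs(): each allocation step advances the stage, and on failure the switch falls through from the last completed stage, releasing resources in reverse order. A user-space sketch of that idiom, reusing the stage names but with everything else (struct members, sizes, the refcount guard) simplified for illustration:

#include <stdio.h>
#include <stdlib.h>

enum s_alloc { sa_rootdomain, sa_sd, sa_sd_storage, sa_none };

struct s_data {
        void *storage;
        void *sd;
        void *rd;
};

/* Tear down from the last completed stage; cases fall through on purpose. */
static void free_domain_allocs(struct s_data *d, enum s_alloc what)
{
        switch (what) {
        case sa_rootdomain:
                free(d->rd);            /* fall through */
        case sa_sd:
                free(d->sd);            /* fall through */
        case sa_sd_storage:
                free(d->storage);       /* fall through */
        case sa_none:
                break;
        }
}

/* Returns the stage whose cleanup should run if anything went wrong. */
static enum s_alloc visit_allocation_hell(struct s_data *d)
{
        d->storage = calloc(1, 32);
        if (!d->storage)
                return sa_none;
        d->sd = calloc(1, 32);
        if (!d->sd)
                return sa_sd_storage;
        d->rd = calloc(1, 32);
        if (!d->rd)
                return sa_sd;
        return sa_rootdomain;           /* everything allocated */
}

int main(void)
{
        struct s_data d = { 0 };
        enum s_alloc state = visit_allocation_hell(&d);

        if (state != sa_rootdomain) {
                free_domain_allocs(&d, state);
                return 1;
        }
        printf("all stages allocated\n");
        free_domain_allocs(&d, sa_rootdomain);
        return 0;
}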
7621 | static cpumask_var_t *doms_cur; /* current sched domains */ | 7313 | static cpumask_var_t *doms_cur; /* current sched domains */ |
@@ -7670,7 +7362,7 @@ void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | |||
7670 | * For now this just excludes isolated cpus, but could be used to | 7362 | * For now this just excludes isolated cpus, but could be used to |
7671 | * exclude other special cases in the future. | 7363 | * exclude other special cases in the future. |
7672 | */ | 7364 | */ |
7673 | static int arch_init_sched_domains(const struct cpumask *cpu_map) | 7365 | static int init_sched_domains(const struct cpumask *cpu_map) |
7674 | { | 7366 | { |
7675 | int err; | 7367 | int err; |
7676 | 7368 | ||
@@ -7681,32 +7373,24 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
7681 | doms_cur = &fallback_doms; | 7373 | doms_cur = &fallback_doms; |
7682 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); | 7374 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
7683 | dattr_cur = NULL; | 7375 | dattr_cur = NULL; |
7684 | err = build_sched_domains(doms_cur[0]); | 7376 | err = build_sched_domains(doms_cur[0], NULL); |
7685 | register_sched_domain_sysctl(); | 7377 | register_sched_domain_sysctl(); |
7686 | 7378 | ||
7687 | return err; | 7379 | return err; |
7688 | } | 7380 | } |
7689 | 7381 | ||
7690 | static void arch_destroy_sched_domains(const struct cpumask *cpu_map, | ||
7691 | struct cpumask *tmpmask) | ||
7692 | { | ||
7693 | free_sched_groups(cpu_map, tmpmask); | ||
7694 | } | ||
7695 | |||
7696 | /* | 7382 | /* |
7697 | * Detach sched domains from a group of cpus specified in cpu_map | 7383 | * Detach sched domains from a group of cpus specified in cpu_map |
7698 | * These cpus will now be attached to the NULL domain | 7384 | * These cpus will now be attached to the NULL domain |
7699 | */ | 7385 | */ |
7700 | static void detach_destroy_domains(const struct cpumask *cpu_map) | 7386 | static void detach_destroy_domains(const struct cpumask *cpu_map) |
7701 | { | 7387 | { |
7702 | /* Save because hotplug lock held. */ | ||
7703 | static DECLARE_BITMAP(tmpmask, CONFIG_NR_CPUS); | ||
7704 | int i; | 7388 | int i; |
7705 | 7389 | ||
7390 | rcu_read_lock(); | ||
7706 | for_each_cpu(i, cpu_map) | 7391 | for_each_cpu(i, cpu_map) |
7707 | cpu_attach_domain(NULL, &def_root_domain, i); | 7392 | cpu_attach_domain(NULL, &def_root_domain, i); |
7708 | synchronize_sched(); | 7393 | rcu_read_unlock(); |
7709 | arch_destroy_sched_domains(cpu_map, to_cpumask(tmpmask)); | ||
7710 | } | 7394 | } |
7711 | 7395 | ||
7712 | /* handle null as "default" */ | 7396 | /* handle null as "default" */ |
@@ -7795,8 +7479,7 @@ match1: | |||
7795 | goto match2; | 7479 | goto match2; |
7796 | } | 7480 | } |
7797 | /* no match - add a new doms_new */ | 7481 | /* no match - add a new doms_new */ |
7798 | __build_sched_domains(doms_new[i], | 7482 | build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL); |
7799 | dattr_new ? dattr_new + i : NULL); | ||
7800 | match2: | 7483 | match2: |
7801 | ; | 7484 | ; |
7802 | } | 7485 | } |
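Editor's note: because build_sched_domains() now carries the attribute pointer itself, the "no match" arm of partition_sched_domains() calls it directly instead of going through the removed __build_sched_domains() helper. The call site after this hunk reads:

	/* no match - add a new doms_new */
	build_sched_domains(doms_new[i],
			    dattr_new ? dattr_new + i : NULL);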
@@ -7815,7 +7498,7 @@ match2: | |||
7815 | } | 7498 | } |
7816 | 7499 | ||
7817 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | 7500 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) |
7818 | static void arch_reinit_sched_domains(void) | 7501 | static void reinit_sched_domains(void) |
7819 | { | 7502 | { |
7820 | get_online_cpus(); | 7503 | get_online_cpus(); |
7821 | 7504 | ||
@@ -7848,7 +7531,7 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt) | |||
7848 | else | 7531 | else |
7849 | sched_mc_power_savings = level; | 7532 | sched_mc_power_savings = level; |
7850 | 7533 | ||
7851 | arch_reinit_sched_domains(); | 7534 | reinit_sched_domains(); |
7852 | 7535 | ||
7853 | return count; | 7536 | return count; |
7854 | } | 7537 | } |
@@ -7967,14 +7650,9 @@ void __init sched_init_smp(void) | |||
7967 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); | 7650 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); |
7968 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); | 7651 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); |
7969 | 7652 | ||
7970 | #if defined(CONFIG_NUMA) | ||
7971 | sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), | ||
7972 | GFP_KERNEL); | ||
7973 | BUG_ON(sched_group_nodes_bycpu == NULL); | ||
7974 | #endif | ||
7975 | get_online_cpus(); | 7653 | get_online_cpus(); |
7976 | mutex_lock(&sched_domains_mutex); | 7654 | mutex_lock(&sched_domains_mutex); |
7977 | arch_init_sched_domains(cpu_active_mask); | 7655 | init_sched_domains(cpu_active_mask); |
7978 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 7656 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
7979 | if (cpumask_empty(non_isolated_cpus)) | 7657 | if (cpumask_empty(non_isolated_cpus)) |
7980 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 7658 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
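Editor's note: with the NUMA-only sched_group_nodes_bycpu allocation gone, sched_init_smp() just builds the initial domain set under the cpu-hotplug reference and sched_domains_mutex. A condensed sketch of the ordering this hunk leaves behind; the matching unlock/put calls fall outside the quoted context and are assumed here:

	get_online_cpus();
	mutex_lock(&sched_domains_mutex);
	init_sched_domains(cpu_active_mask);
	cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map);
	if (cpumask_empty(non_isolated_cpus))
		cpumask_set_cpu(smp_processor_id(), non_isolated_cpus);
	/* ... */
	mutex_unlock(&sched_domains_mutex);
	put_online_cpus();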
@@ -8281,6 +7959,7 @@ void __init sched_init(void) | |||
8281 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 7959 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
8282 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 7960 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
8283 | #ifdef CONFIG_SMP | 7961 | #ifdef CONFIG_SMP |
7962 | zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); | ||
8284 | #ifdef CONFIG_NO_HZ | 7963 | #ifdef CONFIG_NO_HZ |
8285 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); | 7964 | zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT); |
8286 | alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); | 7965 | alloc_cpumask_var(&nohz.grp_idle_mask, GFP_NOWAIT); |
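Editor's note: the sched_domains_tmpmask allocated in this hunk replaces the temporary masks the domain code used to carry around (compare the static bitmap removed from detach_destroy_domains() above); it is zero-allocated once at boot next to the other SMP cpumasks. A sketch of what this implies; the file-scope declaration is not part of the quoted context and is an assumption:

	/* assumed file-scope declaration, used while (re)building domains */
	static cpumask_var_t sched_domains_tmpmask;

	/* in sched_init(), inside #ifdef CONFIG_SMP */
	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);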
@@ -8340,7 +8019,7 @@ static void normalize_task(struct rq *rq, struct task_struct *p) | |||
8340 | int old_prio = p->prio; | 8019 | int old_prio = p->prio; |
8341 | int on_rq; | 8020 | int on_rq; |
8342 | 8021 | ||
8343 | on_rq = p->se.on_rq; | 8022 | on_rq = p->on_rq; |
8344 | if (on_rq) | 8023 | if (on_rq) |
8345 | deactivate_task(rq, p, 0); | 8024 | deactivate_task(rq, p, 0); |
8346 | __setscheduler(rq, p, SCHED_NORMAL, 0); | 8025 | __setscheduler(rq, p, SCHED_NORMAL, 0); |
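Editor's note: normalize_task() now reads the runnable state from the task itself (p->on_rq) rather than from its sched_entity (p->se.on_rq); the same substitution appears again in sched_move_task() below. Condensed from the quoted lines:

	on_rq = p->on_rq;	/* was p->se.on_rq */
	if (on_rq)
		deactivate_task(rq, p, 0);
	__setscheduler(rq, p, SCHED_NORMAL, 0);
	/* ... re-activate p if it was on the runqueue (outside the quoted hunk) ... */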
@@ -8553,7 +8232,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8553 | { | 8232 | { |
8554 | struct rt_rq *rt_rq; | 8233 | struct rt_rq *rt_rq; |
8555 | struct sched_rt_entity *rt_se; | 8234 | struct sched_rt_entity *rt_se; |
8556 | struct rq *rq; | ||
8557 | int i; | 8235 | int i; |
8558 | 8236 | ||
8559 | tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); | 8237 | tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); |
@@ -8567,8 +8245,6 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8567 | ktime_to_ns(def_rt_bandwidth.rt_period), 0); | 8245 | ktime_to_ns(def_rt_bandwidth.rt_period), 0); |
8568 | 8246 | ||
8569 | for_each_possible_cpu(i) { | 8247 | for_each_possible_cpu(i) { |
8570 | rq = cpu_rq(i); | ||
8571 | |||
8572 | rt_rq = kzalloc_node(sizeof(struct rt_rq), | 8248 | rt_rq = kzalloc_node(sizeof(struct rt_rq), |
8573 | GFP_KERNEL, cpu_to_node(i)); | 8249 | GFP_KERNEL, cpu_to_node(i)); |
8574 | if (!rt_rq) | 8250 | if (!rt_rq) |
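Editor's note: alloc_rt_sched_group() drops its rq local because nothing left in the per-cpu loop touches the runqueue directly; each iteration only allocates the group's rt_rq (and rt_se) on that cpu's node. Condensed from the quoted hunks, with the failure label name assumed since it lies outside the quoted context:

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;	/* label name is an assumption */
		/* ... allocate rt_se and wire up tg->rt_rq[i] ... */
	}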
@@ -8683,7 +8359,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8683 | rq = task_rq_lock(tsk, &flags); | 8359 | rq = task_rq_lock(tsk, &flags); |
8684 | 8360 | ||
8685 | running = task_current(rq, tsk); | 8361 | running = task_current(rq, tsk); |
8686 | on_rq = tsk->se.on_rq; | 8362 | on_rq = tsk->on_rq; |
8687 | 8363 | ||
8688 | if (on_rq) | 8364 | if (on_rq) |
8689 | dequeue_task(rq, tsk, 0); | 8365 | dequeue_task(rq, tsk, 0); |
@@ -8702,7 +8378,7 @@ void sched_move_task(struct task_struct *tsk) | |||
8702 | if (on_rq) | 8378 | if (on_rq) |
8703 | enqueue_task(rq, tsk, 0); | 8379 | enqueue_task(rq, tsk, 0); |
8704 | 8380 | ||
8705 | task_rq_unlock(rq, &flags); | 8381 | task_rq_unlock(rq, tsk, &flags); |
8706 | } | 8382 | } |
8707 | #endif /* CONFIG_CGROUP_SCHED */ | 8383 | #endif /* CONFIG_CGROUP_SCHED */ |
8708 | 8384 | ||
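Editor's note: task_rq_unlock() now takes the task along with the saved flags, pairing with the task_rq_lock() call that returned the rq; sched_move_task() also switches to the task-level tsk->on_rq flag. A condensed view of the lock/dequeue/requeue/unlock pattern assembled from the quoted hunks, with the group-switch details elided:

	rq = task_rq_lock(tsk, &flags);

	running = task_current(rq, tsk);
	on_rq = tsk->on_rq;

	if (on_rq)
		dequeue_task(rq, tsk, 0);
	/* ... move tsk to its new task_group, preserving its running state ... */
	if (on_rq)
		enqueue_task(rq, tsk, 0);

	task_rq_unlock(rq, tsk, &flags);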