diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 734 |
1 files changed, 429 insertions, 305 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index a455dca884a6..18cceeecce35 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -141,7 +141,7 @@ struct rt_prio_array { | |||
141 | 141 | ||
142 | struct rt_bandwidth { | 142 | struct rt_bandwidth { |
143 | /* nests inside the rq lock: */ | 143 | /* nests inside the rq lock: */ |
144 | spinlock_t rt_runtime_lock; | 144 | raw_spinlock_t rt_runtime_lock; |
145 | ktime_t rt_period; | 145 | ktime_t rt_period; |
146 | u64 rt_runtime; | 146 | u64 rt_runtime; |
147 | struct hrtimer rt_period_timer; | 147 | struct hrtimer rt_period_timer; |
@@ -178,7 +178,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |||
178 | rt_b->rt_period = ns_to_ktime(period); | 178 | rt_b->rt_period = ns_to_ktime(period); |
179 | rt_b->rt_runtime = runtime; | 179 | rt_b->rt_runtime = runtime; |
180 | 180 | ||
181 | spin_lock_init(&rt_b->rt_runtime_lock); | 181 | raw_spin_lock_init(&rt_b->rt_runtime_lock); |
182 | 182 | ||
183 | hrtimer_init(&rt_b->rt_period_timer, | 183 | hrtimer_init(&rt_b->rt_period_timer, |
184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
@@ -200,7 +200,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
200 | if (hrtimer_active(&rt_b->rt_period_timer)) | 200 | if (hrtimer_active(&rt_b->rt_period_timer)) |
201 | return; | 201 | return; |
202 | 202 | ||
203 | spin_lock(&rt_b->rt_runtime_lock); | 203 | raw_spin_lock(&rt_b->rt_runtime_lock); |
204 | for (;;) { | 204 | for (;;) { |
205 | unsigned long delta; | 205 | unsigned long delta; |
206 | ktime_t soft, hard; | 206 | ktime_t soft, hard; |
@@ -217,7 +217,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | 217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, |
218 | HRTIMER_MODE_ABS_PINNED, 0); | 218 | HRTIMER_MODE_ABS_PINNED, 0); |
219 | } | 219 | } |
220 | spin_unlock(&rt_b->rt_runtime_lock); | 220 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
221 | } | 221 | } |
222 | 222 | ||
223 | #ifdef CONFIG_RT_GROUP_SCHED | 223 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -298,7 +298,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); | |||
298 | 298 | ||
299 | #ifdef CONFIG_RT_GROUP_SCHED | 299 | #ifdef CONFIG_RT_GROUP_SCHED |
300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); |
302 | #endif /* CONFIG_RT_GROUP_SCHED */ | 302 | #endif /* CONFIG_RT_GROUP_SCHED */ |
303 | #else /* !CONFIG_USER_SCHED */ | 303 | #else /* !CONFIG_USER_SCHED */ |
304 | #define root_task_group init_task_group | 304 | #define root_task_group init_task_group |
@@ -309,6 +309,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | |||
309 | */ | 309 | */ |
310 | static DEFINE_SPINLOCK(task_group_lock); | 310 | static DEFINE_SPINLOCK(task_group_lock); |
311 | 311 | ||
312 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
313 | |||
312 | #ifdef CONFIG_SMP | 314 | #ifdef CONFIG_SMP |
313 | static int root_task_group_empty(void) | 315 | static int root_task_group_empty(void) |
314 | { | 316 | { |
@@ -316,7 +318,6 @@ static int root_task_group_empty(void) | |||
316 | } | 318 | } |
317 | #endif | 319 | #endif |
318 | 320 | ||
319 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
320 | #ifdef CONFIG_USER_SCHED | 321 | #ifdef CONFIG_USER_SCHED |
321 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) | 322 | # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) |
322 | #else /* !CONFIG_USER_SCHED */ | 323 | #else /* !CONFIG_USER_SCHED */ |
@@ -469,7 +470,7 @@ struct rt_rq { | |||
469 | u64 rt_time; | 470 | u64 rt_time; |
470 | u64 rt_runtime; | 471 | u64 rt_runtime; |
471 | /* Nests inside the rq lock: */ | 472 | /* Nests inside the rq lock: */ |
472 | spinlock_t rt_runtime_lock; | 473 | raw_spinlock_t rt_runtime_lock; |
473 | 474 | ||
474 | #ifdef CONFIG_RT_GROUP_SCHED | 475 | #ifdef CONFIG_RT_GROUP_SCHED |
475 | unsigned long rt_nr_boosted; | 476 | unsigned long rt_nr_boosted; |
@@ -524,7 +525,7 @@ static struct root_domain def_root_domain; | |||
524 | */ | 525 | */ |
525 | struct rq { | 526 | struct rq { |
526 | /* runqueue lock: */ | 527 | /* runqueue lock: */ |
527 | spinlock_t lock; | 528 | raw_spinlock_t lock; |
528 | 529 | ||
529 | /* | 530 | /* |
530 | * nr_running and cpu_load should be in the same cacheline because | 531 | * nr_running and cpu_load should be in the same cacheline because |
@@ -534,14 +535,12 @@ struct rq { | |||
534 | #define CPU_LOAD_IDX_MAX 5 | 535 | #define CPU_LOAD_IDX_MAX 5 |
535 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | 536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
536 | #ifdef CONFIG_NO_HZ | 537 | #ifdef CONFIG_NO_HZ |
537 | unsigned long last_tick_seen; | ||
538 | unsigned char in_nohz_recently; | 538 | unsigned char in_nohz_recently; |
539 | #endif | 539 | #endif |
540 | /* capture load from *all* tasks on this cpu: */ | 540 | /* capture load from *all* tasks on this cpu: */ |
541 | struct load_weight load; | 541 | struct load_weight load; |
542 | unsigned long nr_load_updates; | 542 | unsigned long nr_load_updates; |
543 | u64 nr_switches; | 543 | u64 nr_switches; |
544 | u64 nr_migrations_in; | ||
545 | 544 | ||
546 | struct cfs_rq cfs; | 545 | struct cfs_rq cfs; |
547 | struct rt_rq rt; | 546 | struct rt_rq rt; |
@@ -590,6 +589,8 @@ struct rq { | |||
590 | 589 | ||
591 | u64 rt_avg; | 590 | u64 rt_avg; |
592 | u64 age_stamp; | 591 | u64 age_stamp; |
592 | u64 idle_stamp; | ||
593 | u64 avg_idle; | ||
593 | #endif | 594 | #endif |
594 | 595 | ||
595 | /* calc_load related fields */ | 596 | /* calc_load related fields */ |
@@ -684,7 +685,7 @@ inline void update_rq_clock(struct rq *rq) | |||
684 | */ | 685 | */ |
685 | int runqueue_is_locked(int cpu) | 686 | int runqueue_is_locked(int cpu) |
686 | { | 687 | { |
687 | return spin_is_locked(&cpu_rq(cpu)->lock); | 688 | return raw_spin_is_locked(&cpu_rq(cpu)->lock); |
688 | } | 689 | } |
689 | 690 | ||
690 | /* | 691 | /* |
@@ -771,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
771 | if (!sched_feat_names[i]) | 772 | if (!sched_feat_names[i]) |
772 | return -EINVAL; | 773 | return -EINVAL; |
773 | 774 | ||
774 | filp->f_pos += cnt; | 775 | *ppos += cnt; |
775 | 776 | ||
776 | return cnt; | 777 | return cnt; |
777 | } | 778 | } |
@@ -813,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
813 | * default: 0.25ms | 814 | * default: 0.25ms |
814 | */ | 815 | */ |
815 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
816 | 818 | ||
817 | /* | 819 | /* |
818 | * Inject some fuzziness into changing the per-cpu group shares | 820 | * Inject some fuzziness into changing the per-cpu group shares |
@@ -891,7 +893,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
891 | */ | 893 | */ |
892 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | 894 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); |
893 | 895 | ||
894 | spin_unlock_irq(&rq->lock); | 896 | raw_spin_unlock_irq(&rq->lock); |
895 | } | 897 | } |
896 | 898 | ||
897 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 899 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
@@ -915,9 +917,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
915 | next->oncpu = 1; | 917 | next->oncpu = 1; |
916 | #endif | 918 | #endif |
917 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 919 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
918 | spin_unlock_irq(&rq->lock); | 920 | raw_spin_unlock_irq(&rq->lock); |
919 | #else | 921 | #else |
920 | spin_unlock(&rq->lock); | 922 | raw_spin_unlock(&rq->lock); |
921 | #endif | 923 | #endif |
922 | } | 924 | } |
923 | 925 | ||
@@ -947,10 +949,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
947 | { | 949 | { |
948 | for (;;) { | 950 | for (;;) { |
949 | struct rq *rq = task_rq(p); | 951 | struct rq *rq = task_rq(p); |
950 | spin_lock(&rq->lock); | 952 | raw_spin_lock(&rq->lock); |
951 | if (likely(rq == task_rq(p))) | 953 | if (likely(rq == task_rq(p))) |
952 | return rq; | 954 | return rq; |
953 | spin_unlock(&rq->lock); | 955 | raw_spin_unlock(&rq->lock); |
954 | } | 956 | } |
955 | } | 957 | } |
956 | 958 | ||
@@ -967,10 +969,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
967 | for (;;) { | 969 | for (;;) { |
968 | local_irq_save(*flags); | 970 | local_irq_save(*flags); |
969 | rq = task_rq(p); | 971 | rq = task_rq(p); |
970 | spin_lock(&rq->lock); | 972 | raw_spin_lock(&rq->lock); |
971 | if (likely(rq == task_rq(p))) | 973 | if (likely(rq == task_rq(p))) |
972 | return rq; | 974 | return rq; |
973 | spin_unlock_irqrestore(&rq->lock, *flags); | 975 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
974 | } | 976 | } |
975 | } | 977 | } |
976 | 978 | ||
@@ -979,19 +981,19 @@ void task_rq_unlock_wait(struct task_struct *p) | |||
979 | struct rq *rq = task_rq(p); | 981 | struct rq *rq = task_rq(p); |
980 | 982 | ||
981 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ | 983 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ |
982 | spin_unlock_wait(&rq->lock); | 984 | raw_spin_unlock_wait(&rq->lock); |
983 | } | 985 | } |
984 | 986 | ||
985 | static void __task_rq_unlock(struct rq *rq) | 987 | static void __task_rq_unlock(struct rq *rq) |
986 | __releases(rq->lock) | 988 | __releases(rq->lock) |
987 | { | 989 | { |
988 | spin_unlock(&rq->lock); | 990 | raw_spin_unlock(&rq->lock); |
989 | } | 991 | } |
990 | 992 | ||
991 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 993 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) |
992 | __releases(rq->lock) | 994 | __releases(rq->lock) |
993 | { | 995 | { |
994 | spin_unlock_irqrestore(&rq->lock, *flags); | 996 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
995 | } | 997 | } |
996 | 998 | ||
997 | /* | 999 | /* |
@@ -1004,7 +1006,7 @@ static struct rq *this_rq_lock(void) | |||
1004 | 1006 | ||
1005 | local_irq_disable(); | 1007 | local_irq_disable(); |
1006 | rq = this_rq(); | 1008 | rq = this_rq(); |
1007 | spin_lock(&rq->lock); | 1009 | raw_spin_lock(&rq->lock); |
1008 | 1010 | ||
1009 | return rq; | 1011 | return rq; |
1010 | } | 1012 | } |
@@ -1051,10 +1053,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1051 | 1053 | ||
1052 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); | 1054 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); |
1053 | 1055 | ||
1054 | spin_lock(&rq->lock); | 1056 | raw_spin_lock(&rq->lock); |
1055 | update_rq_clock(rq); | 1057 | update_rq_clock(rq); |
1056 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); | 1058 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); |
1057 | spin_unlock(&rq->lock); | 1059 | raw_spin_unlock(&rq->lock); |
1058 | 1060 | ||
1059 | return HRTIMER_NORESTART; | 1061 | return HRTIMER_NORESTART; |
1060 | } | 1062 | } |
@@ -1067,10 +1069,10 @@ static void __hrtick_start(void *arg) | |||
1067 | { | 1069 | { |
1068 | struct rq *rq = arg; | 1070 | struct rq *rq = arg; |
1069 | 1071 | ||
1070 | spin_lock(&rq->lock); | 1072 | raw_spin_lock(&rq->lock); |
1071 | hrtimer_restart(&rq->hrtick_timer); | 1073 | hrtimer_restart(&rq->hrtick_timer); |
1072 | rq->hrtick_csd_pending = 0; | 1074 | rq->hrtick_csd_pending = 0; |
1073 | spin_unlock(&rq->lock); | 1075 | raw_spin_unlock(&rq->lock); |
1074 | } | 1076 | } |
1075 | 1077 | ||
1076 | /* | 1078 | /* |
@@ -1177,7 +1179,7 @@ static void resched_task(struct task_struct *p) | |||
1177 | { | 1179 | { |
1178 | int cpu; | 1180 | int cpu; |
1179 | 1181 | ||
1180 | assert_spin_locked(&task_rq(p)->lock); | 1182 | assert_raw_spin_locked(&task_rq(p)->lock); |
1181 | 1183 | ||
1182 | if (test_tsk_need_resched(p)) | 1184 | if (test_tsk_need_resched(p)) |
1183 | return; | 1185 | return; |
@@ -1199,10 +1201,10 @@ static void resched_cpu(int cpu) | |||
1199 | struct rq *rq = cpu_rq(cpu); | 1201 | struct rq *rq = cpu_rq(cpu); |
1200 | unsigned long flags; | 1202 | unsigned long flags; |
1201 | 1203 | ||
1202 | if (!spin_trylock_irqsave(&rq->lock, flags)) | 1204 | if (!raw_spin_trylock_irqsave(&rq->lock, flags)) |
1203 | return; | 1205 | return; |
1204 | resched_task(cpu_curr(cpu)); | 1206 | resched_task(cpu_curr(cpu)); |
1205 | spin_unlock_irqrestore(&rq->lock, flags); | 1207 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1206 | } | 1208 | } |
1207 | 1209 | ||
1208 | #ifdef CONFIG_NO_HZ | 1210 | #ifdef CONFIG_NO_HZ |
@@ -1271,7 +1273,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | |||
1271 | #else /* !CONFIG_SMP */ | 1273 | #else /* !CONFIG_SMP */ |
1272 | static void resched_task(struct task_struct *p) | 1274 | static void resched_task(struct task_struct *p) |
1273 | { | 1275 | { |
1274 | assert_spin_locked(&task_rq(p)->lock); | 1276 | assert_raw_spin_locked(&task_rq(p)->lock); |
1275 | set_tsk_need_resched(p); | 1277 | set_tsk_need_resched(p); |
1276 | } | 1278 | } |
1277 | 1279 | ||
@@ -1598,11 +1600,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1598 | struct rq *rq = cpu_rq(cpu); | 1600 | struct rq *rq = cpu_rq(cpu); |
1599 | unsigned long flags; | 1601 | unsigned long flags; |
1600 | 1602 | ||
1601 | spin_lock_irqsave(&rq->lock, flags); | 1603 | raw_spin_lock_irqsave(&rq->lock, flags); |
1602 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; | 1604 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; |
1603 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1605 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; |
1604 | __set_se_shares(tg->se[cpu], shares); | 1606 | __set_se_shares(tg->se[cpu], shares); |
1605 | spin_unlock_irqrestore(&rq->lock, flags); | 1607 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1606 | } | 1608 | } |
1607 | } | 1609 | } |
1608 | 1610 | ||
@@ -1613,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1613 | */ | 1615 | */ |
1614 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
1615 | { | 1617 | { |
1616 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
1617 | unsigned long *usd_rq_weight; | 1619 | unsigned long *usd_rq_weight; |
1618 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
1619 | unsigned long flags; | 1621 | unsigned long flags; |
@@ -1629,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1629 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
1630 | usd_rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
1631 | 1633 | ||
1634 | rq_weight += weight; | ||
1632 | /* | 1635 | /* |
1633 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
1634 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
@@ -1637,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1637 | if (!weight) | 1640 | if (!weight) |
1638 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
1639 | 1642 | ||
1640 | rq_weight += weight; | 1643 | sum_weight += weight; |
1641 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
1642 | } | 1645 | } |
1643 | 1646 | ||
1647 | if (!rq_weight) | ||
1648 | rq_weight = sum_weight; | ||
1649 | |||
1644 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
1645 | shares = tg->shares; | 1651 | shares = tg->shares; |
1646 | 1652 | ||
@@ -1700,9 +1706,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
1700 | if (root_task_group_empty()) | 1706 | if (root_task_group_empty()) |
1701 | return; | 1707 | return; |
1702 | 1708 | ||
1703 | spin_unlock(&rq->lock); | 1709 | raw_spin_unlock(&rq->lock); |
1704 | update_shares(sd); | 1710 | update_shares(sd); |
1705 | spin_lock(&rq->lock); | 1711 | raw_spin_lock(&rq->lock); |
1706 | } | 1712 | } |
1707 | 1713 | ||
1708 | static void update_h_load(long cpu) | 1714 | static void update_h_load(long cpu) |
@@ -1742,7 +1748,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1742 | __acquires(busiest->lock) | 1748 | __acquires(busiest->lock) |
1743 | __acquires(this_rq->lock) | 1749 | __acquires(this_rq->lock) |
1744 | { | 1750 | { |
1745 | spin_unlock(&this_rq->lock); | 1751 | raw_spin_unlock(&this_rq->lock); |
1746 | double_rq_lock(this_rq, busiest); | 1752 | double_rq_lock(this_rq, busiest); |
1747 | 1753 | ||
1748 | return 1; | 1754 | return 1; |
@@ -1763,14 +1769,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1763 | { | 1769 | { |
1764 | int ret = 0; | 1770 | int ret = 0; |
1765 | 1771 | ||
1766 | if (unlikely(!spin_trylock(&busiest->lock))) { | 1772 | if (unlikely(!raw_spin_trylock(&busiest->lock))) { |
1767 | if (busiest < this_rq) { | 1773 | if (busiest < this_rq) { |
1768 | spin_unlock(&this_rq->lock); | 1774 | raw_spin_unlock(&this_rq->lock); |
1769 | spin_lock(&busiest->lock); | 1775 | raw_spin_lock(&busiest->lock); |
1770 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); | 1776 | raw_spin_lock_nested(&this_rq->lock, |
1777 | SINGLE_DEPTH_NESTING); | ||
1771 | ret = 1; | 1778 | ret = 1; |
1772 | } else | 1779 | } else |
1773 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); | 1780 | raw_spin_lock_nested(&busiest->lock, |
1781 | SINGLE_DEPTH_NESTING); | ||
1774 | } | 1782 | } |
1775 | return ret; | 1783 | return ret; |
1776 | } | 1784 | } |
@@ -1784,7 +1792,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1784 | { | 1792 | { |
1785 | if (unlikely(!irqs_disabled())) { | 1793 | if (unlikely(!irqs_disabled())) { |
1786 | /* printk() doesn't work well under rq->lock */ | 1794 | /* printk() doesn't work well under rq->lock */ |
1787 | spin_unlock(&this_rq->lock); | 1795 | raw_spin_unlock(&this_rq->lock); |
1788 | BUG_ON(1); | 1796 | BUG_ON(1); |
1789 | } | 1797 | } |
1790 | 1798 | ||
@@ -1794,7 +1802,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1794 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | 1802 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) |
1795 | __releases(busiest->lock) | 1803 | __releases(busiest->lock) |
1796 | { | 1804 | { |
1797 | spin_unlock(&busiest->lock); | 1805 | raw_spin_unlock(&busiest->lock); |
1798 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | 1806 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); |
1799 | } | 1807 | } |
1800 | #endif | 1808 | #endif |
@@ -1809,6 +1817,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1809 | #endif | 1817 | #endif |
1810 | 1818 | ||
1811 | static void calc_load_account_active(struct rq *this_rq); | 1819 | static void calc_load_account_active(struct rq *this_rq); |
1820 | static void update_sysctl(void); | ||
1821 | static int get_update_sysctl_factor(void); | ||
1822 | |||
1823 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1824 | { | ||
1825 | set_task_rq(p, cpu); | ||
1826 | #ifdef CONFIG_SMP | ||
1827 | /* | ||
1828 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1829 | * successfuly executed on another CPU. We must ensure that updates of | ||
1830 | * per-task data have been completed by this moment. | ||
1831 | */ | ||
1832 | smp_wmb(); | ||
1833 | task_thread_info(p)->cpu = cpu; | ||
1834 | #endif | ||
1835 | } | ||
1812 | 1836 | ||
1813 | #include "sched_stats.h" | 1837 | #include "sched_stats.h" |
1814 | #include "sched_idletask.c" | 1838 | #include "sched_idletask.c" |
@@ -1966,20 +1990,6 @@ inline int task_curr(const struct task_struct *p) | |||
1966 | return cpu_curr(task_cpu(p)) == p; | 1990 | return cpu_curr(task_cpu(p)) == p; |
1967 | } | 1991 | } |
1968 | 1992 | ||
1969 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1970 | { | ||
1971 | set_task_rq(p, cpu); | ||
1972 | #ifdef CONFIG_SMP | ||
1973 | /* | ||
1974 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1975 | * successfuly executed on another CPU. We must ensure that updates of | ||
1976 | * per-task data have been completed by this moment. | ||
1977 | */ | ||
1978 | smp_wmb(); | ||
1979 | task_thread_info(p)->cpu = cpu; | ||
1980 | #endif | ||
1981 | } | ||
1982 | |||
1983 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1993 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1984 | const struct sched_class *prev_class, | 1994 | const struct sched_class *prev_class, |
1985 | int oldprio, int running) | 1995 | int oldprio, int running) |
@@ -1992,6 +2002,39 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1992 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2002 | p->sched_class->prio_changed(rq, p, oldprio, running); |
1993 | } | 2003 | } |
1994 | 2004 | ||
2005 | /** | ||
2006 | * kthread_bind - bind a just-created kthread to a cpu. | ||
2007 | * @p: thread created by kthread_create(). | ||
2008 | * @cpu: cpu (might not be online, must be possible) for @p to run on. | ||
2009 | * | ||
2010 | * Description: This function is equivalent to set_cpus_allowed(), | ||
2011 | * except that @cpu doesn't need to be online, and the thread must be | ||
2012 | * stopped (i.e., just returned from kthread_create()). | ||
2013 | * | ||
2014 | * Function lives here instead of kthread.c because it messes with | ||
2015 | * scheduler internals which require locking. | ||
2016 | */ | ||
2017 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
2018 | { | ||
2019 | struct rq *rq = cpu_rq(cpu); | ||
2020 | unsigned long flags; | ||
2021 | |||
2022 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
2023 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
2024 | WARN_ON(1); | ||
2025 | return; | ||
2026 | } | ||
2027 | |||
2028 | raw_spin_lock_irqsave(&rq->lock, flags); | ||
2029 | update_rq_clock(rq); | ||
2030 | set_task_cpu(p, cpu); | ||
2031 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
2032 | p->rt.nr_cpus_allowed = 1; | ||
2033 | p->flags |= PF_THREAD_BOUND; | ||
2034 | raw_spin_unlock_irqrestore(&rq->lock, flags); | ||
2035 | } | ||
2036 | EXPORT_SYMBOL(kthread_bind); | ||
2037 | |||
1995 | #ifdef CONFIG_SMP | 2038 | #ifdef CONFIG_SMP |
1996 | /* | 2039 | /* |
1997 | * Is this task likely cache-hot: | 2040 | * Is this task likely cache-hot: |
@@ -2004,7 +2047,7 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2004 | /* | 2047 | /* |
2005 | * Buddy candidates are cache hot: | 2048 | * Buddy candidates are cache hot: |
2006 | */ | 2049 | */ |
2007 | if (sched_feat(CACHE_HOT_BUDDY) && | 2050 | if (sched_feat(CACHE_HOT_BUDDY) && this_rq()->nr_running && |
2008 | (&p->se == cfs_rq_of(&p->se)->next || | 2051 | (&p->se == cfs_rq_of(&p->se)->next || |
2009 | &p->se == cfs_rq_of(&p->se)->last)) | 2052 | &p->se == cfs_rq_of(&p->se)->last)) |
2010 | return 1; | 2053 | return 1; |
@@ -2026,30 +2069,13 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2026 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2069 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2027 | { | 2070 | { |
2028 | int old_cpu = task_cpu(p); | 2071 | int old_cpu = task_cpu(p); |
2029 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | ||
2030 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2072 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), |
2031 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2073 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); |
2032 | u64 clock_offset; | ||
2033 | |||
2034 | clock_offset = old_rq->clock - new_rq->clock; | ||
2035 | 2074 | ||
2036 | trace_sched_migrate_task(p, new_cpu); | 2075 | trace_sched_migrate_task(p, new_cpu); |
2037 | 2076 | ||
2038 | #ifdef CONFIG_SCHEDSTATS | ||
2039 | if (p->se.wait_start) | ||
2040 | p->se.wait_start -= clock_offset; | ||
2041 | if (p->se.sleep_start) | ||
2042 | p->se.sleep_start -= clock_offset; | ||
2043 | if (p->se.block_start) | ||
2044 | p->se.block_start -= clock_offset; | ||
2045 | #endif | ||
2046 | if (old_cpu != new_cpu) { | 2077 | if (old_cpu != new_cpu) { |
2047 | p->se.nr_migrations++; | 2078 | p->se.nr_migrations++; |
2048 | new_rq->nr_migrations_in++; | ||
2049 | #ifdef CONFIG_SCHEDSTATS | ||
2050 | if (task_hot(p, old_rq->clock, NULL)) | ||
2051 | schedstat_inc(p, se.nr_forced2_migrations); | ||
2052 | #endif | ||
2053 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2079 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
2054 | 1, 1, NULL, 0); | 2080 | 1, 1, NULL, 0); |
2055 | } | 2081 | } |
@@ -2082,6 +2108,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2082 | * it is sufficient to simply update the task's cpu field. | 2108 | * it is sufficient to simply update the task's cpu field. |
2083 | */ | 2109 | */ |
2084 | if (!p->se.on_rq && !task_running(rq, p)) { | 2110 | if (!p->se.on_rq && !task_running(rq, p)) { |
2111 | update_rq_clock(rq); | ||
2085 | set_task_cpu(p, dest_cpu); | 2112 | set_task_cpu(p, dest_cpu); |
2086 | return 0; | 2113 | return 0; |
2087 | } | 2114 | } |
@@ -2289,6 +2316,14 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2289 | preempt_enable(); | 2316 | preempt_enable(); |
2290 | } | 2317 | } |
2291 | 2318 | ||
2319 | #ifdef CONFIG_SMP | ||
2320 | static inline | ||
2321 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
2322 | { | ||
2323 | return p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
2324 | } | ||
2325 | #endif | ||
2326 | |||
2292 | /*** | 2327 | /*** |
2293 | * try_to_wake_up - wake up a thread | 2328 | * try_to_wake_up - wake up a thread |
2294 | * @p: the to-be-woken-up thread | 2329 | * @p: the to-be-woken-up thread |
@@ -2340,16 +2375,14 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2340 | if (task_contributes_to_load(p)) | 2375 | if (task_contributes_to_load(p)) |
2341 | rq->nr_uninterruptible--; | 2376 | rq->nr_uninterruptible--; |
2342 | p->state = TASK_WAKING; | 2377 | p->state = TASK_WAKING; |
2343 | task_rq_unlock(rq, &flags); | 2378 | __task_rq_unlock(rq); |
2344 | 2379 | ||
2345 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2380 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
2346 | if (cpu != orig_cpu) | 2381 | if (cpu != orig_cpu) |
2347 | set_task_cpu(p, cpu); | 2382 | set_task_cpu(p, cpu); |
2348 | 2383 | ||
2349 | rq = task_rq_lock(p, &flags); | 2384 | rq = __task_rq_lock(p); |
2350 | 2385 | update_rq_clock(rq); | |
2351 | if (rq != orig_rq) | ||
2352 | update_rq_clock(rq); | ||
2353 | 2386 | ||
2354 | WARN_ON(p->state != TASK_WAKING); | 2387 | WARN_ON(p->state != TASK_WAKING); |
2355 | cpu = task_cpu(p); | 2388 | cpu = task_cpu(p); |
@@ -2407,6 +2440,17 @@ out_running: | |||
2407 | #ifdef CONFIG_SMP | 2440 | #ifdef CONFIG_SMP |
2408 | if (p->sched_class->task_wake_up) | 2441 | if (p->sched_class->task_wake_up) |
2409 | p->sched_class->task_wake_up(rq, p); | 2442 | p->sched_class->task_wake_up(rq, p); |
2443 | |||
2444 | if (unlikely(rq->idle_stamp)) { | ||
2445 | u64 delta = rq->clock - rq->idle_stamp; | ||
2446 | u64 max = 2*sysctl_sched_migration_cost; | ||
2447 | |||
2448 | if (delta > max) | ||
2449 | rq->avg_idle = max; | ||
2450 | else | ||
2451 | update_avg(&rq->avg_idle, delta); | ||
2452 | rq->idle_stamp = 0; | ||
2453 | } | ||
2410 | #endif | 2454 | #endif |
2411 | out: | 2455 | out: |
2412 | task_rq_unlock(rq, &flags); | 2456 | task_rq_unlock(rq, &flags); |
@@ -2453,7 +2497,6 @@ static void __sched_fork(struct task_struct *p) | |||
2453 | p->se.avg_overlap = 0; | 2497 | p->se.avg_overlap = 0; |
2454 | p->se.start_runtime = 0; | 2498 | p->se.start_runtime = 0; |
2455 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2499 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2456 | p->se.avg_running = 0; | ||
2457 | 2500 | ||
2458 | #ifdef CONFIG_SCHEDSTATS | 2501 | #ifdef CONFIG_SCHEDSTATS |
2459 | p->se.wait_start = 0; | 2502 | p->se.wait_start = 0; |
@@ -2475,7 +2518,6 @@ static void __sched_fork(struct task_struct *p) | |||
2475 | p->se.nr_failed_migrations_running = 0; | 2518 | p->se.nr_failed_migrations_running = 0; |
2476 | p->se.nr_failed_migrations_hot = 0; | 2519 | p->se.nr_failed_migrations_hot = 0; |
2477 | p->se.nr_forced_migrations = 0; | 2520 | p->se.nr_forced_migrations = 0; |
2478 | p->se.nr_forced2_migrations = 0; | ||
2479 | 2521 | ||
2480 | p->se.nr_wakeups = 0; | 2522 | p->se.nr_wakeups = 0; |
2481 | p->se.nr_wakeups_sync = 0; | 2523 | p->se.nr_wakeups_sync = 0; |
@@ -2545,8 +2587,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2545 | if (!rt_prio(p->prio)) | 2587 | if (!rt_prio(p->prio)) |
2546 | p->sched_class = &fair_sched_class; | 2588 | p->sched_class = &fair_sched_class; |
2547 | 2589 | ||
2590 | if (p->sched_class->task_fork) | ||
2591 | p->sched_class->task_fork(p); | ||
2592 | |||
2548 | #ifdef CONFIG_SMP | 2593 | #ifdef CONFIG_SMP |
2549 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2594 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); |
2550 | #endif | 2595 | #endif |
2551 | set_task_cpu(p, cpu); | 2596 | set_task_cpu(p, cpu); |
2552 | 2597 | ||
@@ -2581,17 +2626,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2581 | rq = task_rq_lock(p, &flags); | 2626 | rq = task_rq_lock(p, &flags); |
2582 | BUG_ON(p->state != TASK_RUNNING); | 2627 | BUG_ON(p->state != TASK_RUNNING); |
2583 | update_rq_clock(rq); | 2628 | update_rq_clock(rq); |
2584 | 2629 | activate_task(rq, p, 0); | |
2585 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
2586 | activate_task(rq, p, 0); | ||
2587 | } else { | ||
2588 | /* | ||
2589 | * Let the scheduling class do new task startup | ||
2590 | * management (if any): | ||
2591 | */ | ||
2592 | p->sched_class->task_new(rq, p); | ||
2593 | inc_nr_running(rq); | ||
2594 | } | ||
2595 | trace_sched_wakeup_new(rq, p, 1); | 2630 | trace_sched_wakeup_new(rq, p, 1); |
2596 | check_preempt_curr(rq, p, WF_FORK); | 2631 | check_preempt_curr(rq, p, WF_FORK); |
2597 | #ifdef CONFIG_SMP | 2632 | #ifdef CONFIG_SMP |
@@ -2748,10 +2783,10 @@ static inline void post_schedule(struct rq *rq) | |||
2748 | if (rq->post_schedule) { | 2783 | if (rq->post_schedule) { |
2749 | unsigned long flags; | 2784 | unsigned long flags; |
2750 | 2785 | ||
2751 | spin_lock_irqsave(&rq->lock, flags); | 2786 | raw_spin_lock_irqsave(&rq->lock, flags); |
2752 | if (rq->curr->sched_class->post_schedule) | 2787 | if (rq->curr->sched_class->post_schedule) |
2753 | rq->curr->sched_class->post_schedule(rq); | 2788 | rq->curr->sched_class->post_schedule(rq); |
2754 | spin_unlock_irqrestore(&rq->lock, flags); | 2789 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
2755 | 2790 | ||
2756 | rq->post_schedule = 0; | 2791 | rq->post_schedule = 0; |
2757 | } | 2792 | } |
@@ -2815,14 +2850,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2815 | */ | 2850 | */ |
2816 | arch_start_context_switch(prev); | 2851 | arch_start_context_switch(prev); |
2817 | 2852 | ||
2818 | if (unlikely(!mm)) { | 2853 | if (likely(!mm)) { |
2819 | next->active_mm = oldmm; | 2854 | next->active_mm = oldmm; |
2820 | atomic_inc(&oldmm->mm_count); | 2855 | atomic_inc(&oldmm->mm_count); |
2821 | enter_lazy_tlb(oldmm, next); | 2856 | enter_lazy_tlb(oldmm, next); |
2822 | } else | 2857 | } else |
2823 | switch_mm(oldmm, mm, next); | 2858 | switch_mm(oldmm, mm, next); |
2824 | 2859 | ||
2825 | if (unlikely(!prev->mm)) { | 2860 | if (likely(!prev->mm)) { |
2826 | prev->active_mm = NULL; | 2861 | prev->active_mm = NULL; |
2827 | rq->prev_mm = oldmm; | 2862 | rq->prev_mm = oldmm; |
2828 | } | 2863 | } |
@@ -2985,15 +3020,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
2985 | } | 3020 | } |
2986 | 3021 | ||
2987 | /* | 3022 | /* |
2988 | * Externally visible per-cpu scheduler statistics: | ||
2989 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
2990 | */ | ||
2991 | u64 cpu_nr_migrations(int cpu) | ||
2992 | { | ||
2993 | return cpu_rq(cpu)->nr_migrations_in; | ||
2994 | } | ||
2995 | |||
2996 | /* | ||
2997 | * Update rq->cpu_load[] statistics. This function is usually called every | 3023 | * Update rq->cpu_load[] statistics. This function is usually called every |
2998 | * scheduler tick (TICK_NSEC). | 3024 | * scheduler tick (TICK_NSEC). |
2999 | */ | 3025 | */ |
@@ -3042,15 +3068,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
3042 | { | 3068 | { |
3043 | BUG_ON(!irqs_disabled()); | 3069 | BUG_ON(!irqs_disabled()); |
3044 | if (rq1 == rq2) { | 3070 | if (rq1 == rq2) { |
3045 | spin_lock(&rq1->lock); | 3071 | raw_spin_lock(&rq1->lock); |
3046 | __acquire(rq2->lock); /* Fake it out ;) */ | 3072 | __acquire(rq2->lock); /* Fake it out ;) */ |
3047 | } else { | 3073 | } else { |
3048 | if (rq1 < rq2) { | 3074 | if (rq1 < rq2) { |
3049 | spin_lock(&rq1->lock); | 3075 | raw_spin_lock(&rq1->lock); |
3050 | spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); | 3076 | raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); |
3051 | } else { | 3077 | } else { |
3052 | spin_lock(&rq2->lock); | 3078 | raw_spin_lock(&rq2->lock); |
3053 | spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); | 3079 | raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); |
3054 | } | 3080 | } |
3055 | } | 3081 | } |
3056 | update_rq_clock(rq1); | 3082 | update_rq_clock(rq1); |
@@ -3067,9 +3093,9 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
3067 | __releases(rq1->lock) | 3093 | __releases(rq1->lock) |
3068 | __releases(rq2->lock) | 3094 | __releases(rq2->lock) |
3069 | { | 3095 | { |
3070 | spin_unlock(&rq1->lock); | 3096 | raw_spin_unlock(&rq1->lock); |
3071 | if (rq1 != rq2) | 3097 | if (rq1 != rq2) |
3072 | spin_unlock(&rq2->lock); | 3098 | raw_spin_unlock(&rq2->lock); |
3073 | else | 3099 | else |
3074 | __release(rq2->lock); | 3100 | __release(rq2->lock); |
3075 | } | 3101 | } |
@@ -3115,7 +3141,7 @@ out: | |||
3115 | void sched_exec(void) | 3141 | void sched_exec(void) |
3116 | { | 3142 | { |
3117 | int new_cpu, this_cpu = get_cpu(); | 3143 | int new_cpu, this_cpu = get_cpu(); |
3118 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | 3144 | new_cpu = select_task_rq(current, SD_BALANCE_EXEC, 0); |
3119 | put_cpu(); | 3145 | put_cpu(); |
3120 | if (new_cpu != this_cpu) | 3146 | if (new_cpu != this_cpu) |
3121 | sched_migrate_task(current, new_cpu); | 3147 | sched_migrate_task(current, new_cpu); |
@@ -3131,10 +3157,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
3131 | deactivate_task(src_rq, p, 0); | 3157 | deactivate_task(src_rq, p, 0); |
3132 | set_task_cpu(p, this_cpu); | 3158 | set_task_cpu(p, this_cpu); |
3133 | activate_task(this_rq, p, 0); | 3159 | activate_task(this_rq, p, 0); |
3134 | /* | ||
3135 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
3136 | * to be always true for them. | ||
3137 | */ | ||
3138 | check_preempt_curr(this_rq, p, 0); | 3160 | check_preempt_curr(this_rq, p, 0); |
3139 | } | 3161 | } |
3140 | 3162 | ||
@@ -4093,7 +4115,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4093 | unsigned long flags; | 4115 | unsigned long flags; |
4094 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4116 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4095 | 4117 | ||
4096 | cpumask_setall(cpus); | 4118 | cpumask_copy(cpus, cpu_active_mask); |
4097 | 4119 | ||
4098 | /* | 4120 | /* |
4099 | * When power savings policy is enabled for the parent domain, idle | 4121 | * When power savings policy is enabled for the parent domain, idle |
@@ -4166,14 +4188,15 @@ redo: | |||
4166 | 4188 | ||
4167 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 4189 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
4168 | 4190 | ||
4169 | spin_lock_irqsave(&busiest->lock, flags); | 4191 | raw_spin_lock_irqsave(&busiest->lock, flags); |
4170 | 4192 | ||
4171 | /* don't kick the migration_thread, if the curr | 4193 | /* don't kick the migration_thread, if the curr |
4172 | * task on busiest cpu can't be moved to this_cpu | 4194 | * task on busiest cpu can't be moved to this_cpu |
4173 | */ | 4195 | */ |
4174 | if (!cpumask_test_cpu(this_cpu, | 4196 | if (!cpumask_test_cpu(this_cpu, |
4175 | &busiest->curr->cpus_allowed)) { | 4197 | &busiest->curr->cpus_allowed)) { |
4176 | spin_unlock_irqrestore(&busiest->lock, flags); | 4198 | raw_spin_unlock_irqrestore(&busiest->lock, |
4199 | flags); | ||
4177 | all_pinned = 1; | 4200 | all_pinned = 1; |
4178 | goto out_one_pinned; | 4201 | goto out_one_pinned; |
4179 | } | 4202 | } |
@@ -4183,7 +4206,7 @@ redo: | |||
4183 | busiest->push_cpu = this_cpu; | 4206 | busiest->push_cpu = this_cpu; |
4184 | active_balance = 1; | 4207 | active_balance = 1; |
4185 | } | 4208 | } |
4186 | spin_unlock_irqrestore(&busiest->lock, flags); | 4209 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
4187 | if (active_balance) | 4210 | if (active_balance) |
4188 | wake_up_process(busiest->migration_thread); | 4211 | wake_up_process(busiest->migration_thread); |
4189 | 4212 | ||
@@ -4256,7 +4279,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4256 | int all_pinned = 0; | 4279 | int all_pinned = 0; |
4257 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4280 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4258 | 4281 | ||
4259 | cpumask_setall(cpus); | 4282 | cpumask_copy(cpus, cpu_active_mask); |
4260 | 4283 | ||
4261 | /* | 4284 | /* |
4262 | * When power savings policy is enabled for the parent domain, idle | 4285 | * When power savings policy is enabled for the parent domain, idle |
@@ -4365,10 +4388,10 @@ redo: | |||
4365 | /* | 4388 | /* |
4366 | * Should not call ttwu while holding a rq->lock | 4389 | * Should not call ttwu while holding a rq->lock |
4367 | */ | 4390 | */ |
4368 | spin_unlock(&this_rq->lock); | 4391 | raw_spin_unlock(&this_rq->lock); |
4369 | if (active_balance) | 4392 | if (active_balance) |
4370 | wake_up_process(busiest->migration_thread); | 4393 | wake_up_process(busiest->migration_thread); |
4371 | spin_lock(&this_rq->lock); | 4394 | raw_spin_lock(&this_rq->lock); |
4372 | 4395 | ||
4373 | } else | 4396 | } else |
4374 | sd->nr_balance_failed = 0; | 4397 | sd->nr_balance_failed = 0; |
@@ -4396,6 +4419,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4396 | int pulled_task = 0; | 4419 | int pulled_task = 0; |
4397 | unsigned long next_balance = jiffies + HZ; | 4420 | unsigned long next_balance = jiffies + HZ; |
4398 | 4421 | ||
4422 | this_rq->idle_stamp = this_rq->clock; | ||
4423 | |||
4424 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | ||
4425 | return; | ||
4426 | |||
4399 | for_each_domain(this_cpu, sd) { | 4427 | for_each_domain(this_cpu, sd) { |
4400 | unsigned long interval; | 4428 | unsigned long interval; |
4401 | 4429 | ||
@@ -4410,8 +4438,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4410 | interval = msecs_to_jiffies(sd->balance_interval); | 4438 | interval = msecs_to_jiffies(sd->balance_interval); |
4411 | if (time_after(next_balance, sd->last_balance + interval)) | 4439 | if (time_after(next_balance, sd->last_balance + interval)) |
4412 | next_balance = sd->last_balance + interval; | 4440 | next_balance = sd->last_balance + interval; |
4413 | if (pulled_task) | 4441 | if (pulled_task) { |
4442 | this_rq->idle_stamp = 0; | ||
4414 | break; | 4443 | break; |
4444 | } | ||
4415 | } | 4445 | } |
4416 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 4446 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
4417 | /* | 4447 | /* |
@@ -4646,7 +4676,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
4646 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4676 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
4647 | 4677 | ||
4648 | /* time for ilb owner also to sleep */ | 4678 | /* time for ilb owner also to sleep */ |
4649 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4679 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
4650 | if (atomic_read(&nohz.load_balancer) == cpu) | 4680 | if (atomic_read(&nohz.load_balancer) == cpu) |
4651 | atomic_set(&nohz.load_balancer, -1); | 4681 | atomic_set(&nohz.load_balancer, -1); |
4652 | return 0; | 4682 | return 0; |
@@ -5013,8 +5043,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
5013 | p->gtime = cputime_add(p->gtime, cputime); | 5043 | p->gtime = cputime_add(p->gtime, cputime); |
5014 | 5044 | ||
5015 | /* Add guest time to cpustat. */ | 5045 | /* Add guest time to cpustat. */ |
5016 | cpustat->user = cputime64_add(cpustat->user, tmp); | 5046 | if (TASK_NICE(p) > 0) { |
5017 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | 5047 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
5048 | cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); | ||
5049 | } else { | ||
5050 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
5051 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
5052 | } | ||
5018 | } | 5053 | } |
5019 | 5054 | ||
5020 | /* | 5055 | /* |
@@ -5129,60 +5164,86 @@ void account_idle_ticks(unsigned long ticks) | |||
5129 | * Use precise platform statistics if available: | 5164 | * Use precise platform statistics if available: |
5130 | */ | 5165 | */ |
5131 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 5166 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
5132 | cputime_t task_utime(struct task_struct *p) | 5167 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5133 | { | 5168 | { |
5134 | return p->utime; | 5169 | *ut = p->utime; |
5170 | *st = p->stime; | ||
5135 | } | 5171 | } |
5136 | 5172 | ||
5137 | cputime_t task_stime(struct task_struct *p) | 5173 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5138 | { | 5174 | { |
5139 | return p->stime; | 5175 | struct task_cputime cputime; |
5176 | |||
5177 | thread_group_cputime(p, &cputime); | ||
5178 | |||
5179 | *ut = cputime.utime; | ||
5180 | *st = cputime.stime; | ||
5140 | } | 5181 | } |
5141 | #else | 5182 | #else |
5142 | cputime_t task_utime(struct task_struct *p) | 5183 | |
5184 | #ifndef nsecs_to_cputime | ||
5185 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
5186 | #endif | ||
5187 | |||
5188 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5143 | { | 5189 | { |
5144 | clock_t utime = cputime_to_clock_t(p->utime), | 5190 | cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); |
5145 | total = utime + cputime_to_clock_t(p->stime); | ||
5146 | u64 temp; | ||
5147 | 5191 | ||
5148 | /* | 5192 | /* |
5149 | * Use CFS's precise accounting: | 5193 | * Use CFS's precise accounting: |
5150 | */ | 5194 | */ |
5151 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | 5195 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
5152 | 5196 | ||
5153 | if (total) { | 5197 | if (total) { |
5154 | temp *= utime; | 5198 | u64 temp; |
5199 | |||
5200 | temp = (u64)(rtime * utime); | ||
5155 | do_div(temp, total); | 5201 | do_div(temp, total); |
5156 | } | 5202 | utime = (cputime_t)temp; |
5157 | utime = (clock_t)temp; | 5203 | } else |
5204 | utime = rtime; | ||
5205 | |||
5206 | /* | ||
5207 | * Compare with previous values, to keep monotonicity: | ||
5208 | */ | ||
5209 | p->prev_utime = max(p->prev_utime, utime); | ||
5210 | p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); | ||
5158 | 5211 | ||
5159 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | 5212 | *ut = p->prev_utime; |
5160 | return p->prev_utime; | 5213 | *st = p->prev_stime; |
5161 | } | 5214 | } |
5162 | 5215 | ||
5163 | cputime_t task_stime(struct task_struct *p) | 5216 | /* |
5217 | * Must be called with siglock held. | ||
5218 | */ | ||
5219 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5164 | { | 5220 | { |
5165 | clock_t stime; | 5221 | struct signal_struct *sig = p->signal; |
5222 | struct task_cputime cputime; | ||
5223 | cputime_t rtime, utime, total; | ||
5166 | 5224 | ||
5167 | /* | 5225 | thread_group_cputime(p, &cputime); |
5168 | * Use CFS's precise accounting. (we subtract utime from | ||
5169 | * the total, to make sure the total observed by userspace | ||
5170 | * grows monotonically - apps rely on that): | ||
5171 | */ | ||
5172 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
5173 | cputime_to_clock_t(task_utime(p)); | ||
5174 | 5226 | ||
5175 | if (stime >= 0) | 5227 | total = cputime_add(cputime.utime, cputime.stime); |
5176 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | 5228 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
5177 | 5229 | ||
5178 | return p->prev_stime; | 5230 | if (total) { |
5179 | } | 5231 | u64 temp; |
5180 | #endif | ||
5181 | 5232 | ||
5182 | inline cputime_t task_gtime(struct task_struct *p) | 5233 | temp = (u64)(rtime * cputime.utime); |
5183 | { | 5234 | do_div(temp, total); |
5184 | return p->gtime; | 5235 | utime = (cputime_t)temp; |
5236 | } else | ||
5237 | utime = rtime; | ||
5238 | |||
5239 | sig->prev_utime = max(sig->prev_utime, utime); | ||
5240 | sig->prev_stime = max(sig->prev_stime, | ||
5241 | cputime_sub(rtime, sig->prev_utime)); | ||
5242 | |||
5243 | *ut = sig->prev_utime; | ||
5244 | *st = sig->prev_stime; | ||
5185 | } | 5245 | } |
5246 | #endif | ||
5186 | 5247 | ||
5187 | /* | 5248 | /* |
5188 | * This function gets called by the timer code, with HZ frequency. | 5249 | * This function gets called by the timer code, with HZ frequency. |
@@ -5199,11 +5260,11 @@ void scheduler_tick(void) | |||
5199 | 5260 | ||
5200 | sched_clock_tick(); | 5261 | sched_clock_tick(); |
5201 | 5262 | ||
5202 | spin_lock(&rq->lock); | 5263 | raw_spin_lock(&rq->lock); |
5203 | update_rq_clock(rq); | 5264 | update_rq_clock(rq); |
5204 | update_cpu_load(rq); | 5265 | update_cpu_load(rq); |
5205 | curr->sched_class->task_tick(rq, curr, 0); | 5266 | curr->sched_class->task_tick(rq, curr, 0); |
5206 | spin_unlock(&rq->lock); | 5267 | raw_spin_unlock(&rq->lock); |
5207 | 5268 | ||
5208 | perf_event_task_tick(curr, cpu); | 5269 | perf_event_task_tick(curr, cpu); |
5209 | 5270 | ||
@@ -5317,13 +5378,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5317 | #endif | 5378 | #endif |
5318 | } | 5379 | } |
5319 | 5380 | ||
5320 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5381 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
5321 | { | 5382 | { |
5322 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5383 | if (prev->state == TASK_RUNNING) { |
5384 | u64 runtime = prev->se.sum_exec_runtime; | ||
5323 | 5385 | ||
5324 | update_avg(&p->se.avg_running, runtime); | 5386 | runtime -= prev->se.prev_sum_exec_runtime; |
5387 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5325 | 5388 | ||
5326 | if (p->state == TASK_RUNNING) { | ||
5327 | /* | 5389 | /* |
5328 | * In order to avoid avg_overlap growing stale when we are | 5390 | * In order to avoid avg_overlap growing stale when we are |
5329 | * indeed overlapping and hence not getting put to sleep, grow | 5391 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5333,12 +5395,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
5333 | * correlates to the amount of cache footprint a task can | 5395 | * correlates to the amount of cache footprint a task can |
5334 | * build up. | 5396 | * build up. |
5335 | */ | 5397 | */ |
5336 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5398 | update_avg(&prev->se.avg_overlap, runtime); |
5337 | update_avg(&p->se.avg_overlap, runtime); | ||
5338 | } else { | ||
5339 | update_avg(&p->se.avg_running, 0); | ||
5340 | } | 5399 | } |
5341 | p->sched_class->put_prev_task(rq, p); | 5400 | prev->sched_class->put_prev_task(rq, prev); |
5342 | } | 5401 | } |
5343 | 5402 | ||
5344 | /* | 5403 | /* |
@@ -5399,7 +5458,7 @@ need_resched_nonpreemptible: | |||
5399 | if (sched_feat(HRTICK)) | 5458 | if (sched_feat(HRTICK)) |
5400 | hrtick_clear(rq); | 5459 | hrtick_clear(rq); |
5401 | 5460 | ||
5402 | spin_lock_irq(&rq->lock); | 5461 | raw_spin_lock_irq(&rq->lock); |
5403 | update_rq_clock(rq); | 5462 | update_rq_clock(rq); |
5404 | clear_tsk_need_resched(prev); | 5463 | clear_tsk_need_resched(prev); |
5405 | 5464 | ||
@@ -5435,7 +5494,7 @@ need_resched_nonpreemptible: | |||
5435 | cpu = smp_processor_id(); | 5494 | cpu = smp_processor_id(); |
5436 | rq = cpu_rq(cpu); | 5495 | rq = cpu_rq(cpu); |
5437 | } else | 5496 | } else |
5438 | spin_unlock_irq(&rq->lock); | 5497 | raw_spin_unlock_irq(&rq->lock); |
5439 | 5498 | ||
5440 | post_schedule(rq); | 5499 | post_schedule(rq); |
5441 | 5500 | ||
@@ -5448,7 +5507,7 @@ need_resched_nonpreemptible: | |||
5448 | } | 5507 | } |
5449 | EXPORT_SYMBOL(schedule); | 5508 | EXPORT_SYMBOL(schedule); |
5450 | 5509 | ||
5451 | #ifdef CONFIG_SMP | 5510 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
5452 | /* | 5511 | /* |
5453 | * Look out! "owner" is an entirely speculative pointer | 5512 | * Look out! "owner" is an entirely speculative pointer |
5454 | * access and not reliable. | 5513 | * access and not reliable. |
@@ -6142,22 +6201,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
6142 | BUG_ON(p->se.on_rq); | 6201 | BUG_ON(p->se.on_rq); |
6143 | 6202 | ||
6144 | p->policy = policy; | 6203 | p->policy = policy; |
6145 | switch (p->policy) { | ||
6146 | case SCHED_NORMAL: | ||
6147 | case SCHED_BATCH: | ||
6148 | case SCHED_IDLE: | ||
6149 | p->sched_class = &fair_sched_class; | ||
6150 | break; | ||
6151 | case SCHED_FIFO: | ||
6152 | case SCHED_RR: | ||
6153 | p->sched_class = &rt_sched_class; | ||
6154 | break; | ||
6155 | } | ||
6156 | |||
6157 | p->rt_priority = prio; | 6204 | p->rt_priority = prio; |
6158 | p->normal_prio = normal_prio(p); | 6205 | p->normal_prio = normal_prio(p); |
6159 | /* we are holding p->pi_lock already */ | 6206 | /* we are holding p->pi_lock already */ |
6160 | p->prio = rt_mutex_getprio(p); | 6207 | p->prio = rt_mutex_getprio(p); |
6208 | if (rt_prio(p->prio)) | ||
6209 | p->sched_class = &rt_sched_class; | ||
6210 | else | ||
6211 | p->sched_class = &fair_sched_class; | ||
6161 | set_load_weight(p); | 6212 | set_load_weight(p); |
6162 | } | 6213 | } |
6163 | 6214 | ||
@@ -6272,7 +6323,7 @@ recheck: | |||
6272 | * make sure no PI-waiters arrive (or leave) while we are | 6323 | * make sure no PI-waiters arrive (or leave) while we are |
6273 | * changing the priority of the task: | 6324 | * changing the priority of the task: |
6274 | */ | 6325 | */ |
6275 | spin_lock_irqsave(&p->pi_lock, flags); | 6326 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
6276 | /* | 6327 | /* |
6277 | * To be able to change p->policy safely, the appropriate | 6328 | * To be able to change p->policy safely, the appropriate |
6278 | * runqueue lock must be held. | 6329 | * runqueue lock must be held. |
@@ -6282,7 +6333,7 @@ recheck: | |||
6282 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 6333 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
6283 | policy = oldpolicy = -1; | 6334 | policy = oldpolicy = -1; |
6284 | __task_rq_unlock(rq); | 6335 | __task_rq_unlock(rq); |
6285 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6336 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6286 | goto recheck; | 6337 | goto recheck; |
6287 | } | 6338 | } |
6288 | update_rq_clock(rq); | 6339 | update_rq_clock(rq); |
@@ -6306,7 +6357,7 @@ recheck: | |||
6306 | check_class_changed(rq, p, prev_class, oldprio, running); | 6357 | check_class_changed(rq, p, prev_class, oldprio, running); |
6307 | } | 6358 | } |
6308 | __task_rq_unlock(rq); | 6359 | __task_rq_unlock(rq); |
6309 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6360 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6310 | 6361 | ||
6311 | rt_mutex_adjust_pi(p); | 6362 | rt_mutex_adjust_pi(p); |
6312 | 6363 | ||
@@ -6560,6 +6611,8 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
6560 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6611 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
6561 | { | 6612 | { |
6562 | struct task_struct *p; | 6613 | struct task_struct *p; |
6614 | unsigned long flags; | ||
6615 | struct rq *rq; | ||
6563 | int retval; | 6616 | int retval; |
6564 | 6617 | ||
6565 | get_online_cpus(); | 6618 | get_online_cpus(); |
@@ -6574,7 +6627,9 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6574 | if (retval) | 6627 | if (retval) |
6575 | goto out_unlock; | 6628 | goto out_unlock; |
6576 | 6629 | ||
6630 | rq = task_rq_lock(p, &flags); | ||
6577 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6631 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
6632 | task_rq_unlock(rq, &flags); | ||
6578 | 6633 | ||
6579 | out_unlock: | 6634 | out_unlock: |
6580 | read_unlock(&tasklist_lock); | 6635 | read_unlock(&tasklist_lock); |
@@ -6632,7 +6687,7 @@ SYSCALL_DEFINE0(sched_yield) | |||
6632 | */ | 6687 | */ |
6633 | __release(rq->lock); | 6688 | __release(rq->lock); |
6634 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 6689 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
6635 | _raw_spin_unlock(&rq->lock); | 6690 | do_raw_spin_unlock(&rq->lock); |
6636 | preempt_enable_no_resched(); | 6691 | preempt_enable_no_resched(); |
6637 | 6692 | ||
6638 | schedule(); | 6693 | schedule(); |
@@ -6812,6 +6867,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6812 | { | 6867 | { |
6813 | struct task_struct *p; | 6868 | struct task_struct *p; |
6814 | unsigned int time_slice; | 6869 | unsigned int time_slice; |
6870 | unsigned long flags; | ||
6871 | struct rq *rq; | ||
6815 | int retval; | 6872 | int retval; |
6816 | struct timespec t; | 6873 | struct timespec t; |
6817 | 6874 | ||
@@ -6828,7 +6885,9 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6828 | if (retval) | 6885 | if (retval) |
6829 | goto out_unlock; | 6886 | goto out_unlock; |
6830 | 6887 | ||
6831 | time_slice = p->sched_class->get_rr_interval(p); | 6888 | rq = task_rq_lock(p, &flags); |
6889 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
6890 | task_rq_unlock(rq, &flags); | ||
6832 | 6891 | ||
6833 | read_unlock(&tasklist_lock); | 6892 | read_unlock(&tasklist_lock); |
6834 | jiffies_to_timespec(time_slice, &t); | 6893 | jiffies_to_timespec(time_slice, &t); |
@@ -6902,7 +6961,7 @@ void show_state_filter(unsigned long state_filter) | |||
6902 | /* | 6961 | /* |
6903 | * Only show locks if all tasks are dumped: | 6962 | * Only show locks if all tasks are dumped: |
6904 | */ | 6963 | */ |
6905 | if (state_filter == -1) | 6964 | if (!state_filter) |
6906 | debug_show_all_locks(); | 6965 | debug_show_all_locks(); |
6907 | } | 6966 | } |
6908 | 6967 | ||
@@ -6924,12 +6983,11 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6924 | struct rq *rq = cpu_rq(cpu); | 6983 | struct rq *rq = cpu_rq(cpu); |
6925 | unsigned long flags; | 6984 | unsigned long flags; |
6926 | 6985 | ||
6927 | spin_lock_irqsave(&rq->lock, flags); | 6986 | raw_spin_lock_irqsave(&rq->lock, flags); |
6928 | 6987 | ||
6929 | __sched_fork(idle); | 6988 | __sched_fork(idle); |
6930 | idle->se.exec_start = sched_clock(); | 6989 | idle->se.exec_start = sched_clock(); |
6931 | 6990 | ||
6932 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
6933 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 6991 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
6934 | __set_task_cpu(idle, cpu); | 6992 | __set_task_cpu(idle, cpu); |
6935 | 6993 | ||
@@ -6937,7 +6995,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6937 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 6995 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
6938 | idle->oncpu = 1; | 6996 | idle->oncpu = 1; |
6939 | #endif | 6997 | #endif |
6940 | spin_unlock_irqrestore(&rq->lock, flags); | 6998 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6941 | 6999 | ||
6942 | /* Set the preempt count _outside_ the spinlocks! */ | 7000 | /* Set the preempt count _outside_ the spinlocks! */ |
6943 | #if defined(CONFIG_PREEMPT) | 7001 | #if defined(CONFIG_PREEMPT) |
@@ -6970,22 +7028,43 @@ cpumask_var_t nohz_cpu_mask; | |||
6970 | * | 7028 | * |
6971 | * This idea comes from the SD scheduler of Con Kolivas: | 7029 | * This idea comes from the SD scheduler of Con Kolivas: |
6972 | */ | 7030 | */ |
6973 | static inline void sched_init_granularity(void) | 7031 | static int get_update_sysctl_factor(void) |
6974 | { | 7032 | { |
6975 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7033 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
6976 | const unsigned long limit = 200000000; | 7034 | unsigned int factor; |
7035 | |||
7036 | switch (sysctl_sched_tunable_scaling) { | ||
7037 | case SCHED_TUNABLESCALING_NONE: | ||
7038 | factor = 1; | ||
7039 | break; | ||
7040 | case SCHED_TUNABLESCALING_LINEAR: | ||
7041 | factor = cpus; | ||
7042 | break; | ||
7043 | case SCHED_TUNABLESCALING_LOG: | ||
7044 | default: | ||
7045 | factor = 1 + ilog2(cpus); | ||
7046 | break; | ||
7047 | } | ||
6977 | 7048 | ||
6978 | sysctl_sched_min_granularity *= factor; | 7049 | return factor; |
6979 | if (sysctl_sched_min_granularity > limit) | 7050 | } |
6980 | sysctl_sched_min_granularity = limit; | ||
6981 | 7051 | ||
6982 | sysctl_sched_latency *= factor; | 7052 | static void update_sysctl(void) |
6983 | if (sysctl_sched_latency > limit) | 7053 | { |
6984 | sysctl_sched_latency = limit; | 7054 | unsigned int factor = get_update_sysctl_factor(); |
6985 | 7055 | ||
6986 | sysctl_sched_wakeup_granularity *= factor; | 7056 | #define SET_SYSCTL(name) \ |
7057 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
7058 | SET_SYSCTL(sched_min_granularity); | ||
7059 | SET_SYSCTL(sched_latency); | ||
7060 | SET_SYSCTL(sched_wakeup_granularity); | ||
7061 | SET_SYSCTL(sched_shares_ratelimit); | ||
7062 | #undef SET_SYSCTL | ||
7063 | } | ||
6987 | 7064 | ||
6988 | sysctl_sched_shares_ratelimit *= factor; | 7065 | static inline void sched_init_granularity(void) |
7066 | { | ||
7067 | update_sysctl(); | ||
6989 | } | 7068 | } |
6990 | 7069 | ||
6991 | #ifdef CONFIG_SMP | 7070 | #ifdef CONFIG_SMP |
@@ -7022,7 +7101,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7022 | int ret = 0; | 7101 | int ret = 0; |
7023 | 7102 | ||
7024 | rq = task_rq_lock(p, &flags); | 7103 | rq = task_rq_lock(p, &flags); |
7025 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7104 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { |
7026 | ret = -EINVAL; | 7105 | ret = -EINVAL; |
7027 | goto out; | 7106 | goto out; |
7028 | } | 7107 | } |
@@ -7044,7 +7123,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7044 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7123 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
7045 | goto out; | 7124 | goto out; |
7046 | 7125 | ||
7047 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7126 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
7048 | /* Need help from migration thread: drop lock and wait. */ | 7127 | /* Need help from migration thread: drop lock and wait. */ |
7049 | struct task_struct *mt = rq->migration_thread; | 7128 | struct task_struct *mt = rq->migration_thread; |
7050 | 7129 | ||
@@ -7133,10 +7212,10 @@ static int migration_thread(void *data) | |||
7133 | struct migration_req *req; | 7212 | struct migration_req *req; |
7134 | struct list_head *head; | 7213 | struct list_head *head; |
7135 | 7214 | ||
7136 | spin_lock_irq(&rq->lock); | 7215 | raw_spin_lock_irq(&rq->lock); |
7137 | 7216 | ||
7138 | if (cpu_is_offline(cpu)) { | 7217 | if (cpu_is_offline(cpu)) { |
7139 | spin_unlock_irq(&rq->lock); | 7218 | raw_spin_unlock_irq(&rq->lock); |
7140 | break; | 7219 | break; |
7141 | } | 7220 | } |
7142 | 7221 | ||
@@ -7148,7 +7227,7 @@ static int migration_thread(void *data) | |||
7148 | head = &rq->migration_queue; | 7227 | head = &rq->migration_queue; |
7149 | 7228 | ||
7150 | if (list_empty(head)) { | 7229 | if (list_empty(head)) { |
7151 | spin_unlock_irq(&rq->lock); | 7230 | raw_spin_unlock_irq(&rq->lock); |
7152 | schedule(); | 7231 | schedule(); |
7153 | set_current_state(TASK_INTERRUPTIBLE); | 7232 | set_current_state(TASK_INTERRUPTIBLE); |
7154 | continue; | 7233 | continue; |
@@ -7157,14 +7236,14 @@ static int migration_thread(void *data) | |||
7157 | list_del_init(head->next); | 7236 | list_del_init(head->next); |
7158 | 7237 | ||
7159 | if (req->task != NULL) { | 7238 | if (req->task != NULL) { |
7160 | spin_unlock(&rq->lock); | 7239 | raw_spin_unlock(&rq->lock); |
7161 | __migrate_task(req->task, cpu, req->dest_cpu); | 7240 | __migrate_task(req->task, cpu, req->dest_cpu); |
7162 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | 7241 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { |
7163 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | 7242 | req->dest_cpu = RCU_MIGRATION_GOT_QS; |
7164 | spin_unlock(&rq->lock); | 7243 | raw_spin_unlock(&rq->lock); |
7165 | } else { | 7244 | } else { |
7166 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | 7245 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; |
7167 | spin_unlock(&rq->lock); | 7246 | raw_spin_unlock(&rq->lock); |
7168 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | 7247 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); |
7169 | } | 7248 | } |
7170 | local_irq_enable(); | 7249 | local_irq_enable(); |
@@ -7198,19 +7277,19 @@ static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | |||
7198 | 7277 | ||
7199 | again: | 7278 | again: |
7200 | /* Look for allowed, online CPU in same node. */ | 7279 | /* Look for allowed, online CPU in same node. */ |
7201 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | 7280 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) |
7202 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7281 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7203 | goto move; | 7282 | goto move; |
7204 | 7283 | ||
7205 | /* Any allowed, online CPU? */ | 7284 | /* Any allowed, online CPU? */ |
7206 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | 7285 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); |
7207 | if (dest_cpu < nr_cpu_ids) | 7286 | if (dest_cpu < nr_cpu_ids) |
7208 | goto move; | 7287 | goto move; |
7209 | 7288 | ||
7210 | /* No more Mr. Nice Guy. */ | 7289 | /* No more Mr. Nice Guy. */ |
7211 | if (dest_cpu >= nr_cpu_ids) { | 7290 | if (dest_cpu >= nr_cpu_ids) { |
7212 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | 7291 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); |
7213 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | 7292 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); |
7214 | 7293 | ||
7215 | /* | 7294 | /* |
7216 | * Don't tell them about moving exiting tasks or | 7295 | * Don't tell them about moving exiting tasks or |
@@ -7239,7 +7318,7 @@ move: | |||
7239 | */ | 7318 | */ |
7240 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7319 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
7241 | { | 7320 | { |
7242 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7321 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
7243 | unsigned long flags; | 7322 | unsigned long flags; |
7244 | 7323 | ||
7245 | local_irq_save(flags); | 7324 | local_irq_save(flags); |
@@ -7287,14 +7366,14 @@ void sched_idle_next(void) | |||
7287 | * Strictly not necessary since rest of the CPUs are stopped by now | 7366 | * Strictly not necessary since rest of the CPUs are stopped by now |
7288 | * and interrupts disabled on the current cpu. | 7367 | * and interrupts disabled on the current cpu. |
7289 | */ | 7368 | */ |
7290 | spin_lock_irqsave(&rq->lock, flags); | 7369 | raw_spin_lock_irqsave(&rq->lock, flags); |
7291 | 7370 | ||
7292 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 7371 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
7293 | 7372 | ||
7294 | update_rq_clock(rq); | 7373 | update_rq_clock(rq); |
7295 | activate_task(rq, p, 0); | 7374 | activate_task(rq, p, 0); |
7296 | 7375 | ||
7297 | spin_unlock_irqrestore(&rq->lock, flags); | 7376 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7298 | } | 7377 | } |
7299 | 7378 | ||
7300 | /* | 7379 | /* |
@@ -7330,9 +7409,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
7330 | * that's OK. No task can be added to this CPU, so iteration is | 7409 | * that's OK. No task can be added to this CPU, so iteration is |
7331 | * fine. | 7410 | * fine. |
7332 | */ | 7411 | */ |
7333 | spin_unlock_irq(&rq->lock); | 7412 | raw_spin_unlock_irq(&rq->lock); |
7334 | move_task_off_dead_cpu(dead_cpu, p); | 7413 | move_task_off_dead_cpu(dead_cpu, p); |
7335 | spin_lock_irq(&rq->lock); | 7414 | raw_spin_lock_irq(&rq->lock); |
7336 | 7415 | ||
7337 | put_task_struct(p); | 7416 | put_task_struct(p); |
7338 | } | 7417 | } |
@@ -7373,17 +7452,16 @@ static struct ctl_table sd_ctl_dir[] = { | |||
7373 | .procname = "sched_domain", | 7452 | .procname = "sched_domain", |
7374 | .mode = 0555, | 7453 | .mode = 0555, |
7375 | }, | 7454 | }, |
7376 | {0, }, | 7455 | {} |
7377 | }; | 7456 | }; |
7378 | 7457 | ||
7379 | static struct ctl_table sd_ctl_root[] = { | 7458 | static struct ctl_table sd_ctl_root[] = { |
7380 | { | 7459 | { |
7381 | .ctl_name = CTL_KERN, | ||
7382 | .procname = "kernel", | 7460 | .procname = "kernel", |
7383 | .mode = 0555, | 7461 | .mode = 0555, |
7384 | .child = sd_ctl_dir, | 7462 | .child = sd_ctl_dir, |
7385 | }, | 7463 | }, |
7386 | {0, }, | 7464 | {} |
7387 | }; | 7465 | }; |
7388 | 7466 | ||
7389 | static struct ctl_table *sd_alloc_ctl_entry(int n) | 7467 | static struct ctl_table *sd_alloc_ctl_entry(int n) |
@@ -7493,7 +7571,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
7493 | static struct ctl_table_header *sd_sysctl_header; | 7571 | static struct ctl_table_header *sd_sysctl_header; |
7494 | static void register_sched_domain_sysctl(void) | 7572 | static void register_sched_domain_sysctl(void) |
7495 | { | 7573 | { |
7496 | int i, cpu_num = num_online_cpus(); | 7574 | int i, cpu_num = num_possible_cpus(); |
7497 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7575 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
7498 | char buf[32]; | 7576 | char buf[32]; |
7499 | 7577 | ||
@@ -7503,7 +7581,7 @@ static void register_sched_domain_sysctl(void) | |||
7503 | if (entry == NULL) | 7581 | if (entry == NULL) |
7504 | return; | 7582 | return; |
7505 | 7583 | ||
7506 | for_each_online_cpu(i) { | 7584 | for_each_possible_cpu(i) { |
7507 | snprintf(buf, 32, "cpu%d", i); | 7585 | snprintf(buf, 32, "cpu%d", i); |
7508 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7586 | entry->procname = kstrdup(buf, GFP_KERNEL); |
7509 | entry->mode = 0555; | 7587 | entry->mode = 0555; |
@@ -7599,13 +7677,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7599 | 7677 | ||
7600 | /* Update our root-domain */ | 7678 | /* Update our root-domain */ |
7601 | rq = cpu_rq(cpu); | 7679 | rq = cpu_rq(cpu); |
7602 | spin_lock_irqsave(&rq->lock, flags); | 7680 | raw_spin_lock_irqsave(&rq->lock, flags); |
7603 | if (rq->rd) { | 7681 | if (rq->rd) { |
7604 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7682 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7605 | 7683 | ||
7606 | set_rq_online(rq); | 7684 | set_rq_online(rq); |
7607 | } | 7685 | } |
7608 | spin_unlock_irqrestore(&rq->lock, flags); | 7686 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7609 | break; | 7687 | break; |
7610 | 7688 | ||
7611 | #ifdef CONFIG_HOTPLUG_CPU | 7689 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -7630,14 +7708,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7630 | put_task_struct(rq->migration_thread); | 7708 | put_task_struct(rq->migration_thread); |
7631 | rq->migration_thread = NULL; | 7709 | rq->migration_thread = NULL; |
7632 | /* Idle task back to normal (off runqueue, low prio) */ | 7710 | /* Idle task back to normal (off runqueue, low prio) */ |
7633 | spin_lock_irq(&rq->lock); | 7711 | raw_spin_lock_irq(&rq->lock); |
7634 | update_rq_clock(rq); | 7712 | update_rq_clock(rq); |
7635 | deactivate_task(rq, rq->idle, 0); | 7713 | deactivate_task(rq, rq->idle, 0); |
7636 | rq->idle->static_prio = MAX_PRIO; | ||
7637 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7714 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
7638 | rq->idle->sched_class = &idle_sched_class; | 7715 | rq->idle->sched_class = &idle_sched_class; |
7639 | migrate_dead_tasks(cpu); | 7716 | migrate_dead_tasks(cpu); |
7640 | spin_unlock_irq(&rq->lock); | 7717 | raw_spin_unlock_irq(&rq->lock); |
7641 | cpuset_unlock(); | 7718 | cpuset_unlock(); |
7642 | migrate_nr_uninterruptible(rq); | 7719 | migrate_nr_uninterruptible(rq); |
7643 | BUG_ON(rq->nr_running != 0); | 7720 | BUG_ON(rq->nr_running != 0); |
@@ -7647,30 +7724,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7647 | * they didn't take sched_hotcpu_mutex. Just wake up | 7724 | * they didn't take sched_hotcpu_mutex. Just wake up |
7648 | * the requestors. | 7725 | * the requestors. |
7649 | */ | 7726 | */ |
7650 | spin_lock_irq(&rq->lock); | 7727 | raw_spin_lock_irq(&rq->lock); |
7651 | while (!list_empty(&rq->migration_queue)) { | 7728 | while (!list_empty(&rq->migration_queue)) { |
7652 | struct migration_req *req; | 7729 | struct migration_req *req; |
7653 | 7730 | ||
7654 | req = list_entry(rq->migration_queue.next, | 7731 | req = list_entry(rq->migration_queue.next, |
7655 | struct migration_req, list); | 7732 | struct migration_req, list); |
7656 | list_del_init(&req->list); | 7733 | list_del_init(&req->list); |
7657 | spin_unlock_irq(&rq->lock); | 7734 | raw_spin_unlock_irq(&rq->lock); |
7658 | complete(&req->done); | 7735 | complete(&req->done); |
7659 | spin_lock_irq(&rq->lock); | 7736 | raw_spin_lock_irq(&rq->lock); |
7660 | } | 7737 | } |
7661 | spin_unlock_irq(&rq->lock); | 7738 | raw_spin_unlock_irq(&rq->lock); |
7662 | break; | 7739 | break; |
7663 | 7740 | ||
7664 | case CPU_DYING: | 7741 | case CPU_DYING: |
7665 | case CPU_DYING_FROZEN: | 7742 | case CPU_DYING_FROZEN: |
7666 | /* Update our root-domain */ | 7743 | /* Update our root-domain */ |
7667 | rq = cpu_rq(cpu); | 7744 | rq = cpu_rq(cpu); |
7668 | spin_lock_irqsave(&rq->lock, flags); | 7745 | raw_spin_lock_irqsave(&rq->lock, flags); |
7669 | if (rq->rd) { | 7746 | if (rq->rd) { |
7670 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7747 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7671 | set_rq_offline(rq); | 7748 | set_rq_offline(rq); |
7672 | } | 7749 | } |
7673 | spin_unlock_irqrestore(&rq->lock, flags); | 7750 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7674 | break; | 7751 | break; |
7675 | #endif | 7752 | #endif |
7676 | } | 7753 | } |
@@ -7707,6 +7784,16 @@ early_initcall(migration_init); | |||
7707 | 7784 | ||
7708 | #ifdef CONFIG_SCHED_DEBUG | 7785 | #ifdef CONFIG_SCHED_DEBUG |
7709 | 7786 | ||
7787 | static __read_mostly int sched_domain_debug_enabled; | ||
7788 | |||
7789 | static int __init sched_domain_debug_setup(char *str) | ||
7790 | { | ||
7791 | sched_domain_debug_enabled = 1; | ||
7792 | |||
7793 | return 0; | ||
7794 | } | ||
7795 | early_param("sched_debug", sched_domain_debug_setup); | ||
7796 | |||
7710 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 7797 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
7711 | struct cpumask *groupmask) | 7798 | struct cpumask *groupmask) |
7712 | { | 7799 | { |
@@ -7793,6 +7880,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
7793 | cpumask_var_t groupmask; | 7880 | cpumask_var_t groupmask; |
7794 | int level = 0; | 7881 | int level = 0; |
7795 | 7882 | ||
7883 | if (!sched_domain_debug_enabled) | ||
7884 | return; | ||
7885 | |||
7796 | if (!sd) { | 7886 | if (!sd) { |
7797 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); | 7887 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); |
7798 | return; | 7888 | return; |
@@ -7872,6 +7962,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
7872 | 7962 | ||
7873 | static void free_rootdomain(struct root_domain *rd) | 7963 | static void free_rootdomain(struct root_domain *rd) |
7874 | { | 7964 | { |
7965 | synchronize_sched(); | ||
7966 | |||
7875 | cpupri_cleanup(&rd->cpupri); | 7967 | cpupri_cleanup(&rd->cpupri); |
7876 | 7968 | ||
7877 | free_cpumask_var(rd->rto_mask); | 7969 | free_cpumask_var(rd->rto_mask); |
@@ -7885,7 +7977,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7885 | struct root_domain *old_rd = NULL; | 7977 | struct root_domain *old_rd = NULL; |
7886 | unsigned long flags; | 7978 | unsigned long flags; |
7887 | 7979 | ||
7888 | spin_lock_irqsave(&rq->lock, flags); | 7980 | raw_spin_lock_irqsave(&rq->lock, flags); |
7889 | 7981 | ||
7890 | if (rq->rd) { | 7982 | if (rq->rd) { |
7891 | old_rd = rq->rd; | 7983 | old_rd = rq->rd; |
@@ -7911,7 +8003,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7911 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) | 8003 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) |
7912 | set_rq_online(rq); | 8004 | set_rq_online(rq); |
7913 | 8005 | ||
7914 | spin_unlock_irqrestore(&rq->lock, flags); | 8006 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7915 | 8007 | ||
7916 | if (old_rd) | 8008 | if (old_rd) |
7917 | free_rootdomain(old_rd); | 8009 | free_rootdomain(old_rd); |
@@ -8012,6 +8104,7 @@ static cpumask_var_t cpu_isolated_map; | |||
8012 | /* Setup the mask of cpus configured for isolated domains */ | 8104 | /* Setup the mask of cpus configured for isolated domains */ |
8013 | static int __init isolated_cpu_setup(char *str) | 8105 | static int __init isolated_cpu_setup(char *str) |
8014 | { | 8106 | { |
8107 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | ||
8015 | cpulist_parse(str, cpu_isolated_map); | 8108 | cpulist_parse(str, cpu_isolated_map); |
8016 | return 1; | 8109 | return 1; |
8017 | } | 8110 | } |
@@ -8196,14 +8289,14 @@ enum s_alloc { | |||
8196 | */ | 8289 | */ |
8197 | #ifdef CONFIG_SCHED_SMT | 8290 | #ifdef CONFIG_SCHED_SMT |
8198 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | 8291 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); |
8199 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); | 8292 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); |
8200 | 8293 | ||
8201 | static int | 8294 | static int |
8202 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | 8295 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, |
8203 | struct sched_group **sg, struct cpumask *unused) | 8296 | struct sched_group **sg, struct cpumask *unused) |
8204 | { | 8297 | { |
8205 | if (sg) | 8298 | if (sg) |
8206 | *sg = &per_cpu(sched_group_cpus, cpu).sg; | 8299 | *sg = &per_cpu(sched_groups, cpu).sg; |
8207 | return cpu; | 8300 | return cpu; |
8208 | } | 8301 | } |
8209 | #endif /* CONFIG_SCHED_SMT */ | 8302 | #endif /* CONFIG_SCHED_SMT */ |
@@ -8848,7 +8941,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) | |||
8848 | return __build_sched_domains(cpu_map, NULL); | 8941 | return __build_sched_domains(cpu_map, NULL); |
8849 | } | 8942 | } |
8850 | 8943 | ||
8851 | static struct cpumask *doms_cur; /* current sched domains */ | 8944 | static cpumask_var_t *doms_cur; /* current sched domains */ |
8852 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 8945 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
8853 | static struct sched_domain_attr *dattr_cur; | 8946 | static struct sched_domain_attr *dattr_cur; |
8854 | /* attributes of custom domains in 'doms_cur' */ | 8947 | /* attributes of custom domains in 'doms_cur' */ |
@@ -8870,6 +8963,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) | |||
8870 | return 0; | 8963 | return 0; |
8871 | } | 8964 | } |
8872 | 8965 | ||
8966 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms) | ||
8967 | { | ||
8968 | int i; | ||
8969 | cpumask_var_t *doms; | ||
8970 | |||
8971 | doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); | ||
8972 | if (!doms) | ||
8973 | return NULL; | ||
8974 | for (i = 0; i < ndoms; i++) { | ||
8975 | if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { | ||
8976 | free_sched_domains(doms, i); | ||
8977 | return NULL; | ||
8978 | } | ||
8979 | } | ||
8980 | return doms; | ||
8981 | } | ||
8982 | |||
8983 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | ||
8984 | { | ||
8985 | unsigned int i; | ||
8986 | for (i = 0; i < ndoms; i++) | ||
8987 | free_cpumask_var(doms[i]); | ||
8988 | kfree(doms); | ||
8989 | } | ||
8990 | |||
8873 | /* | 8991 | /* |
8874 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 8992 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
8875 | * For now this just excludes isolated cpus, but could be used to | 8993 | * For now this just excludes isolated cpus, but could be used to |
@@ -8881,12 +8999,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
8881 | 8999 | ||
8882 | arch_update_cpu_topology(); | 9000 | arch_update_cpu_topology(); |
8883 | ndoms_cur = 1; | 9001 | ndoms_cur = 1; |
8884 | doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); | 9002 | doms_cur = alloc_sched_domains(ndoms_cur); |
8885 | if (!doms_cur) | 9003 | if (!doms_cur) |
8886 | doms_cur = fallback_doms; | 9004 | doms_cur = &fallback_doms; |
8887 | cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); | 9005 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
8888 | dattr_cur = NULL; | 9006 | dattr_cur = NULL; |
8889 | err = build_sched_domains(doms_cur); | 9007 | err = build_sched_domains(doms_cur[0]); |
8890 | register_sched_domain_sysctl(); | 9008 | register_sched_domain_sysctl(); |
8891 | 9009 | ||
8892 | return err; | 9010 | return err; |
@@ -8936,19 +9054,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8936 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 9054 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
8937 | * It destroys each deleted domain and builds each new domain. | 9055 | * It destroys each deleted domain and builds each new domain. |
8938 | * | 9056 | * |
8939 | * 'doms_new' is an array of cpumask's of length 'ndoms_new'. | 9057 | * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. |
8940 | * The masks don't intersect (don't overlap.) We should setup one | 9058 | * The masks don't intersect (don't overlap.) We should setup one |
8941 | * sched domain for each mask. CPUs not in any of the cpumasks will | 9059 | * sched domain for each mask. CPUs not in any of the cpumasks will |
8942 | * not be load balanced. If the same cpumask appears both in the | 9060 | * not be load balanced. If the same cpumask appears both in the |
8943 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 9061 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
8944 | * it as it is. | 9062 | * it as it is. |
8945 | * | 9063 | * |
8946 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 9064 | * The passed in 'doms_new' should be allocated using |
8947 | * ownership of it and will kfree it when done with it. If the caller | 9065 | * alloc_sched_domains. This routine takes ownership of it and will |
8948 | * failed the kmalloc call, then it can pass in doms_new == NULL && | 9066 | * free_sched_domains it when done with it. If the caller failed the |
8949 | * ndoms_new == 1, and partition_sched_domains() will fallback to | 9067 | * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, |
8950 | * the single partition 'fallback_doms', it also forces the domains | 9068 | * and partition_sched_domains() will fallback to the single partition |
8951 | * to be rebuilt. | 9069 | * 'fallback_doms', it also forces the domains to be rebuilt. |
8952 | * | 9070 | * |
8953 | * If doms_new == NULL it will be replaced with cpu_online_mask. | 9071 | * If doms_new == NULL it will be replaced with cpu_online_mask. |
8954 | * ndoms_new == 0 is a special case for destroying existing domains, | 9072 | * ndoms_new == 0 is a special case for destroying existing domains, |
@@ -8956,8 +9074,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8956 | * | 9074 | * |
8957 | * Call with hotplug lock held | 9075 | * Call with hotplug lock held |
8958 | */ | 9076 | */ |
8959 | /* FIXME: Change to struct cpumask *doms_new[] */ | 9077 | void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
8960 | void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | ||
8961 | struct sched_domain_attr *dattr_new) | 9078 | struct sched_domain_attr *dattr_new) |
8962 | { | 9079 | { |
8963 | int i, j, n; | 9080 | int i, j, n; |
@@ -8976,40 +9093,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
8976 | /* Destroy deleted domains */ | 9093 | /* Destroy deleted domains */ |
8977 | for (i = 0; i < ndoms_cur; i++) { | 9094 | for (i = 0; i < ndoms_cur; i++) { |
8978 | for (j = 0; j < n && !new_topology; j++) { | 9095 | for (j = 0; j < n && !new_topology; j++) { |
8979 | if (cpumask_equal(&doms_cur[i], &doms_new[j]) | 9096 | if (cpumask_equal(doms_cur[i], doms_new[j]) |
8980 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 9097 | && dattrs_equal(dattr_cur, i, dattr_new, j)) |
8981 | goto match1; | 9098 | goto match1; |
8982 | } | 9099 | } |
8983 | /* no match - a current sched domain not in new doms_new[] */ | 9100 | /* no match - a current sched domain not in new doms_new[] */ |
8984 | detach_destroy_domains(doms_cur + i); | 9101 | detach_destroy_domains(doms_cur[i]); |
8985 | match1: | 9102 | match1: |
8986 | ; | 9103 | ; |
8987 | } | 9104 | } |
8988 | 9105 | ||
8989 | if (doms_new == NULL) { | 9106 | if (doms_new == NULL) { |
8990 | ndoms_cur = 0; | 9107 | ndoms_cur = 0; |
8991 | doms_new = fallback_doms; | 9108 | doms_new = &fallback_doms; |
8992 | cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); | 9109 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
8993 | WARN_ON_ONCE(dattr_new); | 9110 | WARN_ON_ONCE(dattr_new); |
8994 | } | 9111 | } |
8995 | 9112 | ||
8996 | /* Build new domains */ | 9113 | /* Build new domains */ |
8997 | for (i = 0; i < ndoms_new; i++) { | 9114 | for (i = 0; i < ndoms_new; i++) { |
8998 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 9115 | for (j = 0; j < ndoms_cur && !new_topology; j++) { |
8999 | if (cpumask_equal(&doms_new[i], &doms_cur[j]) | 9116 | if (cpumask_equal(doms_new[i], doms_cur[j]) |
9000 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 9117 | && dattrs_equal(dattr_new, i, dattr_cur, j)) |
9001 | goto match2; | 9118 | goto match2; |
9002 | } | 9119 | } |
9003 | /* no match - add a new doms_new */ | 9120 | /* no match - add a new doms_new */ |
9004 | __build_sched_domains(doms_new + i, | 9121 | __build_sched_domains(doms_new[i], |
9005 | dattr_new ? dattr_new + i : NULL); | 9122 | dattr_new ? dattr_new + i : NULL); |
9006 | match2: | 9123 | match2: |
9007 | ; | 9124 | ; |
9008 | } | 9125 | } |
9009 | 9126 | ||
9010 | /* Remember the new sched domains */ | 9127 | /* Remember the new sched domains */ |
9011 | if (doms_cur != fallback_doms) | 9128 | if (doms_cur != &fallback_doms) |
9012 | kfree(doms_cur); | 9129 | free_sched_domains(doms_cur, ndoms_cur); |
9013 | kfree(dattr_cur); /* kfree(NULL) is safe */ | 9130 | kfree(dattr_cur); /* kfree(NULL) is safe */ |
9014 | doms_cur = doms_new; | 9131 | doms_cur = doms_new; |
9015 | dattr_cur = dattr_new; | 9132 | dattr_cur = dattr_new; |
@@ -9120,8 +9237,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
9120 | switch (action) { | 9237 | switch (action) { |
9121 | case CPU_ONLINE: | 9238 | case CPU_ONLINE: |
9122 | case CPU_ONLINE_FROZEN: | 9239 | case CPU_ONLINE_FROZEN: |
9123 | case CPU_DEAD: | 9240 | case CPU_DOWN_PREPARE: |
9124 | case CPU_DEAD_FROZEN: | 9241 | case CPU_DOWN_PREPARE_FROZEN: |
9242 | case CPU_DOWN_FAILED: | ||
9243 | case CPU_DOWN_FAILED_FROZEN: | ||
9125 | partition_sched_domains(1, NULL, NULL); | 9244 | partition_sched_domains(1, NULL, NULL); |
9126 | return NOTIFY_OK; | 9245 | return NOTIFY_OK; |
9127 | 9246 | ||
@@ -9168,7 +9287,7 @@ void __init sched_init_smp(void) | |||
9168 | #endif | 9287 | #endif |
9169 | get_online_cpus(); | 9288 | get_online_cpus(); |
9170 | mutex_lock(&sched_domains_mutex); | 9289 | mutex_lock(&sched_domains_mutex); |
9171 | arch_init_sched_domains(cpu_online_mask); | 9290 | arch_init_sched_domains(cpu_active_mask); |
9172 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9291 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
9173 | if (cpumask_empty(non_isolated_cpus)) | 9292 | if (cpumask_empty(non_isolated_cpus)) |
9174 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9293 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
@@ -9241,13 +9360,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
9241 | #ifdef CONFIG_SMP | 9360 | #ifdef CONFIG_SMP |
9242 | rt_rq->rt_nr_migratory = 0; | 9361 | rt_rq->rt_nr_migratory = 0; |
9243 | rt_rq->overloaded = 0; | 9362 | rt_rq->overloaded = 0; |
9244 | plist_head_init(&rt_rq->pushable_tasks, &rq->lock); | 9363 | plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock); |
9245 | #endif | 9364 | #endif |
9246 | 9365 | ||
9247 | rt_rq->rt_time = 0; | 9366 | rt_rq->rt_time = 0; |
9248 | rt_rq->rt_throttled = 0; | 9367 | rt_rq->rt_throttled = 0; |
9249 | rt_rq->rt_runtime = 0; | 9368 | rt_rq->rt_runtime = 0; |
9250 | spin_lock_init(&rt_rq->rt_runtime_lock); | 9369 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); |
9251 | 9370 | ||
9252 | #ifdef CONFIG_RT_GROUP_SCHED | 9371 | #ifdef CONFIG_RT_GROUP_SCHED |
9253 | rt_rq->rt_nr_boosted = 0; | 9372 | rt_rq->rt_nr_boosted = 0; |
@@ -9331,10 +9450,6 @@ void __init sched_init(void) | |||
9331 | #ifdef CONFIG_CPUMASK_OFFSTACK | 9450 | #ifdef CONFIG_CPUMASK_OFFSTACK |
9332 | alloc_size += num_possible_cpus() * cpumask_size(); | 9451 | alloc_size += num_possible_cpus() * cpumask_size(); |
9333 | #endif | 9452 | #endif |
9334 | /* | ||
9335 | * As sched_init() is called before page_alloc is setup, | ||
9336 | * we use alloc_bootmem(). | ||
9337 | */ | ||
9338 | if (alloc_size) { | 9453 | if (alloc_size) { |
9339 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 9454 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9340 | 9455 | ||
@@ -9411,7 +9526,7 @@ void __init sched_init(void) | |||
9411 | struct rq *rq; | 9526 | struct rq *rq; |
9412 | 9527 | ||
9413 | rq = cpu_rq(i); | 9528 | rq = cpu_rq(i); |
9414 | spin_lock_init(&rq->lock); | 9529 | raw_spin_lock_init(&rq->lock); |
9415 | rq->nr_running = 0; | 9530 | rq->nr_running = 0; |
9416 | rq->calc_load_active = 0; | 9531 | rq->calc_load_active = 0; |
9417 | rq->calc_load_update = jiffies + LOAD_FREQ; | 9532 | rq->calc_load_update = jiffies + LOAD_FREQ; |
@@ -9471,7 +9586,7 @@ void __init sched_init(void) | |||
9471 | #elif defined CONFIG_USER_SCHED | 9586 | #elif defined CONFIG_USER_SCHED |
9472 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); | 9587 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); |
9473 | init_tg_rt_entry(&init_task_group, | 9588 | init_tg_rt_entry(&init_task_group, |
9474 | &per_cpu(init_rt_rq, i), | 9589 | &per_cpu(init_rt_rq_var, i), |
9475 | &per_cpu(init_sched_rt_entity, i), i, 1, | 9590 | &per_cpu(init_sched_rt_entity, i), i, 1, |
9476 | root_task_group.rt_se[i]); | 9591 | root_task_group.rt_se[i]); |
9477 | #endif | 9592 | #endif |
@@ -9489,6 +9604,8 @@ void __init sched_init(void) | |||
9489 | rq->cpu = i; | 9604 | rq->cpu = i; |
9490 | rq->online = 0; | 9605 | rq->online = 0; |
9491 | rq->migration_thread = NULL; | 9606 | rq->migration_thread = NULL; |
9607 | rq->idle_stamp = 0; | ||
9608 | rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
9492 | INIT_LIST_HEAD(&rq->migration_queue); | 9609 | INIT_LIST_HEAD(&rq->migration_queue); |
9493 | rq_attach_root(rq, &def_root_domain); | 9610 | rq_attach_root(rq, &def_root_domain); |
9494 | #endif | 9611 | #endif |
@@ -9507,7 +9624,7 @@ void __init sched_init(void) | |||
9507 | #endif | 9624 | #endif |
9508 | 9625 | ||
9509 | #ifdef CONFIG_RT_MUTEXES | 9626 | #ifdef CONFIG_RT_MUTEXES |
9510 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); | 9627 | plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock); |
9511 | #endif | 9628 | #endif |
9512 | 9629 | ||
9513 | /* | 9630 | /* |
@@ -9532,13 +9649,15 @@ void __init sched_init(void) | |||
9532 | current->sched_class = &fair_sched_class; | 9649 | current->sched_class = &fair_sched_class; |
9533 | 9650 | ||
9534 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ | 9651 | /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ |
9535 | alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); | 9652 | zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); |
9536 | #ifdef CONFIG_SMP | 9653 | #ifdef CONFIG_SMP |
9537 | #ifdef CONFIG_NO_HZ | 9654 | #ifdef CONFIG_NO_HZ |
9538 | alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9655 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9539 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9656 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9540 | #endif | 9657 | #endif |
9541 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9658 | /* May be allocated at isolcpus cmdline parse time */ |
9659 | if (cpu_isolated_map == NULL) | ||
9660 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | ||
9542 | #endif /* SMP */ | 9661 | #endif /* SMP */ |
9543 | 9662 | ||
9544 | perf_event_init(); | 9663 | perf_event_init(); |
@@ -9630,13 +9749,13 @@ void normalize_rt_tasks(void) | |||
9630 | continue; | 9749 | continue; |
9631 | } | 9750 | } |
9632 | 9751 | ||
9633 | spin_lock(&p->pi_lock); | 9752 | raw_spin_lock(&p->pi_lock); |
9634 | rq = __task_rq_lock(p); | 9753 | rq = __task_rq_lock(p); |
9635 | 9754 | ||
9636 | normalize_task(rq, p); | 9755 | normalize_task(rq, p); |
9637 | 9756 | ||
9638 | __task_rq_unlock(rq); | 9757 | __task_rq_unlock(rq); |
9639 | spin_unlock(&p->pi_lock); | 9758 | raw_spin_unlock(&p->pi_lock); |
9640 | } while_each_thread(g, p); | 9759 | } while_each_thread(g, p); |
9641 | 9760 | ||
9642 | read_unlock_irqrestore(&tasklist_lock, flags); | 9761 | read_unlock_irqrestore(&tasklist_lock, flags); |
@@ -9732,13 +9851,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
9732 | se = kzalloc_node(sizeof(struct sched_entity), | 9851 | se = kzalloc_node(sizeof(struct sched_entity), |
9733 | GFP_KERNEL, cpu_to_node(i)); | 9852 | GFP_KERNEL, cpu_to_node(i)); |
9734 | if (!se) | 9853 | if (!se) |
9735 | goto err; | 9854 | goto err_free_rq; |
9736 | 9855 | ||
9737 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9856 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
9738 | } | 9857 | } |
9739 | 9858 | ||
9740 | return 1; | 9859 | return 1; |
9741 | 9860 | ||
9861 | err_free_rq: | ||
9862 | kfree(cfs_rq); | ||
9742 | err: | 9863 | err: |
9743 | return 0; | 9864 | return 0; |
9744 | } | 9865 | } |
@@ -9820,13 +9941,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
9820 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9941 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
9821 | GFP_KERNEL, cpu_to_node(i)); | 9942 | GFP_KERNEL, cpu_to_node(i)); |
9822 | if (!rt_se) | 9943 | if (!rt_se) |
9823 | goto err; | 9944 | goto err_free_rq; |
9824 | 9945 | ||
9825 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9946 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
9826 | } | 9947 | } |
9827 | 9948 | ||
9828 | return 1; | 9949 | return 1; |
9829 | 9950 | ||
9951 | err_free_rq: | ||
9952 | kfree(rt_rq); | ||
9830 | err: | 9953 | err: |
9831 | return 0; | 9954 | return 0; |
9832 | } | 9955 | } |
@@ -9995,9 +10118,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
9995 | struct rq *rq = cfs_rq->rq; | 10118 | struct rq *rq = cfs_rq->rq; |
9996 | unsigned long flags; | 10119 | unsigned long flags; |
9997 | 10120 | ||
9998 | spin_lock_irqsave(&rq->lock, flags); | 10121 | raw_spin_lock_irqsave(&rq->lock, flags); |
9999 | __set_se_shares(se, shares); | 10122 | __set_se_shares(se, shares); |
10000 | spin_unlock_irqrestore(&rq->lock, flags); | 10123 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10001 | } | 10124 | } |
10002 | 10125 | ||
10003 | static DEFINE_MUTEX(shares_mutex); | 10126 | static DEFINE_MUTEX(shares_mutex); |
@@ -10182,18 +10305,18 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
10182 | if (err) | 10305 | if (err) |
10183 | goto unlock; | 10306 | goto unlock; |
10184 | 10307 | ||
10185 | spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10308 | raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10186 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); | 10309 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); |
10187 | tg->rt_bandwidth.rt_runtime = rt_runtime; | 10310 | tg->rt_bandwidth.rt_runtime = rt_runtime; |
10188 | 10311 | ||
10189 | for_each_possible_cpu(i) { | 10312 | for_each_possible_cpu(i) { |
10190 | struct rt_rq *rt_rq = tg->rt_rq[i]; | 10313 | struct rt_rq *rt_rq = tg->rt_rq[i]; |
10191 | 10314 | ||
10192 | spin_lock(&rt_rq->rt_runtime_lock); | 10315 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10193 | rt_rq->rt_runtime = rt_runtime; | 10316 | rt_rq->rt_runtime = rt_runtime; |
10194 | spin_unlock(&rt_rq->rt_runtime_lock); | 10317 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10195 | } | 10318 | } |
10196 | spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10319 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10197 | unlock: | 10320 | unlock: |
10198 | read_unlock(&tasklist_lock); | 10321 | read_unlock(&tasklist_lock); |
10199 | mutex_unlock(&rt_constraints_mutex); | 10322 | mutex_unlock(&rt_constraints_mutex); |
@@ -10298,15 +10421,15 @@ static int sched_rt_global_constraints(void) | |||
10298 | if (sysctl_sched_rt_runtime == 0) | 10421 | if (sysctl_sched_rt_runtime == 0) |
10299 | return -EBUSY; | 10422 | return -EBUSY; |
10300 | 10423 | ||
10301 | spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 10424 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
10302 | for_each_possible_cpu(i) { | 10425 | for_each_possible_cpu(i) { |
10303 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; | 10426 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; |
10304 | 10427 | ||
10305 | spin_lock(&rt_rq->rt_runtime_lock); | 10428 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10306 | rt_rq->rt_runtime = global_rt_runtime(); | 10429 | rt_rq->rt_runtime = global_rt_runtime(); |
10307 | spin_unlock(&rt_rq->rt_runtime_lock); | 10430 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10308 | } | 10431 | } |
10309 | spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); | 10432 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); |
10310 | 10433 | ||
10311 | return 0; | 10434 | return 0; |
10312 | } | 10435 | } |
@@ -10597,9 +10720,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |||
10597 | /* | 10720 | /* |
10598 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | 10721 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
10599 | */ | 10722 | */ |
10600 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10723 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10601 | data = *cpuusage; | 10724 | data = *cpuusage; |
10602 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10725 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10603 | #else | 10726 | #else |
10604 | data = *cpuusage; | 10727 | data = *cpuusage; |
10605 | #endif | 10728 | #endif |
@@ -10615,9 +10738,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |||
10615 | /* | 10738 | /* |
10616 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | 10739 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
10617 | */ | 10740 | */ |
10618 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10741 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10619 | *cpuusage = val; | 10742 | *cpuusage = val; |
10620 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10743 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10621 | #else | 10744 | #else |
10622 | *cpuusage = val; | 10745 | *cpuusage = val; |
10623 | #endif | 10746 | #endif |
@@ -10851,9 +10974,9 @@ void synchronize_sched_expedited(void) | |||
10851 | init_completion(&req->done); | 10974 | init_completion(&req->done); |
10852 | req->task = NULL; | 10975 | req->task = NULL; |
10853 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | 10976 | req->dest_cpu = RCU_MIGRATION_NEED_QS; |
10854 | spin_lock_irqsave(&rq->lock, flags); | 10977 | raw_spin_lock_irqsave(&rq->lock, flags); |
10855 | list_add(&req->list, &rq->migration_queue); | 10978 | list_add(&req->list, &rq->migration_queue); |
10856 | spin_unlock_irqrestore(&rq->lock, flags); | 10979 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10857 | wake_up_process(rq->migration_thread); | 10980 | wake_up_process(rq->migration_thread); |
10858 | } | 10981 | } |
10859 | for_each_online_cpu(cpu) { | 10982 | for_each_online_cpu(cpu) { |
@@ -10861,13 +10984,14 @@ void synchronize_sched_expedited(void) | |||
10861 | req = &per_cpu(rcu_migration_req, cpu); | 10984 | req = &per_cpu(rcu_migration_req, cpu); |
10862 | rq = cpu_rq(cpu); | 10985 | rq = cpu_rq(cpu); |
10863 | wait_for_completion(&req->done); | 10986 | wait_for_completion(&req->done); |
10864 | spin_lock_irqsave(&rq->lock, flags); | 10987 | raw_spin_lock_irqsave(&rq->lock, flags); |
10865 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | 10988 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) |
10866 | need_full_sync = 1; | 10989 | need_full_sync = 1; |
10867 | req->dest_cpu = RCU_MIGRATION_IDLE; | 10990 | req->dest_cpu = RCU_MIGRATION_IDLE; |
10868 | spin_unlock_irqrestore(&rq->lock, flags); | 10991 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10869 | } | 10992 | } |
10870 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | 10993 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; |
10994 | synchronize_sched_expedited_count++; | ||
10871 | mutex_unlock(&rcu_sched_expedited_mutex); | 10995 | mutex_unlock(&rcu_sched_expedited_mutex); |
10872 | put_online_cpus(); | 10996 | put_online_cpus(); |
10873 | if (need_full_sync) | 10997 | if (need_full_sync) |