diff options
Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 986 |
1 files changed, 549 insertions, 437 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 3c11ae0a948d..c535cc4f6428 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -141,7 +141,7 @@ struct rt_prio_array { | |||
141 | 141 | ||
142 | struct rt_bandwidth { | 142 | struct rt_bandwidth { |
143 | /* nests inside the rq lock: */ | 143 | /* nests inside the rq lock: */ |
144 | spinlock_t rt_runtime_lock; | 144 | raw_spinlock_t rt_runtime_lock; |
145 | ktime_t rt_period; | 145 | ktime_t rt_period; |
146 | u64 rt_runtime; | 146 | u64 rt_runtime; |
147 | struct hrtimer rt_period_timer; | 147 | struct hrtimer rt_period_timer; |
@@ -178,7 +178,7 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) | |||
178 | rt_b->rt_period = ns_to_ktime(period); | 178 | rt_b->rt_period = ns_to_ktime(period); |
179 | rt_b->rt_runtime = runtime; | 179 | rt_b->rt_runtime = runtime; |
180 | 180 | ||
181 | spin_lock_init(&rt_b->rt_runtime_lock); | 181 | raw_spin_lock_init(&rt_b->rt_runtime_lock); |
182 | 182 | ||
183 | hrtimer_init(&rt_b->rt_period_timer, | 183 | hrtimer_init(&rt_b->rt_period_timer, |
184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); | 184 | CLOCK_MONOTONIC, HRTIMER_MODE_REL); |
@@ -200,7 +200,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
200 | if (hrtimer_active(&rt_b->rt_period_timer)) | 200 | if (hrtimer_active(&rt_b->rt_period_timer)) |
201 | return; | 201 | return; |
202 | 202 | ||
203 | spin_lock(&rt_b->rt_runtime_lock); | 203 | raw_spin_lock(&rt_b->rt_runtime_lock); |
204 | for (;;) { | 204 | for (;;) { |
205 | unsigned long delta; | 205 | unsigned long delta; |
206 | ktime_t soft, hard; | 206 | ktime_t soft, hard; |
@@ -217,7 +217,7 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) | |||
217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, | 217 | __hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta, |
218 | HRTIMER_MODE_ABS_PINNED, 0); | 218 | HRTIMER_MODE_ABS_PINNED, 0); |
219 | } | 219 | } |
220 | spin_unlock(&rt_b->rt_runtime_lock); | 220 | raw_spin_unlock(&rt_b->rt_runtime_lock); |
221 | } | 221 | } |
222 | 222 | ||
223 | #ifdef CONFIG_RT_GROUP_SCHED | 223 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -298,7 +298,7 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct cfs_rq, init_tg_cfs_rq); | |||
298 | 298 | ||
299 | #ifdef CONFIG_RT_GROUP_SCHED | 299 | #ifdef CONFIG_RT_GROUP_SCHED |
300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); | 300 | static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity); |
301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq); | 301 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rt_rq, init_rt_rq_var); |
302 | #endif /* CONFIG_RT_GROUP_SCHED */ | 302 | #endif /* CONFIG_RT_GROUP_SCHED */ |
303 | #else /* !CONFIG_USER_SCHED */ | 303 | #else /* !CONFIG_USER_SCHED */ |
304 | #define root_task_group init_task_group | 304 | #define root_task_group init_task_group |
@@ -470,7 +470,7 @@ struct rt_rq { | |||
470 | u64 rt_time; | 470 | u64 rt_time; |
471 | u64 rt_runtime; | 471 | u64 rt_runtime; |
472 | /* Nests inside the rq lock: */ | 472 | /* Nests inside the rq lock: */ |
473 | spinlock_t rt_runtime_lock; | 473 | raw_spinlock_t rt_runtime_lock; |
474 | 474 | ||
475 | #ifdef CONFIG_RT_GROUP_SCHED | 475 | #ifdef CONFIG_RT_GROUP_SCHED |
476 | unsigned long rt_nr_boosted; | 476 | unsigned long rt_nr_boosted; |
@@ -525,7 +525,7 @@ static struct root_domain def_root_domain; | |||
525 | */ | 525 | */ |
526 | struct rq { | 526 | struct rq { |
527 | /* runqueue lock: */ | 527 | /* runqueue lock: */ |
528 | spinlock_t lock; | 528 | raw_spinlock_t lock; |
529 | 529 | ||
530 | /* | 530 | /* |
531 | * nr_running and cpu_load should be in the same cacheline because | 531 | * nr_running and cpu_load should be in the same cacheline because |
@@ -535,14 +535,12 @@ struct rq { | |||
535 | #define CPU_LOAD_IDX_MAX 5 | 535 | #define CPU_LOAD_IDX_MAX 5 |
536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; | 536 | unsigned long cpu_load[CPU_LOAD_IDX_MAX]; |
537 | #ifdef CONFIG_NO_HZ | 537 | #ifdef CONFIG_NO_HZ |
538 | unsigned long last_tick_seen; | ||
539 | unsigned char in_nohz_recently; | 538 | unsigned char in_nohz_recently; |
540 | #endif | 539 | #endif |
541 | /* capture load from *all* tasks on this cpu: */ | 540 | /* capture load from *all* tasks on this cpu: */ |
542 | struct load_weight load; | 541 | struct load_weight load; |
543 | unsigned long nr_load_updates; | 542 | unsigned long nr_load_updates; |
544 | u64 nr_switches; | 543 | u64 nr_switches; |
545 | u64 nr_migrations_in; | ||
546 | 544 | ||
547 | struct cfs_rq cfs; | 545 | struct cfs_rq cfs; |
548 | struct rt_rq rt; | 546 | struct rt_rq rt; |
@@ -591,6 +589,8 @@ struct rq { | |||
591 | 589 | ||
592 | u64 rt_avg; | 590 | u64 rt_avg; |
593 | u64 age_stamp; | 591 | u64 age_stamp; |
592 | u64 idle_stamp; | ||
593 | u64 avg_idle; | ||
594 | #endif | 594 | #endif |
595 | 595 | ||
596 | /* calc_load related fields */ | 596 | /* calc_load related fields */ |
@@ -685,7 +685,7 @@ inline void update_rq_clock(struct rq *rq) | |||
685 | */ | 685 | */ |
686 | int runqueue_is_locked(int cpu) | 686 | int runqueue_is_locked(int cpu) |
687 | { | 687 | { |
688 | return spin_is_locked(&cpu_rq(cpu)->lock); | 688 | return raw_spin_is_locked(&cpu_rq(cpu)->lock); |
689 | } | 689 | } |
690 | 690 | ||
691 | /* | 691 | /* |
@@ -772,7 +772,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
772 | if (!sched_feat_names[i]) | 772 | if (!sched_feat_names[i]) |
773 | return -EINVAL; | 773 | return -EINVAL; |
774 | 774 | ||
775 | filp->f_pos += cnt; | 775 | *ppos += cnt; |
776 | 776 | ||
777 | return cnt; | 777 | return cnt; |
778 | } | 778 | } |
@@ -814,6 +814,7 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32; | |||
814 | * default: 0.25ms | 814 | * default: 0.25ms |
815 | */ | 815 | */ |
816 | unsigned int sysctl_sched_shares_ratelimit = 250000; | 816 | unsigned int sysctl_sched_shares_ratelimit = 250000; |
817 | unsigned int normalized_sysctl_sched_shares_ratelimit = 250000; | ||
817 | 818 | ||
818 | /* | 819 | /* |
819 | * Inject some fuzzyness into changing the per-cpu group shares | 820 | * Inject some fuzzyness into changing the per-cpu group shares |
@@ -892,7 +893,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) | |||
892 | */ | 893 | */ |
893 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); | 894 | spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); |
894 | 895 | ||
895 | spin_unlock_irq(&rq->lock); | 896 | raw_spin_unlock_irq(&rq->lock); |
896 | } | 897 | } |
897 | 898 | ||
898 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ | 899 | #else /* __ARCH_WANT_UNLOCKED_CTXSW */ |
@@ -916,9 +917,9 @@ static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) | |||
916 | next->oncpu = 1; | 917 | next->oncpu = 1; |
917 | #endif | 918 | #endif |
918 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW | 919 | #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW |
919 | spin_unlock_irq(&rq->lock); | 920 | raw_spin_unlock_irq(&rq->lock); |
920 | #else | 921 | #else |
921 | spin_unlock(&rq->lock); | 922 | raw_spin_unlock(&rq->lock); |
922 | #endif | 923 | #endif |
923 | } | 924 | } |
924 | 925 | ||
@@ -948,10 +949,10 @@ static inline struct rq *__task_rq_lock(struct task_struct *p) | |||
948 | { | 949 | { |
949 | for (;;) { | 950 | for (;;) { |
950 | struct rq *rq = task_rq(p); | 951 | struct rq *rq = task_rq(p); |
951 | spin_lock(&rq->lock); | 952 | raw_spin_lock(&rq->lock); |
952 | if (likely(rq == task_rq(p))) | 953 | if (likely(rq == task_rq(p))) |
953 | return rq; | 954 | return rq; |
954 | spin_unlock(&rq->lock); | 955 | raw_spin_unlock(&rq->lock); |
955 | } | 956 | } |
956 | } | 957 | } |
957 | 958 | ||
@@ -968,10 +969,10 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) | |||
968 | for (;;) { | 969 | for (;;) { |
969 | local_irq_save(*flags); | 970 | local_irq_save(*flags); |
970 | rq = task_rq(p); | 971 | rq = task_rq(p); |
971 | spin_lock(&rq->lock); | 972 | raw_spin_lock(&rq->lock); |
972 | if (likely(rq == task_rq(p))) | 973 | if (likely(rq == task_rq(p))) |
973 | return rq; | 974 | return rq; |
974 | spin_unlock_irqrestore(&rq->lock, *flags); | 975 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
975 | } | 976 | } |
976 | } | 977 | } |
977 | 978 | ||
@@ -980,19 +981,19 @@ void task_rq_unlock_wait(struct task_struct *p) | |||
980 | struct rq *rq = task_rq(p); | 981 | struct rq *rq = task_rq(p); |
981 | 982 | ||
982 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ | 983 | smp_mb(); /* spin-unlock-wait is not a full memory barrier */ |
983 | spin_unlock_wait(&rq->lock); | 984 | raw_spin_unlock_wait(&rq->lock); |
984 | } | 985 | } |
985 | 986 | ||
986 | static void __task_rq_unlock(struct rq *rq) | 987 | static void __task_rq_unlock(struct rq *rq) |
987 | __releases(rq->lock) | 988 | __releases(rq->lock) |
988 | { | 989 | { |
989 | spin_unlock(&rq->lock); | 990 | raw_spin_unlock(&rq->lock); |
990 | } | 991 | } |
991 | 992 | ||
992 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) | 993 | static inline void task_rq_unlock(struct rq *rq, unsigned long *flags) |
993 | __releases(rq->lock) | 994 | __releases(rq->lock) |
994 | { | 995 | { |
995 | spin_unlock_irqrestore(&rq->lock, *flags); | 996 | raw_spin_unlock_irqrestore(&rq->lock, *flags); |
996 | } | 997 | } |
997 | 998 | ||
998 | /* | 999 | /* |
@@ -1005,7 +1006,7 @@ static struct rq *this_rq_lock(void) | |||
1005 | 1006 | ||
1006 | local_irq_disable(); | 1007 | local_irq_disable(); |
1007 | rq = this_rq(); | 1008 | rq = this_rq(); |
1008 | spin_lock(&rq->lock); | 1009 | raw_spin_lock(&rq->lock); |
1009 | 1010 | ||
1010 | return rq; | 1011 | return rq; |
1011 | } | 1012 | } |
@@ -1052,10 +1053,10 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) | |||
1052 | 1053 | ||
1053 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); | 1054 | WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); |
1054 | 1055 | ||
1055 | spin_lock(&rq->lock); | 1056 | raw_spin_lock(&rq->lock); |
1056 | update_rq_clock(rq); | 1057 | update_rq_clock(rq); |
1057 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); | 1058 | rq->curr->sched_class->task_tick(rq, rq->curr, 1); |
1058 | spin_unlock(&rq->lock); | 1059 | raw_spin_unlock(&rq->lock); |
1059 | 1060 | ||
1060 | return HRTIMER_NORESTART; | 1061 | return HRTIMER_NORESTART; |
1061 | } | 1062 | } |
@@ -1068,10 +1069,10 @@ static void __hrtick_start(void *arg) | |||
1068 | { | 1069 | { |
1069 | struct rq *rq = arg; | 1070 | struct rq *rq = arg; |
1070 | 1071 | ||
1071 | spin_lock(&rq->lock); | 1072 | raw_spin_lock(&rq->lock); |
1072 | hrtimer_restart(&rq->hrtick_timer); | 1073 | hrtimer_restart(&rq->hrtick_timer); |
1073 | rq->hrtick_csd_pending = 0; | 1074 | rq->hrtick_csd_pending = 0; |
1074 | spin_unlock(&rq->lock); | 1075 | raw_spin_unlock(&rq->lock); |
1075 | } | 1076 | } |
1076 | 1077 | ||
1077 | /* | 1078 | /* |
@@ -1178,7 +1179,7 @@ static void resched_task(struct task_struct *p) | |||
1178 | { | 1179 | { |
1179 | int cpu; | 1180 | int cpu; |
1180 | 1181 | ||
1181 | assert_spin_locked(&task_rq(p)->lock); | 1182 | assert_raw_spin_locked(&task_rq(p)->lock); |
1182 | 1183 | ||
1183 | if (test_tsk_need_resched(p)) | 1184 | if (test_tsk_need_resched(p)) |
1184 | return; | 1185 | return; |
@@ -1200,10 +1201,10 @@ static void resched_cpu(int cpu) | |||
1200 | struct rq *rq = cpu_rq(cpu); | 1201 | struct rq *rq = cpu_rq(cpu); |
1201 | unsigned long flags; | 1202 | unsigned long flags; |
1202 | 1203 | ||
1203 | if (!spin_trylock_irqsave(&rq->lock, flags)) | 1204 | if (!raw_spin_trylock_irqsave(&rq->lock, flags)) |
1204 | return; | 1205 | return; |
1205 | resched_task(cpu_curr(cpu)); | 1206 | resched_task(cpu_curr(cpu)); |
1206 | spin_unlock_irqrestore(&rq->lock, flags); | 1207 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1207 | } | 1208 | } |
1208 | 1209 | ||
1209 | #ifdef CONFIG_NO_HZ | 1210 | #ifdef CONFIG_NO_HZ |
@@ -1272,7 +1273,7 @@ static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) | |||
1272 | #else /* !CONFIG_SMP */ | 1273 | #else /* !CONFIG_SMP */ |
1273 | static void resched_task(struct task_struct *p) | 1274 | static void resched_task(struct task_struct *p) |
1274 | { | 1275 | { |
1275 | assert_spin_locked(&task_rq(p)->lock); | 1276 | assert_raw_spin_locked(&task_rq(p)->lock); |
1276 | set_tsk_need_resched(p); | 1277 | set_tsk_need_resched(p); |
1277 | } | 1278 | } |
1278 | 1279 | ||
@@ -1599,11 +1600,11 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1599 | struct rq *rq = cpu_rq(cpu); | 1600 | struct rq *rq = cpu_rq(cpu); |
1600 | unsigned long flags; | 1601 | unsigned long flags; |
1601 | 1602 | ||
1602 | spin_lock_irqsave(&rq->lock, flags); | 1603 | raw_spin_lock_irqsave(&rq->lock, flags); |
1603 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; | 1604 | tg->cfs_rq[cpu]->rq_weight = boost ? 0 : rq_weight; |
1604 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; | 1605 | tg->cfs_rq[cpu]->shares = boost ? 0 : shares; |
1605 | __set_se_shares(tg->se[cpu], shares); | 1606 | __set_se_shares(tg->se[cpu], shares); |
1606 | spin_unlock_irqrestore(&rq->lock, flags); | 1607 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
1607 | } | 1608 | } |
1608 | } | 1609 | } |
1609 | 1610 | ||
@@ -1614,7 +1615,7 @@ static void update_group_shares_cpu(struct task_group *tg, int cpu, | |||
1614 | */ | 1615 | */ |
1615 | static int tg_shares_up(struct task_group *tg, void *data) | 1616 | static int tg_shares_up(struct task_group *tg, void *data) |
1616 | { | 1617 | { |
1617 | unsigned long weight, rq_weight = 0, shares = 0; | 1618 | unsigned long weight, rq_weight = 0, sum_weight = 0, shares = 0; |
1618 | unsigned long *usd_rq_weight; | 1619 | unsigned long *usd_rq_weight; |
1619 | struct sched_domain *sd = data; | 1620 | struct sched_domain *sd = data; |
1620 | unsigned long flags; | 1621 | unsigned long flags; |
@@ -1630,6 +1631,7 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1630 | weight = tg->cfs_rq[i]->load.weight; | 1631 | weight = tg->cfs_rq[i]->load.weight; |
1631 | usd_rq_weight[i] = weight; | 1632 | usd_rq_weight[i] = weight; |
1632 | 1633 | ||
1634 | rq_weight += weight; | ||
1633 | /* | 1635 | /* |
1634 | * If there are currently no tasks on the cpu pretend there | 1636 | * If there are currently no tasks on the cpu pretend there |
1635 | * is one of average load so that when a new task gets to | 1637 | * is one of average load so that when a new task gets to |
@@ -1638,10 +1640,13 @@ static int tg_shares_up(struct task_group *tg, void *data) | |||
1638 | if (!weight) | 1640 | if (!weight) |
1639 | weight = NICE_0_LOAD; | 1641 | weight = NICE_0_LOAD; |
1640 | 1642 | ||
1641 | rq_weight += weight; | 1643 | sum_weight += weight; |
1642 | shares += tg->cfs_rq[i]->shares; | 1644 | shares += tg->cfs_rq[i]->shares; |
1643 | } | 1645 | } |
1644 | 1646 | ||
1647 | if (!rq_weight) | ||
1648 | rq_weight = sum_weight; | ||
1649 | |||
1645 | if ((!shares && rq_weight) || shares > tg->shares) | 1650 | if ((!shares && rq_weight) || shares > tg->shares) |
1646 | shares = tg->shares; | 1651 | shares = tg->shares; |
1647 | 1652 | ||
@@ -1701,9 +1706,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
1701 | if (root_task_group_empty()) | 1706 | if (root_task_group_empty()) |
1702 | return; | 1707 | return; |
1703 | 1708 | ||
1704 | spin_unlock(&rq->lock); | 1709 | raw_spin_unlock(&rq->lock); |
1705 | update_shares(sd); | 1710 | update_shares(sd); |
1706 | spin_lock(&rq->lock); | 1711 | raw_spin_lock(&rq->lock); |
1707 | } | 1712 | } |
1708 | 1713 | ||
1709 | static void update_h_load(long cpu) | 1714 | static void update_h_load(long cpu) |
@@ -1743,7 +1748,7 @@ static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1743 | __acquires(busiest->lock) | 1748 | __acquires(busiest->lock) |
1744 | __acquires(this_rq->lock) | 1749 | __acquires(this_rq->lock) |
1745 | { | 1750 | { |
1746 | spin_unlock(&this_rq->lock); | 1751 | raw_spin_unlock(&this_rq->lock); |
1747 | double_rq_lock(this_rq, busiest); | 1752 | double_rq_lock(this_rq, busiest); |
1748 | 1753 | ||
1749 | return 1; | 1754 | return 1; |
@@ -1764,14 +1769,16 @@ static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1764 | { | 1769 | { |
1765 | int ret = 0; | 1770 | int ret = 0; |
1766 | 1771 | ||
1767 | if (unlikely(!spin_trylock(&busiest->lock))) { | 1772 | if (unlikely(!raw_spin_trylock(&busiest->lock))) { |
1768 | if (busiest < this_rq) { | 1773 | if (busiest < this_rq) { |
1769 | spin_unlock(&this_rq->lock); | 1774 | raw_spin_unlock(&this_rq->lock); |
1770 | spin_lock(&busiest->lock); | 1775 | raw_spin_lock(&busiest->lock); |
1771 | spin_lock_nested(&this_rq->lock, SINGLE_DEPTH_NESTING); | 1776 | raw_spin_lock_nested(&this_rq->lock, |
1777 | SINGLE_DEPTH_NESTING); | ||
1772 | ret = 1; | 1778 | ret = 1; |
1773 | } else | 1779 | } else |
1774 | spin_lock_nested(&busiest->lock, SINGLE_DEPTH_NESTING); | 1780 | raw_spin_lock_nested(&busiest->lock, |
1781 | SINGLE_DEPTH_NESTING); | ||
1775 | } | 1782 | } |
1776 | return ret; | 1783 | return ret; |
1777 | } | 1784 | } |
@@ -1785,7 +1792,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1785 | { | 1792 | { |
1786 | if (unlikely(!irqs_disabled())) { | 1793 | if (unlikely(!irqs_disabled())) { |
1787 | /* printk() doesn't work good under rq->lock */ | 1794 | /* printk() doesn't work good under rq->lock */ |
1788 | spin_unlock(&this_rq->lock); | 1795 | raw_spin_unlock(&this_rq->lock); |
1789 | BUG_ON(1); | 1796 | BUG_ON(1); |
1790 | } | 1797 | } |
1791 | 1798 | ||
@@ -1795,7 +1802,7 @@ static int double_lock_balance(struct rq *this_rq, struct rq *busiest) | |||
1795 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) | 1802 | static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) |
1796 | __releases(busiest->lock) | 1803 | __releases(busiest->lock) |
1797 | { | 1804 | { |
1798 | spin_unlock(&busiest->lock); | 1805 | raw_spin_unlock(&busiest->lock); |
1799 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); | 1806 | lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_); |
1800 | } | 1807 | } |
1801 | #endif | 1808 | #endif |
@@ -1810,6 +1817,22 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares) | |||
1810 | #endif | 1817 | #endif |
1811 | 1818 | ||
1812 | static void calc_load_account_active(struct rq *this_rq); | 1819 | static void calc_load_account_active(struct rq *this_rq); |
1820 | static void update_sysctl(void); | ||
1821 | static int get_update_sysctl_factor(void); | ||
1822 | |||
1823 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1824 | { | ||
1825 | set_task_rq(p, cpu); | ||
1826 | #ifdef CONFIG_SMP | ||
1827 | /* | ||
1828 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1829 | * successfuly executed on another CPU. We must ensure that updates of | ||
1830 | * per-task data have been completed by this moment. | ||
1831 | */ | ||
1832 | smp_wmb(); | ||
1833 | task_thread_info(p)->cpu = cpu; | ||
1834 | #endif | ||
1835 | } | ||
1813 | 1836 | ||
1814 | #include "sched_stats.h" | 1837 | #include "sched_stats.h" |
1815 | #include "sched_idletask.c" | 1838 | #include "sched_idletask.c" |
@@ -1967,20 +1990,6 @@ inline int task_curr(const struct task_struct *p) | |||
1967 | return cpu_curr(task_cpu(p)) == p; | 1990 | return cpu_curr(task_cpu(p)) == p; |
1968 | } | 1991 | } |
1969 | 1992 | ||
1970 | static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | ||
1971 | { | ||
1972 | set_task_rq(p, cpu); | ||
1973 | #ifdef CONFIG_SMP | ||
1974 | /* | ||
1975 | * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be | ||
1976 | * successfuly executed on another CPU. We must ensure that updates of | ||
1977 | * per-task data have been completed by this moment. | ||
1978 | */ | ||
1979 | smp_wmb(); | ||
1980 | task_thread_info(p)->cpu = cpu; | ||
1981 | #endif | ||
1982 | } | ||
1983 | |||
1984 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, | 1993 | static inline void check_class_changed(struct rq *rq, struct task_struct *p, |
1985 | const struct sched_class *prev_class, | 1994 | const struct sched_class *prev_class, |
1986 | int oldprio, int running) | 1995 | int oldprio, int running) |
@@ -1993,38 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1993 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2002 | p->sched_class->prio_changed(rq, p, oldprio, running); |
1994 | } | 2003 | } |
1995 | 2004 | ||
1996 | /** | ||
1997 | * kthread_bind - bind a just-created kthread to a cpu. | ||
1998 | * @p: thread created by kthread_create(). | ||
1999 | * @cpu: cpu (might not be online, must be possible) for @k to run on. | ||
2000 | * | ||
2001 | * Description: This function is equivalent to set_cpus_allowed(), | ||
2002 | * except that @cpu doesn't need to be online, and the thread must be | ||
2003 | * stopped (i.e., just returned from kthread_create()). | ||
2004 | * | ||
2005 | * Function lives here instead of kthread.c because it messes with | ||
2006 | * scheduler internals which require locking. | ||
2007 | */ | ||
2008 | void kthread_bind(struct task_struct *p, unsigned int cpu) | ||
2009 | { | ||
2010 | struct rq *rq = cpu_rq(cpu); | ||
2011 | unsigned long flags; | ||
2012 | |||
2013 | /* Must have done schedule() in kthread() before we set_task_cpu */ | ||
2014 | if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) { | ||
2015 | WARN_ON(1); | ||
2016 | return; | ||
2017 | } | ||
2018 | |||
2019 | spin_lock_irqsave(&rq->lock, flags); | ||
2020 | set_task_cpu(p, cpu); | ||
2021 | p->cpus_allowed = cpumask_of_cpu(cpu); | ||
2022 | p->rt.nr_cpus_allowed = 1; | ||
2023 | p->flags |= PF_THREAD_BOUND; | ||
2024 | spin_unlock_irqrestore(&rq->lock, flags); | ||
2025 | } | ||
2026 | EXPORT_SYMBOL(kthread_bind); | ||
2027 | |||
2028 | #ifdef CONFIG_SMP | 2005 | #ifdef CONFIG_SMP |
2029 | /* | 2006 | /* |
2030 | * Is this task likely cache-hot: | 2007 | * Is this task likely cache-hot: |
@@ -2034,6 +2011,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2034 | { | 2011 | { |
2035 | s64 delta; | 2012 | s64 delta; |
2036 | 2013 | ||
2014 | if (p->sched_class != &fair_sched_class) | ||
2015 | return 0; | ||
2016 | |||
2037 | /* | 2017 | /* |
2038 | * Buddy candidates are cache hot: | 2018 | * Buddy candidates are cache hot: |
2039 | */ | 2019 | */ |
@@ -2042,9 +2022,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2042 | &p->se == cfs_rq_of(&p->se)->last)) | 2022 | &p->se == cfs_rq_of(&p->se)->last)) |
2043 | return 1; | 2023 | return 1; |
2044 | 2024 | ||
2045 | if (p->sched_class != &fair_sched_class) | ||
2046 | return 0; | ||
2047 | |||
2048 | if (sysctl_sched_migration_cost == -1) | 2025 | if (sysctl_sched_migration_cost == -1) |
2049 | return 1; | 2026 | return 1; |
2050 | if (sysctl_sched_migration_cost == 0) | 2027 | if (sysctl_sched_migration_cost == 0) |
@@ -2055,39 +2032,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2055 | return delta < (s64)sysctl_sched_migration_cost; | 2032 | return delta < (s64)sysctl_sched_migration_cost; |
2056 | } | 2033 | } |
2057 | 2034 | ||
2058 | |||
2059 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | 2035 | void set_task_cpu(struct task_struct *p, unsigned int new_cpu) |
2060 | { | 2036 | { |
2061 | int old_cpu = task_cpu(p); | 2037 | #ifdef CONFIG_SCHED_DEBUG |
2062 | struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu); | 2038 | /* |
2063 | struct cfs_rq *old_cfsrq = task_cfs_rq(p), | 2039 | * We should never call set_task_cpu() on a blocked task, |
2064 | *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu); | 2040 | * ttwu() will sort out the placement. |
2065 | u64 clock_offset; | 2041 | */ |
2066 | 2042 | WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING && | |
2067 | clock_offset = old_rq->clock - new_rq->clock; | 2043 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
2044 | #endif | ||
2068 | 2045 | ||
2069 | trace_sched_migrate_task(p, new_cpu); | 2046 | trace_sched_migrate_task(p, new_cpu); |
2070 | 2047 | ||
2071 | #ifdef CONFIG_SCHEDSTATS | 2048 | if (task_cpu(p) != new_cpu) { |
2072 | if (p->se.wait_start) | ||
2073 | p->se.wait_start -= clock_offset; | ||
2074 | if (p->se.sleep_start) | ||
2075 | p->se.sleep_start -= clock_offset; | ||
2076 | if (p->se.block_start) | ||
2077 | p->se.block_start -= clock_offset; | ||
2078 | #endif | ||
2079 | if (old_cpu != new_cpu) { | ||
2080 | p->se.nr_migrations++; | 2049 | p->se.nr_migrations++; |
2081 | new_rq->nr_migrations_in++; | 2050 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); |
2082 | #ifdef CONFIG_SCHEDSTATS | ||
2083 | if (task_hot(p, old_rq->clock, NULL)) | ||
2084 | schedstat_inc(p, se.nr_forced2_migrations); | ||
2085 | #endif | ||
2086 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, | ||
2087 | 1, 1, NULL, 0); | ||
2088 | } | 2051 | } |
2089 | p->se.vruntime -= old_cfsrq->min_vruntime - | ||
2090 | new_cfsrq->min_vruntime; | ||
2091 | 2052 | ||
2092 | __set_task_cpu(p, new_cpu); | 2053 | __set_task_cpu(p, new_cpu); |
2093 | } | 2054 | } |
@@ -2112,12 +2073,10 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) | |||
2112 | 2073 | ||
2113 | /* | 2074 | /* |
2114 | * If the task is not on a runqueue (and not running), then | 2075 | * If the task is not on a runqueue (and not running), then |
2115 | * it is sufficient to simply update the task's cpu field. | 2076 | * the next wake-up will properly place the task. |
2116 | */ | 2077 | */ |
2117 | if (!p->se.on_rq && !task_running(rq, p)) { | 2078 | if (!p->se.on_rq && !task_running(rq, p)) |
2118 | set_task_cpu(p, dest_cpu); | ||
2119 | return 0; | 2079 | return 0; |
2120 | } | ||
2121 | 2080 | ||
2122 | init_completion(&req->done); | 2081 | init_completion(&req->done); |
2123 | req->task = p; | 2082 | req->task = p; |
@@ -2322,6 +2281,77 @@ void task_oncpu_function_call(struct task_struct *p, | |||
2322 | preempt_enable(); | 2281 | preempt_enable(); |
2323 | } | 2282 | } |
2324 | 2283 | ||
2284 | #ifdef CONFIG_SMP | ||
2285 | static int select_fallback_rq(int cpu, struct task_struct *p) | ||
2286 | { | ||
2287 | int dest_cpu; | ||
2288 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu)); | ||
2289 | |||
2290 | /* Look for allowed, online CPU in same node. */ | ||
2291 | for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask) | ||
2292 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
2293 | return dest_cpu; | ||
2294 | |||
2295 | /* Any allowed, online CPU? */ | ||
2296 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask); | ||
2297 | if (dest_cpu < nr_cpu_ids) | ||
2298 | return dest_cpu; | ||
2299 | |||
2300 | /* No more Mr. Nice Guy. */ | ||
2301 | if (dest_cpu >= nr_cpu_ids) { | ||
2302 | rcu_read_lock(); | ||
2303 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
2304 | rcu_read_unlock(); | ||
2305 | dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed); | ||
2306 | |||
2307 | /* | ||
2308 | * Don't tell them about moving exiting tasks or | ||
2309 | * kernel threads (both mm NULL), since they never | ||
2310 | * leave kernel. | ||
2311 | */ | ||
2312 | if (p->mm && printk_ratelimit()) { | ||
2313 | printk(KERN_INFO "process %d (%s) no " | ||
2314 | "longer affine to cpu%d\n", | ||
2315 | task_pid_nr(p), p->comm, cpu); | ||
2316 | } | ||
2317 | } | ||
2318 | |||
2319 | return dest_cpu; | ||
2320 | } | ||
2321 | |||
2322 | /* | ||
2323 | * Called from: | ||
2324 | * | ||
2325 | * - fork, @p is stable because it isn't on the tasklist yet | ||
2326 | * | ||
2327 | * - exec, @p is unstable, retry loop | ||
2328 | * | ||
2329 | * - wake-up, we serialize ->cpus_allowed against TASK_WAKING so | ||
2330 | * we should be good. | ||
2331 | */ | ||
2332 | static inline | ||
2333 | int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags) | ||
2334 | { | ||
2335 | int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags); | ||
2336 | |||
2337 | /* | ||
2338 | * In order not to call set_task_cpu() on a blocking task we need | ||
2339 | * to rely on ttwu() to place the task on a valid ->cpus_allowed | ||
2340 | * cpu. | ||
2341 | * | ||
2342 | * Since this is common to all placement strategies, this lives here. | ||
2343 | * | ||
2344 | * [ this allows ->select_task() to simply return task_cpu(p) and | ||
2345 | * not worry about this generic constraint ] | ||
2346 | */ | ||
2347 | if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) || | ||
2348 | !cpu_online(cpu))) | ||
2349 | cpu = select_fallback_rq(task_cpu(p), p); | ||
2350 | |||
2351 | return cpu; | ||
2352 | } | ||
2353 | #endif | ||
2354 | |||
2325 | /*** | 2355 | /*** |
2326 | * try_to_wake_up - wake up a thread | 2356 | * try_to_wake_up - wake up a thread |
2327 | * @p: the to-be-woken-up thread | 2357 | * @p: the to-be-woken-up thread |
@@ -2373,16 +2403,18 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, | |||
2373 | if (task_contributes_to_load(p)) | 2403 | if (task_contributes_to_load(p)) |
2374 | rq->nr_uninterruptible--; | 2404 | rq->nr_uninterruptible--; |
2375 | p->state = TASK_WAKING; | 2405 | p->state = TASK_WAKING; |
2376 | task_rq_unlock(rq, &flags); | ||
2377 | 2406 | ||
2378 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2407 | if (p->sched_class->task_waking) |
2408 | p->sched_class->task_waking(rq, p); | ||
2409 | |||
2410 | __task_rq_unlock(rq); | ||
2411 | |||
2412 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
2379 | if (cpu != orig_cpu) | 2413 | if (cpu != orig_cpu) |
2380 | set_task_cpu(p, cpu); | 2414 | set_task_cpu(p, cpu); |
2381 | 2415 | ||
2382 | rq = task_rq_lock(p, &flags); | 2416 | rq = __task_rq_lock(p); |
2383 | 2417 | update_rq_clock(rq); | |
2384 | if (rq != orig_rq) | ||
2385 | update_rq_clock(rq); | ||
2386 | 2418 | ||
2387 | WARN_ON(p->state != TASK_WAKING); | 2419 | WARN_ON(p->state != TASK_WAKING); |
2388 | cpu = task_cpu(p); | 2420 | cpu = task_cpu(p); |
@@ -2438,8 +2470,19 @@ out_running: | |||
2438 | 2470 | ||
2439 | p->state = TASK_RUNNING; | 2471 | p->state = TASK_RUNNING; |
2440 | #ifdef CONFIG_SMP | 2472 | #ifdef CONFIG_SMP |
2441 | if (p->sched_class->task_wake_up) | 2473 | if (p->sched_class->task_woken) |
2442 | p->sched_class->task_wake_up(rq, p); | 2474 | p->sched_class->task_woken(rq, p); |
2475 | |||
2476 | if (unlikely(rq->idle_stamp)) { | ||
2477 | u64 delta = rq->clock - rq->idle_stamp; | ||
2478 | u64 max = 2*sysctl_sched_migration_cost; | ||
2479 | |||
2480 | if (delta > max) | ||
2481 | rq->avg_idle = max; | ||
2482 | else | ||
2483 | update_avg(&rq->avg_idle, delta); | ||
2484 | rq->idle_stamp = 0; | ||
2485 | } | ||
2443 | #endif | 2486 | #endif |
2444 | out: | 2487 | out: |
2445 | task_rq_unlock(rq, &flags); | 2488 | task_rq_unlock(rq, &flags); |
@@ -2486,7 +2529,6 @@ static void __sched_fork(struct task_struct *p) | |||
2486 | p->se.avg_overlap = 0; | 2529 | p->se.avg_overlap = 0; |
2487 | p->se.start_runtime = 0; | 2530 | p->se.start_runtime = 0; |
2488 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2531 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
2489 | p->se.avg_running = 0; | ||
2490 | 2532 | ||
2491 | #ifdef CONFIG_SCHEDSTATS | 2533 | #ifdef CONFIG_SCHEDSTATS |
2492 | p->se.wait_start = 0; | 2534 | p->se.wait_start = 0; |
@@ -2508,7 +2550,6 @@ static void __sched_fork(struct task_struct *p) | |||
2508 | p->se.nr_failed_migrations_running = 0; | 2550 | p->se.nr_failed_migrations_running = 0; |
2509 | p->se.nr_failed_migrations_hot = 0; | 2551 | p->se.nr_failed_migrations_hot = 0; |
2510 | p->se.nr_forced_migrations = 0; | 2552 | p->se.nr_forced_migrations = 0; |
2511 | p->se.nr_forced2_migrations = 0; | ||
2512 | 2553 | ||
2513 | p->se.nr_wakeups = 0; | 2554 | p->se.nr_wakeups = 0; |
2514 | p->se.nr_wakeups_sync = 0; | 2555 | p->se.nr_wakeups_sync = 0; |
@@ -2529,14 +2570,6 @@ static void __sched_fork(struct task_struct *p) | |||
2529 | #ifdef CONFIG_PREEMPT_NOTIFIERS | 2570 | #ifdef CONFIG_PREEMPT_NOTIFIERS |
2530 | INIT_HLIST_HEAD(&p->preempt_notifiers); | 2571 | INIT_HLIST_HEAD(&p->preempt_notifiers); |
2531 | #endif | 2572 | #endif |
2532 | |||
2533 | /* | ||
2534 | * We mark the process as running here, but have not actually | ||
2535 | * inserted it onto the runqueue yet. This guarantees that | ||
2536 | * nobody will actually run it, and a signal or other external | ||
2537 | * event cannot wake it up and insert it on the runqueue either. | ||
2538 | */ | ||
2539 | p->state = TASK_RUNNING; | ||
2540 | } | 2573 | } |
2541 | 2574 | ||
2542 | /* | 2575 | /* |
@@ -2547,6 +2580,12 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2547 | int cpu = get_cpu(); | 2580 | int cpu = get_cpu(); |
2548 | 2581 | ||
2549 | __sched_fork(p); | 2582 | __sched_fork(p); |
2583 | /* | ||
2584 | * We mark the process as waking here. This guarantees that | ||
2585 | * nobody will actually run it, and a signal or other external | ||
2586 | * event cannot wake it up and insert it on the runqueue either. | ||
2587 | */ | ||
2588 | p->state = TASK_WAKING; | ||
2550 | 2589 | ||
2551 | /* | 2590 | /* |
2552 | * Revert to default priority/policy on fork if requested. | 2591 | * Revert to default priority/policy on fork if requested. |
@@ -2578,8 +2617,11 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
2578 | if (!rt_prio(p->prio)) | 2617 | if (!rt_prio(p->prio)) |
2579 | p->sched_class = &fair_sched_class; | 2618 | p->sched_class = &fair_sched_class; |
2580 | 2619 | ||
2620 | if (p->sched_class->task_fork) | ||
2621 | p->sched_class->task_fork(p); | ||
2622 | |||
2581 | #ifdef CONFIG_SMP | 2623 | #ifdef CONFIG_SMP |
2582 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | 2624 | cpu = select_task_rq(p, SD_BALANCE_FORK, 0); |
2583 | #endif | 2625 | #endif |
2584 | set_task_cpu(p, cpu); | 2626 | set_task_cpu(p, cpu); |
2585 | 2627 | ||
@@ -2612,24 +2654,15 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
2612 | struct rq *rq; | 2654 | struct rq *rq; |
2613 | 2655 | ||
2614 | rq = task_rq_lock(p, &flags); | 2656 | rq = task_rq_lock(p, &flags); |
2615 | BUG_ON(p->state != TASK_RUNNING); | 2657 | BUG_ON(p->state != TASK_WAKING); |
2658 | p->state = TASK_RUNNING; | ||
2616 | update_rq_clock(rq); | 2659 | update_rq_clock(rq); |
2617 | 2660 | activate_task(rq, p, 0); | |
2618 | if (!p->sched_class->task_new || !current->se.on_rq) { | ||
2619 | activate_task(rq, p, 0); | ||
2620 | } else { | ||
2621 | /* | ||
2622 | * Let the scheduling class do new task startup | ||
2623 | * management (if any): | ||
2624 | */ | ||
2625 | p->sched_class->task_new(rq, p); | ||
2626 | inc_nr_running(rq); | ||
2627 | } | ||
2628 | trace_sched_wakeup_new(rq, p, 1); | 2661 | trace_sched_wakeup_new(rq, p, 1); |
2629 | check_preempt_curr(rq, p, WF_FORK); | 2662 | check_preempt_curr(rq, p, WF_FORK); |
2630 | #ifdef CONFIG_SMP | 2663 | #ifdef CONFIG_SMP |
2631 | if (p->sched_class->task_wake_up) | 2664 | if (p->sched_class->task_woken) |
2632 | p->sched_class->task_wake_up(rq, p); | 2665 | p->sched_class->task_woken(rq, p); |
2633 | #endif | 2666 | #endif |
2634 | task_rq_unlock(rq, &flags); | 2667 | task_rq_unlock(rq, &flags); |
2635 | } | 2668 | } |
@@ -2781,10 +2814,10 @@ static inline void post_schedule(struct rq *rq) | |||
2781 | if (rq->post_schedule) { | 2814 | if (rq->post_schedule) { |
2782 | unsigned long flags; | 2815 | unsigned long flags; |
2783 | 2816 | ||
2784 | spin_lock_irqsave(&rq->lock, flags); | 2817 | raw_spin_lock_irqsave(&rq->lock, flags); |
2785 | if (rq->curr->sched_class->post_schedule) | 2818 | if (rq->curr->sched_class->post_schedule) |
2786 | rq->curr->sched_class->post_schedule(rq); | 2819 | rq->curr->sched_class->post_schedule(rq); |
2787 | spin_unlock_irqrestore(&rq->lock, flags); | 2820 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
2788 | 2821 | ||
2789 | rq->post_schedule = 0; | 2822 | rq->post_schedule = 0; |
2790 | } | 2823 | } |
@@ -2848,14 +2881,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2848 | */ | 2881 | */ |
2849 | arch_start_context_switch(prev); | 2882 | arch_start_context_switch(prev); |
2850 | 2883 | ||
2851 | if (unlikely(!mm)) { | 2884 | if (likely(!mm)) { |
2852 | next->active_mm = oldmm; | 2885 | next->active_mm = oldmm; |
2853 | atomic_inc(&oldmm->mm_count); | 2886 | atomic_inc(&oldmm->mm_count); |
2854 | enter_lazy_tlb(oldmm, next); | 2887 | enter_lazy_tlb(oldmm, next); |
2855 | } else | 2888 | } else |
2856 | switch_mm(oldmm, mm, next); | 2889 | switch_mm(oldmm, mm, next); |
2857 | 2890 | ||
2858 | if (unlikely(!prev->mm)) { | 2891 | if (likely(!prev->mm)) { |
2859 | prev->active_mm = NULL; | 2892 | prev->active_mm = NULL; |
2860 | rq->prev_mm = oldmm; | 2893 | rq->prev_mm = oldmm; |
2861 | } | 2894 | } |
@@ -3018,15 +3051,6 @@ static void calc_load_account_active(struct rq *this_rq) | |||
3018 | } | 3051 | } |
3019 | 3052 | ||
3020 | /* | 3053 | /* |
3021 | * Externally visible per-cpu scheduler statistics: | ||
3022 | * cpu_nr_migrations(cpu) - number of migrations into that cpu | ||
3023 | */ | ||
3024 | u64 cpu_nr_migrations(int cpu) | ||
3025 | { | ||
3026 | return cpu_rq(cpu)->nr_migrations_in; | ||
3027 | } | ||
3028 | |||
3029 | /* | ||
3030 | * Update rq->cpu_load[] statistics. This function is usually called every | 3054 | * Update rq->cpu_load[] statistics. This function is usually called every |
3031 | * scheduler tick (TICK_NSEC). | 3055 | * scheduler tick (TICK_NSEC). |
3032 | */ | 3056 | */ |
@@ -3075,15 +3099,15 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) | |||
3075 | { | 3099 | { |
3076 | BUG_ON(!irqs_disabled()); | 3100 | BUG_ON(!irqs_disabled()); |
3077 | if (rq1 == rq2) { | 3101 | if (rq1 == rq2) { |
3078 | spin_lock(&rq1->lock); | 3102 | raw_spin_lock(&rq1->lock); |
3079 | __acquire(rq2->lock); /* Fake it out ;) */ | 3103 | __acquire(rq2->lock); /* Fake it out ;) */ |
3080 | } else { | 3104 | } else { |
3081 | if (rq1 < rq2) { | 3105 | if (rq1 < rq2) { |
3082 | spin_lock(&rq1->lock); | 3106 | raw_spin_lock(&rq1->lock); |
3083 | spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); | 3107 | raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING); |
3084 | } else { | 3108 | } else { |
3085 | spin_lock(&rq2->lock); | 3109 | raw_spin_lock(&rq2->lock); |
3086 | spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); | 3110 | raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING); |
3087 | } | 3111 | } |
3088 | } | 3112 | } |
3089 | update_rq_clock(rq1); | 3113 | update_rq_clock(rq1); |
@@ -3100,29 +3124,44 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2) | |||
3100 | __releases(rq1->lock) | 3124 | __releases(rq1->lock) |
3101 | __releases(rq2->lock) | 3125 | __releases(rq2->lock) |
3102 | { | 3126 | { |
3103 | spin_unlock(&rq1->lock); | 3127 | raw_spin_unlock(&rq1->lock); |
3104 | if (rq1 != rq2) | 3128 | if (rq1 != rq2) |
3105 | spin_unlock(&rq2->lock); | 3129 | raw_spin_unlock(&rq2->lock); |
3106 | else | 3130 | else |
3107 | __release(rq2->lock); | 3131 | __release(rq2->lock); |
3108 | } | 3132 | } |
3109 | 3133 | ||
3110 | /* | 3134 | /* |
3111 | * If dest_cpu is allowed for this process, migrate the task to it. | 3135 | * sched_exec - execve() is a valuable balancing opportunity, because at |
3112 | * This is accomplished by forcing the cpu_allowed mask to only | 3136 | * this point the task has the smallest effective memory and cache footprint. |
3113 | * allow dest_cpu, which will force the cpu onto dest_cpu. Then | ||
3114 | * the cpu_allowed mask is restored. | ||
3115 | */ | 3137 | */ |
3116 | static void sched_migrate_task(struct task_struct *p, int dest_cpu) | 3138 | void sched_exec(void) |
3117 | { | 3139 | { |
3140 | struct task_struct *p = current; | ||
3118 | struct migration_req req; | 3141 | struct migration_req req; |
3142 | int dest_cpu, this_cpu; | ||
3119 | unsigned long flags; | 3143 | unsigned long flags; |
3120 | struct rq *rq; | 3144 | struct rq *rq; |
3121 | 3145 | ||
3146 | again: | ||
3147 | this_cpu = get_cpu(); | ||
3148 | dest_cpu = select_task_rq(p, SD_BALANCE_EXEC, 0); | ||
3149 | if (dest_cpu == this_cpu) { | ||
3150 | put_cpu(); | ||
3151 | return; | ||
3152 | } | ||
3153 | |||
3122 | rq = task_rq_lock(p, &flags); | 3154 | rq = task_rq_lock(p, &flags); |
3155 | put_cpu(); | ||
3156 | |||
3157 | /* | ||
3158 | * select_task_rq() can race against ->cpus_allowed | ||
3159 | */ | ||
3123 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) | 3160 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed) |
3124 | || unlikely(!cpu_active(dest_cpu))) | 3161 | || unlikely(!cpu_active(dest_cpu))) { |
3125 | goto out; | 3162 | task_rq_unlock(rq, &flags); |
3163 | goto again; | ||
3164 | } | ||
3126 | 3165 | ||
3127 | /* force the process onto the specified CPU */ | 3166 | /* force the process onto the specified CPU */ |
3128 | if (migrate_task(p, dest_cpu, &req)) { | 3167 | if (migrate_task(p, dest_cpu, &req)) { |
@@ -3137,24 +3176,10 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu) | |||
3137 | 3176 | ||
3138 | return; | 3177 | return; |
3139 | } | 3178 | } |
3140 | out: | ||
3141 | task_rq_unlock(rq, &flags); | 3179 | task_rq_unlock(rq, &flags); |
3142 | } | 3180 | } |
3143 | 3181 | ||
3144 | /* | 3182 | /* |
3145 | * sched_exec - execve() is a valuable balancing opportunity, because at | ||
3146 | * this point the task has the smallest effective memory and cache footprint. | ||
3147 | */ | ||
3148 | void sched_exec(void) | ||
3149 | { | ||
3150 | int new_cpu, this_cpu = get_cpu(); | ||
3151 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); | ||
3152 | put_cpu(); | ||
3153 | if (new_cpu != this_cpu) | ||
3154 | sched_migrate_task(current, new_cpu); | ||
3155 | } | ||
3156 | |||
3157 | /* | ||
3158 | * pull_task - move a task from a remote runqueue to the local runqueue. | 3183 | * pull_task - move a task from a remote runqueue to the local runqueue. |
3159 | * Both runqueues must be locked. | 3184 | * Both runqueues must be locked. |
3160 | */ | 3185 | */ |
@@ -3164,10 +3189,6 @@ static void pull_task(struct rq *src_rq, struct task_struct *p, | |||
3164 | deactivate_task(src_rq, p, 0); | 3189 | deactivate_task(src_rq, p, 0); |
3165 | set_task_cpu(p, this_cpu); | 3190 | set_task_cpu(p, this_cpu); |
3166 | activate_task(this_rq, p, 0); | 3191 | activate_task(this_rq, p, 0); |
3167 | /* | ||
3168 | * Note that idle threads have a prio of MAX_PRIO, for this test | ||
3169 | * to be always true for them. | ||
3170 | */ | ||
3171 | check_preempt_curr(this_rq, p, 0); | 3192 | check_preempt_curr(this_rq, p, 0); |
3172 | } | 3193 | } |
3173 | 3194 | ||
@@ -4126,7 +4147,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
4126 | unsigned long flags; | 4147 | unsigned long flags; |
4127 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4148 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4128 | 4149 | ||
4129 | cpumask_setall(cpus); | 4150 | cpumask_copy(cpus, cpu_active_mask); |
4130 | 4151 | ||
4131 | /* | 4152 | /* |
4132 | * When power savings policy is enabled for the parent domain, idle | 4153 | * When power savings policy is enabled for the parent domain, idle |
@@ -4199,14 +4220,15 @@ redo: | |||
4199 | 4220 | ||
4200 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { | 4221 | if (unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2)) { |
4201 | 4222 | ||
4202 | spin_lock_irqsave(&busiest->lock, flags); | 4223 | raw_spin_lock_irqsave(&busiest->lock, flags); |
4203 | 4224 | ||
4204 | /* don't kick the migration_thread, if the curr | 4225 | /* don't kick the migration_thread, if the curr |
4205 | * task on busiest cpu can't be moved to this_cpu | 4226 | * task on busiest cpu can't be moved to this_cpu |
4206 | */ | 4227 | */ |
4207 | if (!cpumask_test_cpu(this_cpu, | 4228 | if (!cpumask_test_cpu(this_cpu, |
4208 | &busiest->curr->cpus_allowed)) { | 4229 | &busiest->curr->cpus_allowed)) { |
4209 | spin_unlock_irqrestore(&busiest->lock, flags); | 4230 | raw_spin_unlock_irqrestore(&busiest->lock, |
4231 | flags); | ||
4210 | all_pinned = 1; | 4232 | all_pinned = 1; |
4211 | goto out_one_pinned; | 4233 | goto out_one_pinned; |
4212 | } | 4234 | } |
@@ -4216,7 +4238,7 @@ redo: | |||
4216 | busiest->push_cpu = this_cpu; | 4238 | busiest->push_cpu = this_cpu; |
4217 | active_balance = 1; | 4239 | active_balance = 1; |
4218 | } | 4240 | } |
4219 | spin_unlock_irqrestore(&busiest->lock, flags); | 4241 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
4220 | if (active_balance) | 4242 | if (active_balance) |
4221 | wake_up_process(busiest->migration_thread); | 4243 | wake_up_process(busiest->migration_thread); |
4222 | 4244 | ||
@@ -4289,7 +4311,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) | |||
4289 | int all_pinned = 0; | 4311 | int all_pinned = 0; |
4290 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); | 4312 | struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); |
4291 | 4313 | ||
4292 | cpumask_setall(cpus); | 4314 | cpumask_copy(cpus, cpu_active_mask); |
4293 | 4315 | ||
4294 | /* | 4316 | /* |
4295 | * When power savings policy is enabled for the parent domain, idle | 4317 | * When power savings policy is enabled for the parent domain, idle |
@@ -4398,10 +4420,10 @@ redo: | |||
4398 | /* | 4420 | /* |
4399 | * Should not call ttwu while holding a rq->lock | 4421 | * Should not call ttwu while holding a rq->lock |
4400 | */ | 4422 | */ |
4401 | spin_unlock(&this_rq->lock); | 4423 | raw_spin_unlock(&this_rq->lock); |
4402 | if (active_balance) | 4424 | if (active_balance) |
4403 | wake_up_process(busiest->migration_thread); | 4425 | wake_up_process(busiest->migration_thread); |
4404 | spin_lock(&this_rq->lock); | 4426 | raw_spin_lock(&this_rq->lock); |
4405 | 4427 | ||
4406 | } else | 4428 | } else |
4407 | sd->nr_balance_failed = 0; | 4429 | sd->nr_balance_failed = 0; |
@@ -4429,6 +4451,11 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4429 | int pulled_task = 0; | 4451 | int pulled_task = 0; |
4430 | unsigned long next_balance = jiffies + HZ; | 4452 | unsigned long next_balance = jiffies + HZ; |
4431 | 4453 | ||
4454 | this_rq->idle_stamp = this_rq->clock; | ||
4455 | |||
4456 | if (this_rq->avg_idle < sysctl_sched_migration_cost) | ||
4457 | return; | ||
4458 | |||
4432 | for_each_domain(this_cpu, sd) { | 4459 | for_each_domain(this_cpu, sd) { |
4433 | unsigned long interval; | 4460 | unsigned long interval; |
4434 | 4461 | ||
@@ -4443,8 +4470,10 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
4443 | interval = msecs_to_jiffies(sd->balance_interval); | 4470 | interval = msecs_to_jiffies(sd->balance_interval); |
4444 | if (time_after(next_balance, sd->last_balance + interval)) | 4471 | if (time_after(next_balance, sd->last_balance + interval)) |
4445 | next_balance = sd->last_balance + interval; | 4472 | next_balance = sd->last_balance + interval; |
4446 | if (pulled_task) | 4473 | if (pulled_task) { |
4474 | this_rq->idle_stamp = 0; | ||
4447 | break; | 4475 | break; |
4476 | } | ||
4448 | } | 4477 | } |
4449 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { | 4478 | if (pulled_task || time_after(jiffies, this_rq->next_balance)) { |
4450 | /* | 4479 | /* |
@@ -4679,7 +4708,7 @@ int select_nohz_load_balancer(int stop_tick) | |||
4679 | cpumask_set_cpu(cpu, nohz.cpu_mask); | 4708 | cpumask_set_cpu(cpu, nohz.cpu_mask); |
4680 | 4709 | ||
4681 | /* time for ilb owner also to sleep */ | 4710 | /* time for ilb owner also to sleep */ |
4682 | if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) { | 4711 | if (cpumask_weight(nohz.cpu_mask) == num_active_cpus()) { |
4683 | if (atomic_read(&nohz.load_balancer) == cpu) | 4712 | if (atomic_read(&nohz.load_balancer) == cpu) |
4684 | atomic_set(&nohz.load_balancer, -1); | 4713 | atomic_set(&nohz.load_balancer, -1); |
4685 | return 0; | 4714 | return 0; |
@@ -5046,8 +5075,13 @@ static void account_guest_time(struct task_struct *p, cputime_t cputime, | |||
5046 | p->gtime = cputime_add(p->gtime, cputime); | 5075 | p->gtime = cputime_add(p->gtime, cputime); |
5047 | 5076 | ||
5048 | /* Add guest time to cpustat. */ | 5077 | /* Add guest time to cpustat. */ |
5049 | cpustat->user = cputime64_add(cpustat->user, tmp); | 5078 | if (TASK_NICE(p) > 0) { |
5050 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | 5079 | cpustat->nice = cputime64_add(cpustat->nice, tmp); |
5080 | cpustat->guest_nice = cputime64_add(cpustat->guest_nice, tmp); | ||
5081 | } else { | ||
5082 | cpustat->user = cputime64_add(cpustat->user, tmp); | ||
5083 | cpustat->guest = cputime64_add(cpustat->guest, tmp); | ||
5084 | } | ||
5051 | } | 5085 | } |
5052 | 5086 | ||
5053 | /* | 5087 | /* |
@@ -5162,60 +5196,86 @@ void account_idle_ticks(unsigned long ticks) | |||
5162 | * Use precise platform statistics if available: | 5196 | * Use precise platform statistics if available: |
5163 | */ | 5197 | */ |
5164 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | 5198 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING |
5165 | cputime_t task_utime(struct task_struct *p) | 5199 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5166 | { | 5200 | { |
5167 | return p->utime; | 5201 | *ut = p->utime; |
5202 | *st = p->stime; | ||
5168 | } | 5203 | } |
5169 | 5204 | ||
5170 | cputime_t task_stime(struct task_struct *p) | 5205 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) |
5171 | { | 5206 | { |
5172 | return p->stime; | 5207 | struct task_cputime cputime; |
5208 | |||
5209 | thread_group_cputime(p, &cputime); | ||
5210 | |||
5211 | *ut = cputime.utime; | ||
5212 | *st = cputime.stime; | ||
5173 | } | 5213 | } |
5174 | #else | 5214 | #else |
5175 | cputime_t task_utime(struct task_struct *p) | 5215 | |
5216 | #ifndef nsecs_to_cputime | ||
5217 | # define nsecs_to_cputime(__nsecs) nsecs_to_jiffies(__nsecs) | ||
5218 | #endif | ||
5219 | |||
5220 | void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5176 | { | 5221 | { |
5177 | clock_t utime = cputime_to_clock_t(p->utime), | 5222 | cputime_t rtime, utime = p->utime, total = cputime_add(utime, p->stime); |
5178 | total = utime + cputime_to_clock_t(p->stime); | ||
5179 | u64 temp; | ||
5180 | 5223 | ||
5181 | /* | 5224 | /* |
5182 | * Use CFS's precise accounting: | 5225 | * Use CFS's precise accounting: |
5183 | */ | 5226 | */ |
5184 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | 5227 | rtime = nsecs_to_cputime(p->se.sum_exec_runtime); |
5185 | 5228 | ||
5186 | if (total) { | 5229 | if (total) { |
5187 | temp *= utime; | 5230 | u64 temp; |
5231 | |||
5232 | temp = (u64)(rtime * utime); | ||
5188 | do_div(temp, total); | 5233 | do_div(temp, total); |
5189 | } | 5234 | utime = (cputime_t)temp; |
5190 | utime = (clock_t)temp; | 5235 | } else |
5236 | utime = rtime; | ||
5237 | |||
5238 | /* | ||
5239 | * Compare with previous values, to keep monotonicity: | ||
5240 | */ | ||
5241 | p->prev_utime = max(p->prev_utime, utime); | ||
5242 | p->prev_stime = max(p->prev_stime, cputime_sub(rtime, p->prev_utime)); | ||
5191 | 5243 | ||
5192 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | 5244 | *ut = p->prev_utime; |
5193 | return p->prev_utime; | 5245 | *st = p->prev_stime; |
5194 | } | 5246 | } |
5195 | 5247 | ||
5196 | cputime_t task_stime(struct task_struct *p) | 5248 | /* |
5249 | * Must be called with siglock held. | ||
5250 | */ | ||
5251 | void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st) | ||
5197 | { | 5252 | { |
5198 | clock_t stime; | 5253 | struct signal_struct *sig = p->signal; |
5254 | struct task_cputime cputime; | ||
5255 | cputime_t rtime, utime, total; | ||
5199 | 5256 | ||
5200 | /* | 5257 | thread_group_cputime(p, &cputime); |
5201 | * Use CFS's precise accounting. (we subtract utime from | ||
5202 | * the total, to make sure the total observed by userspace | ||
5203 | * grows monotonically - apps rely on that): | ||
5204 | */ | ||
5205 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
5206 | cputime_to_clock_t(task_utime(p)); | ||
5207 | 5258 | ||
5208 | if (stime >= 0) | 5259 | total = cputime_add(cputime.utime, cputime.stime); |
5209 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | 5260 | rtime = nsecs_to_cputime(cputime.sum_exec_runtime); |
5210 | 5261 | ||
5211 | return p->prev_stime; | 5262 | if (total) { |
5212 | } | 5263 | u64 temp; |
5213 | #endif | ||
5214 | 5264 | ||
5215 | inline cputime_t task_gtime(struct task_struct *p) | 5265 | temp = (u64)(rtime * cputime.utime); |
5216 | { | 5266 | do_div(temp, total); |
5217 | return p->gtime; | 5267 | utime = (cputime_t)temp; |
5268 | } else | ||
5269 | utime = rtime; | ||
5270 | |||
5271 | sig->prev_utime = max(sig->prev_utime, utime); | ||
5272 | sig->prev_stime = max(sig->prev_stime, | ||
5273 | cputime_sub(rtime, sig->prev_utime)); | ||
5274 | |||
5275 | *ut = sig->prev_utime; | ||
5276 | *st = sig->prev_stime; | ||
5218 | } | 5277 | } |
5278 | #endif | ||
5219 | 5279 | ||
5220 | /* | 5280 | /* |
5221 | * This function gets called by the timer code, with HZ frequency. | 5281 | * This function gets called by the timer code, with HZ frequency. |
@@ -5232,11 +5292,11 @@ void scheduler_tick(void) | |||
5232 | 5292 | ||
5233 | sched_clock_tick(); | 5293 | sched_clock_tick(); |
5234 | 5294 | ||
5235 | spin_lock(&rq->lock); | 5295 | raw_spin_lock(&rq->lock); |
5236 | update_rq_clock(rq); | 5296 | update_rq_clock(rq); |
5237 | update_cpu_load(rq); | 5297 | update_cpu_load(rq); |
5238 | curr->sched_class->task_tick(rq, curr, 0); | 5298 | curr->sched_class->task_tick(rq, curr, 0); |
5239 | spin_unlock(&rq->lock); | 5299 | raw_spin_unlock(&rq->lock); |
5240 | 5300 | ||
5241 | perf_event_task_tick(curr, cpu); | 5301 | perf_event_task_tick(curr, cpu); |
5242 | 5302 | ||
@@ -5350,13 +5410,14 @@ static inline void schedule_debug(struct task_struct *prev) | |||
5350 | #endif | 5410 | #endif |
5351 | } | 5411 | } |
5352 | 5412 | ||
5353 | static void put_prev_task(struct rq *rq, struct task_struct *p) | 5413 | static void put_prev_task(struct rq *rq, struct task_struct *prev) |
5354 | { | 5414 | { |
5355 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; | 5415 | if (prev->state == TASK_RUNNING) { |
5416 | u64 runtime = prev->se.sum_exec_runtime; | ||
5356 | 5417 | ||
5357 | update_avg(&p->se.avg_running, runtime); | 5418 | runtime -= prev->se.prev_sum_exec_runtime; |
5419 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
5358 | 5420 | ||
5359 | if (p->state == TASK_RUNNING) { | ||
5360 | /* | 5421 | /* |
5361 | * In order to avoid avg_overlap growing stale when we are | 5422 | * In order to avoid avg_overlap growing stale when we are |
5362 | * indeed overlapping and hence not getting put to sleep, grow | 5423 | * indeed overlapping and hence not getting put to sleep, grow |
@@ -5366,12 +5427,9 @@ static void put_prev_task(struct rq *rq, struct task_struct *p) | |||
5366 | * correlates to the amount of cache footprint a task can | 5427 | * correlates to the amount of cache footprint a task can |
5367 | * build up. | 5428 | * build up. |
5368 | */ | 5429 | */ |
5369 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | 5430 | update_avg(&prev->se.avg_overlap, runtime); |
5370 | update_avg(&p->se.avg_overlap, runtime); | ||
5371 | } else { | ||
5372 | update_avg(&p->se.avg_running, 0); | ||
5373 | } | 5431 | } |
5374 | p->sched_class->put_prev_task(rq, p); | 5432 | prev->sched_class->put_prev_task(rq, prev); |
5375 | } | 5433 | } |
5376 | 5434 | ||
5377 | /* | 5435 | /* |
@@ -5432,7 +5490,7 @@ need_resched_nonpreemptible: | |||
5432 | if (sched_feat(HRTICK)) | 5490 | if (sched_feat(HRTICK)) |
5433 | hrtick_clear(rq); | 5491 | hrtick_clear(rq); |
5434 | 5492 | ||
5435 | spin_lock_irq(&rq->lock); | 5493 | raw_spin_lock_irq(&rq->lock); |
5436 | update_rq_clock(rq); | 5494 | update_rq_clock(rq); |
5437 | clear_tsk_need_resched(prev); | 5495 | clear_tsk_need_resched(prev); |
5438 | 5496 | ||
@@ -5468,7 +5526,7 @@ need_resched_nonpreemptible: | |||
5468 | cpu = smp_processor_id(); | 5526 | cpu = smp_processor_id(); |
5469 | rq = cpu_rq(cpu); | 5527 | rq = cpu_rq(cpu); |
5470 | } else | 5528 | } else |
5471 | spin_unlock_irq(&rq->lock); | 5529 | raw_spin_unlock_irq(&rq->lock); |
5472 | 5530 | ||
5473 | post_schedule(rq); | 5531 | post_schedule(rq); |
5474 | 5532 | ||
@@ -5481,7 +5539,7 @@ need_resched_nonpreemptible: | |||
5481 | } | 5539 | } |
5482 | EXPORT_SYMBOL(schedule); | 5540 | EXPORT_SYMBOL(schedule); |
5483 | 5541 | ||
5484 | #ifdef CONFIG_SMP | 5542 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
5485 | /* | 5543 | /* |
5486 | * Look out! "owner" is an entirely speculative pointer | 5544 | * Look out! "owner" is an entirely speculative pointer |
5487 | * access and not reliable. | 5545 | * access and not reliable. |
@@ -5885,14 +5943,15 @@ EXPORT_SYMBOL(wait_for_completion_killable); | |||
5885 | */ | 5943 | */ |
5886 | bool try_wait_for_completion(struct completion *x) | 5944 | bool try_wait_for_completion(struct completion *x) |
5887 | { | 5945 | { |
5946 | unsigned long flags; | ||
5888 | int ret = 1; | 5947 | int ret = 1; |
5889 | 5948 | ||
5890 | spin_lock_irq(&x->wait.lock); | 5949 | spin_lock_irqsave(&x->wait.lock, flags); |
5891 | if (!x->done) | 5950 | if (!x->done) |
5892 | ret = 0; | 5951 | ret = 0; |
5893 | else | 5952 | else |
5894 | x->done--; | 5953 | x->done--; |
5895 | spin_unlock_irq(&x->wait.lock); | 5954 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5896 | return ret; | 5955 | return ret; |
5897 | } | 5956 | } |
5898 | EXPORT_SYMBOL(try_wait_for_completion); | 5957 | EXPORT_SYMBOL(try_wait_for_completion); |
@@ -5907,12 +5966,13 @@ EXPORT_SYMBOL(try_wait_for_completion); | |||
5907 | */ | 5966 | */ |
5908 | bool completion_done(struct completion *x) | 5967 | bool completion_done(struct completion *x) |
5909 | { | 5968 | { |
5969 | unsigned long flags; | ||
5910 | int ret = 1; | 5970 | int ret = 1; |
5911 | 5971 | ||
5912 | spin_lock_irq(&x->wait.lock); | 5972 | spin_lock_irqsave(&x->wait.lock, flags); |
5913 | if (!x->done) | 5973 | if (!x->done) |
5914 | ret = 0; | 5974 | ret = 0; |
5915 | spin_unlock_irq(&x->wait.lock); | 5975 | spin_unlock_irqrestore(&x->wait.lock, flags); |
5916 | return ret; | 5976 | return ret; |
5917 | } | 5977 | } |
5918 | EXPORT_SYMBOL(completion_done); | 5978 | EXPORT_SYMBOL(completion_done); |
@@ -6175,22 +6235,14 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) | |||
6175 | BUG_ON(p->se.on_rq); | 6235 | BUG_ON(p->se.on_rq); |
6176 | 6236 | ||
6177 | p->policy = policy; | 6237 | p->policy = policy; |
6178 | switch (p->policy) { | ||
6179 | case SCHED_NORMAL: | ||
6180 | case SCHED_BATCH: | ||
6181 | case SCHED_IDLE: | ||
6182 | p->sched_class = &fair_sched_class; | ||
6183 | break; | ||
6184 | case SCHED_FIFO: | ||
6185 | case SCHED_RR: | ||
6186 | p->sched_class = &rt_sched_class; | ||
6187 | break; | ||
6188 | } | ||
6189 | |||
6190 | p->rt_priority = prio; | 6238 | p->rt_priority = prio; |
6191 | p->normal_prio = normal_prio(p); | 6239 | p->normal_prio = normal_prio(p); |
6192 | /* we are holding p->pi_lock already */ | 6240 | /* we are holding p->pi_lock already */ |
6193 | p->prio = rt_mutex_getprio(p); | 6241 | p->prio = rt_mutex_getprio(p); |
6242 | if (rt_prio(p->prio)) | ||
6243 | p->sched_class = &rt_sched_class; | ||
6244 | else | ||
6245 | p->sched_class = &fair_sched_class; | ||
6194 | set_load_weight(p); | 6246 | set_load_weight(p); |
6195 | } | 6247 | } |
6196 | 6248 | ||
@@ -6305,7 +6357,7 @@ recheck: | |||
6305 | * make sure no PI-waiters arrive (or leave) while we are | 6357 | * make sure no PI-waiters arrive (or leave) while we are |
6306 | * changing the priority of the task: | 6358 | * changing the priority of the task: |
6307 | */ | 6359 | */ |
6308 | spin_lock_irqsave(&p->pi_lock, flags); | 6360 | raw_spin_lock_irqsave(&p->pi_lock, flags); |
6309 | /* | 6361 | /* |
6310 | * To be able to change p->policy safely, the apropriate | 6362 | * To be able to change p->policy safely, the apropriate |
6311 | * runqueue lock must be held. | 6363 | * runqueue lock must be held. |
@@ -6315,7 +6367,7 @@ recheck: | |||
6315 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { | 6367 | if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) { |
6316 | policy = oldpolicy = -1; | 6368 | policy = oldpolicy = -1; |
6317 | __task_rq_unlock(rq); | 6369 | __task_rq_unlock(rq); |
6318 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6370 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6319 | goto recheck; | 6371 | goto recheck; |
6320 | } | 6372 | } |
6321 | update_rq_clock(rq); | 6373 | update_rq_clock(rq); |
@@ -6339,7 +6391,7 @@ recheck: | |||
6339 | check_class_changed(rq, p, prev_class, oldprio, running); | 6391 | check_class_changed(rq, p, prev_class, oldprio, running); |
6340 | } | 6392 | } |
6341 | __task_rq_unlock(rq); | 6393 | __task_rq_unlock(rq); |
6342 | spin_unlock_irqrestore(&p->pi_lock, flags); | 6394 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); |
6343 | 6395 | ||
6344 | rt_mutex_adjust_pi(p); | 6396 | rt_mutex_adjust_pi(p); |
6345 | 6397 | ||
@@ -6439,7 +6491,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6439 | return -EINVAL; | 6491 | return -EINVAL; |
6440 | 6492 | ||
6441 | retval = -ESRCH; | 6493 | retval = -ESRCH; |
6442 | read_lock(&tasklist_lock); | 6494 | rcu_read_lock(); |
6443 | p = find_process_by_pid(pid); | 6495 | p = find_process_by_pid(pid); |
6444 | if (p) { | 6496 | if (p) { |
6445 | retval = security_task_getscheduler(p); | 6497 | retval = security_task_getscheduler(p); |
@@ -6447,7 +6499,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) | |||
6447 | retval = p->policy | 6499 | retval = p->policy |
6448 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); | 6500 | | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0); |
6449 | } | 6501 | } |
6450 | read_unlock(&tasklist_lock); | 6502 | rcu_read_unlock(); |
6451 | return retval; | 6503 | return retval; |
6452 | } | 6504 | } |
6453 | 6505 | ||
@@ -6465,7 +6517,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6465 | if (!param || pid < 0) | 6517 | if (!param || pid < 0) |
6466 | return -EINVAL; | 6518 | return -EINVAL; |
6467 | 6519 | ||
6468 | read_lock(&tasklist_lock); | 6520 | rcu_read_lock(); |
6469 | p = find_process_by_pid(pid); | 6521 | p = find_process_by_pid(pid); |
6470 | retval = -ESRCH; | 6522 | retval = -ESRCH; |
6471 | if (!p) | 6523 | if (!p) |
@@ -6476,7 +6528,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6476 | goto out_unlock; | 6528 | goto out_unlock; |
6477 | 6529 | ||
6478 | lp.sched_priority = p->rt_priority; | 6530 | lp.sched_priority = p->rt_priority; |
6479 | read_unlock(&tasklist_lock); | 6531 | rcu_read_unlock(); |
6480 | 6532 | ||
6481 | /* | 6533 | /* |
6482 | * This one might sleep, we cannot do it with a spinlock held ... | 6534 | * This one might sleep, we cannot do it with a spinlock held ... |
@@ -6486,7 +6538,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) | |||
6486 | return retval; | 6538 | return retval; |
6487 | 6539 | ||
6488 | out_unlock: | 6540 | out_unlock: |
6489 | read_unlock(&tasklist_lock); | 6541 | rcu_read_unlock(); |
6490 | return retval; | 6542 | return retval; |
6491 | } | 6543 | } |
6492 | 6544 | ||
@@ -6497,22 +6549,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
6497 | int retval; | 6549 | int retval; |
6498 | 6550 | ||
6499 | get_online_cpus(); | 6551 | get_online_cpus(); |
6500 | read_lock(&tasklist_lock); | 6552 | rcu_read_lock(); |
6501 | 6553 | ||
6502 | p = find_process_by_pid(pid); | 6554 | p = find_process_by_pid(pid); |
6503 | if (!p) { | 6555 | if (!p) { |
6504 | read_unlock(&tasklist_lock); | 6556 | rcu_read_unlock(); |
6505 | put_online_cpus(); | 6557 | put_online_cpus(); |
6506 | return -ESRCH; | 6558 | return -ESRCH; |
6507 | } | 6559 | } |
6508 | 6560 | ||
6509 | /* | 6561 | /* Prevent p going away */ |
6510 | * It is not safe to call set_cpus_allowed with the | ||
6511 | * tasklist_lock held. We will bump the task_struct's | ||
6512 | * usage count and then drop tasklist_lock. | ||
6513 | */ | ||
6514 | get_task_struct(p); | 6562 | get_task_struct(p); |
6515 | read_unlock(&tasklist_lock); | 6563 | rcu_read_unlock(); |
6516 | 6564 | ||
6517 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { | 6565 | if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) { |
6518 | retval = -ENOMEM; | 6566 | retval = -ENOMEM; |
@@ -6593,10 +6641,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, | |||
6593 | long sched_getaffinity(pid_t pid, struct cpumask *mask) | 6641 | long sched_getaffinity(pid_t pid, struct cpumask *mask) |
6594 | { | 6642 | { |
6595 | struct task_struct *p; | 6643 | struct task_struct *p; |
6644 | unsigned long flags; | ||
6645 | struct rq *rq; | ||
6596 | int retval; | 6646 | int retval; |
6597 | 6647 | ||
6598 | get_online_cpus(); | 6648 | get_online_cpus(); |
6599 | read_lock(&tasklist_lock); | 6649 | rcu_read_lock(); |
6600 | 6650 | ||
6601 | retval = -ESRCH; | 6651 | retval = -ESRCH; |
6602 | p = find_process_by_pid(pid); | 6652 | p = find_process_by_pid(pid); |
@@ -6607,10 +6657,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask) | |||
6607 | if (retval) | 6657 | if (retval) |
6608 | goto out_unlock; | 6658 | goto out_unlock; |
6609 | 6659 | ||
6660 | rq = task_rq_lock(p, &flags); | ||
6610 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); | 6661 | cpumask_and(mask, &p->cpus_allowed, cpu_online_mask); |
6662 | task_rq_unlock(rq, &flags); | ||
6611 | 6663 | ||
6612 | out_unlock: | 6664 | out_unlock: |
6613 | read_unlock(&tasklist_lock); | 6665 | rcu_read_unlock(); |
6614 | put_online_cpus(); | 6666 | put_online_cpus(); |
6615 | 6667 | ||
6616 | return retval; | 6668 | return retval; |
@@ -6665,7 +6717,7 @@ SYSCALL_DEFINE0(sched_yield) | |||
6665 | */ | 6717 | */ |
6666 | __release(rq->lock); | 6718 | __release(rq->lock); |
6667 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); | 6719 | spin_release(&rq->lock.dep_map, 1, _THIS_IP_); |
6668 | _raw_spin_unlock(&rq->lock); | 6720 | do_raw_spin_unlock(&rq->lock); |
6669 | preempt_enable_no_resched(); | 6721 | preempt_enable_no_resched(); |
6670 | 6722 | ||
6671 | schedule(); | 6723 | schedule(); |
@@ -6845,6 +6897,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6845 | { | 6897 | { |
6846 | struct task_struct *p; | 6898 | struct task_struct *p; |
6847 | unsigned int time_slice; | 6899 | unsigned int time_slice; |
6900 | unsigned long flags; | ||
6901 | struct rq *rq; | ||
6848 | int retval; | 6902 | int retval; |
6849 | struct timespec t; | 6903 | struct timespec t; |
6850 | 6904 | ||
@@ -6852,7 +6906,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6852 | return -EINVAL; | 6906 | return -EINVAL; |
6853 | 6907 | ||
6854 | retval = -ESRCH; | 6908 | retval = -ESRCH; |
6855 | read_lock(&tasklist_lock); | 6909 | rcu_read_lock(); |
6856 | p = find_process_by_pid(pid); | 6910 | p = find_process_by_pid(pid); |
6857 | if (!p) | 6911 | if (!p) |
6858 | goto out_unlock; | 6912 | goto out_unlock; |
@@ -6861,15 +6915,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
6861 | if (retval) | 6915 | if (retval) |
6862 | goto out_unlock; | 6916 | goto out_unlock; |
6863 | 6917 | ||
6864 | time_slice = p->sched_class->get_rr_interval(p); | 6918 | rq = task_rq_lock(p, &flags); |
6919 | time_slice = p->sched_class->get_rr_interval(rq, p); | ||
6920 | task_rq_unlock(rq, &flags); | ||
6865 | 6921 | ||
6866 | read_unlock(&tasklist_lock); | 6922 | rcu_read_unlock(); |
6867 | jiffies_to_timespec(time_slice, &t); | 6923 | jiffies_to_timespec(time_slice, &t); |
6868 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 6924 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
6869 | return retval; | 6925 | return retval; |
6870 | 6926 | ||
6871 | out_unlock: | 6927 | out_unlock: |
6872 | read_unlock(&tasklist_lock); | 6928 | rcu_read_unlock(); |
6873 | return retval; | 6929 | return retval; |
6874 | } | 6930 | } |
6875 | 6931 | ||
@@ -6935,7 +6991,7 @@ void show_state_filter(unsigned long state_filter) | |||
6935 | /* | 6991 | /* |
6936 | * Only show locks if all tasks are dumped: | 6992 | * Only show locks if all tasks are dumped: |
6937 | */ | 6993 | */ |
6938 | if (state_filter == -1) | 6994 | if (!state_filter) |
6939 | debug_show_all_locks(); | 6995 | debug_show_all_locks(); |
6940 | } | 6996 | } |
6941 | 6997 | ||
@@ -6957,12 +7013,12 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6957 | struct rq *rq = cpu_rq(cpu); | 7013 | struct rq *rq = cpu_rq(cpu); |
6958 | unsigned long flags; | 7014 | unsigned long flags; |
6959 | 7015 | ||
6960 | spin_lock_irqsave(&rq->lock, flags); | 7016 | raw_spin_lock_irqsave(&rq->lock, flags); |
6961 | 7017 | ||
6962 | __sched_fork(idle); | 7018 | __sched_fork(idle); |
7019 | idle->state = TASK_RUNNING; | ||
6963 | idle->se.exec_start = sched_clock(); | 7020 | idle->se.exec_start = sched_clock(); |
6964 | 7021 | ||
6965 | idle->prio = idle->normal_prio = MAX_PRIO; | ||
6966 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); | 7022 | cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); |
6967 | __set_task_cpu(idle, cpu); | 7023 | __set_task_cpu(idle, cpu); |
6968 | 7024 | ||
@@ -6970,7 +7026,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) | |||
6970 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) | 7026 | #if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) |
6971 | idle->oncpu = 1; | 7027 | idle->oncpu = 1; |
6972 | #endif | 7028 | #endif |
6973 | spin_unlock_irqrestore(&rq->lock, flags); | 7029 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
6974 | 7030 | ||
6975 | /* Set the preempt count _outside_ the spinlocks! */ | 7031 | /* Set the preempt count _outside_ the spinlocks! */ |
6976 | #if defined(CONFIG_PREEMPT) | 7032 | #if defined(CONFIG_PREEMPT) |
@@ -7003,22 +7059,43 @@ cpumask_var_t nohz_cpu_mask; | |||
7003 | * | 7059 | * |
7004 | * This idea comes from the SD scheduler of Con Kolivas: | 7060 | * This idea comes from the SD scheduler of Con Kolivas: |
7005 | */ | 7061 | */ |
7006 | static inline void sched_init_granularity(void) | 7062 | static int get_update_sysctl_factor(void) |
7007 | { | 7063 | { |
7008 | unsigned int factor = 1 + ilog2(num_online_cpus()); | 7064 | unsigned int cpus = min_t(int, num_online_cpus(), 8); |
7009 | const unsigned long limit = 200000000; | 7065 | unsigned int factor; |
7066 | |||
7067 | switch (sysctl_sched_tunable_scaling) { | ||
7068 | case SCHED_TUNABLESCALING_NONE: | ||
7069 | factor = 1; | ||
7070 | break; | ||
7071 | case SCHED_TUNABLESCALING_LINEAR: | ||
7072 | factor = cpus; | ||
7073 | break; | ||
7074 | case SCHED_TUNABLESCALING_LOG: | ||
7075 | default: | ||
7076 | factor = 1 + ilog2(cpus); | ||
7077 | break; | ||
7078 | } | ||
7010 | 7079 | ||
7011 | sysctl_sched_min_granularity *= factor; | 7080 | return factor; |
7012 | if (sysctl_sched_min_granularity > limit) | 7081 | } |
7013 | sysctl_sched_min_granularity = limit; | ||
7014 | 7082 | ||
7015 | sysctl_sched_latency *= factor; | 7083 | static void update_sysctl(void) |
7016 | if (sysctl_sched_latency > limit) | 7084 | { |
7017 | sysctl_sched_latency = limit; | 7085 | unsigned int factor = get_update_sysctl_factor(); |
7018 | 7086 | ||
7019 | sysctl_sched_wakeup_granularity *= factor; | 7087 | #define SET_SYSCTL(name) \ |
7088 | (sysctl_##name = (factor) * normalized_sysctl_##name) | ||
7089 | SET_SYSCTL(sched_min_granularity); | ||
7090 | SET_SYSCTL(sched_latency); | ||
7091 | SET_SYSCTL(sched_wakeup_granularity); | ||
7092 | SET_SYSCTL(sched_shares_ratelimit); | ||
7093 | #undef SET_SYSCTL | ||
7094 | } | ||
7020 | 7095 | ||
7021 | sysctl_sched_shares_ratelimit *= factor; | 7096 | static inline void sched_init_granularity(void) |
7097 | { | ||
7098 | update_sysctl(); | ||
7022 | } | 7099 | } |
7023 | 7100 | ||
7024 | #ifdef CONFIG_SMP | 7101 | #ifdef CONFIG_SMP |
@@ -7054,8 +7131,24 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7054 | struct rq *rq; | 7131 | struct rq *rq; |
7055 | int ret = 0; | 7132 | int ret = 0; |
7056 | 7133 | ||
7134 | /* | ||
7135 | * Since we rely on wake-ups to migrate sleeping tasks, don't change | ||
7136 | * the ->cpus_allowed mask from under waking tasks, which would be | ||
7137 | * possible when we change rq->lock in ttwu(), so synchronize against | ||
7138 | * TASK_WAKING to avoid that. | ||
7139 | */ | ||
7140 | again: | ||
7141 | while (p->state == TASK_WAKING) | ||
7142 | cpu_relax(); | ||
7143 | |||
7057 | rq = task_rq_lock(p, &flags); | 7144 | rq = task_rq_lock(p, &flags); |
7058 | if (!cpumask_intersects(new_mask, cpu_online_mask)) { | 7145 | |
7146 | if (p->state == TASK_WAKING) { | ||
7147 | task_rq_unlock(rq, &flags); | ||
7148 | goto again; | ||
7149 | } | ||
7150 | |||
7151 | if (!cpumask_intersects(new_mask, cpu_active_mask)) { | ||
7059 | ret = -EINVAL; | 7152 | ret = -EINVAL; |
7060 | goto out; | 7153 | goto out; |
7061 | } | 7154 | } |
@@ -7077,7 +7170,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask) | |||
7077 | if (cpumask_test_cpu(task_cpu(p), new_mask)) | 7170 | if (cpumask_test_cpu(task_cpu(p), new_mask)) |
7078 | goto out; | 7171 | goto out; |
7079 | 7172 | ||
7080 | if (migrate_task(p, cpumask_any_and(cpu_online_mask, new_mask), &req)) { | 7173 | if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) { |
7081 | /* Need help from migration thread: drop lock and wait. */ | 7174 | /* Need help from migration thread: drop lock and wait. */ |
7082 | struct task_struct *mt = rq->migration_thread; | 7175 | struct task_struct *mt = rq->migration_thread; |
7083 | 7176 | ||
@@ -7110,7 +7203,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); | |||
7110 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | 7203 | static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) |
7111 | { | 7204 | { |
7112 | struct rq *rq_dest, *rq_src; | 7205 | struct rq *rq_dest, *rq_src; |
7113 | int ret = 0, on_rq; | 7206 | int ret = 0; |
7114 | 7207 | ||
7115 | if (unlikely(!cpu_active(dest_cpu))) | 7208 | if (unlikely(!cpu_active(dest_cpu))) |
7116 | return ret; | 7209 | return ret; |
@@ -7126,12 +7219,13 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7126 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | 7219 | if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) |
7127 | goto fail; | 7220 | goto fail; |
7128 | 7221 | ||
7129 | on_rq = p->se.on_rq; | 7222 | /* |
7130 | if (on_rq) | 7223 | * If we're not on a rq, the next wake-up will ensure we're |
7224 | * placed properly. | ||
7225 | */ | ||
7226 | if (p->se.on_rq) { | ||
7131 | deactivate_task(rq_src, p, 0); | 7227 | deactivate_task(rq_src, p, 0); |
7132 | 7228 | set_task_cpu(p, dest_cpu); | |
7133 | set_task_cpu(p, dest_cpu); | ||
7134 | if (on_rq) { | ||
7135 | activate_task(rq_dest, p, 0); | 7229 | activate_task(rq_dest, p, 0); |
7136 | check_preempt_curr(rq_dest, p, 0); | 7230 | check_preempt_curr(rq_dest, p, 0); |
7137 | } | 7231 | } |
@@ -7166,10 +7260,10 @@ static int migration_thread(void *data) | |||
7166 | struct migration_req *req; | 7260 | struct migration_req *req; |
7167 | struct list_head *head; | 7261 | struct list_head *head; |
7168 | 7262 | ||
7169 | spin_lock_irq(&rq->lock); | 7263 | raw_spin_lock_irq(&rq->lock); |
7170 | 7264 | ||
7171 | if (cpu_is_offline(cpu)) { | 7265 | if (cpu_is_offline(cpu)) { |
7172 | spin_unlock_irq(&rq->lock); | 7266 | raw_spin_unlock_irq(&rq->lock); |
7173 | break; | 7267 | break; |
7174 | } | 7268 | } |
7175 | 7269 | ||
@@ -7181,7 +7275,7 @@ static int migration_thread(void *data) | |||
7181 | head = &rq->migration_queue; | 7275 | head = &rq->migration_queue; |
7182 | 7276 | ||
7183 | if (list_empty(head)) { | 7277 | if (list_empty(head)) { |
7184 | spin_unlock_irq(&rq->lock); | 7278 | raw_spin_unlock_irq(&rq->lock); |
7185 | schedule(); | 7279 | schedule(); |
7186 | set_current_state(TASK_INTERRUPTIBLE); | 7280 | set_current_state(TASK_INTERRUPTIBLE); |
7187 | continue; | 7281 | continue; |
@@ -7190,14 +7284,14 @@ static int migration_thread(void *data) | |||
7190 | list_del_init(head->next); | 7284 | list_del_init(head->next); |
7191 | 7285 | ||
7192 | if (req->task != NULL) { | 7286 | if (req->task != NULL) { |
7193 | spin_unlock(&rq->lock); | 7287 | raw_spin_unlock(&rq->lock); |
7194 | __migrate_task(req->task, cpu, req->dest_cpu); | 7288 | __migrate_task(req->task, cpu, req->dest_cpu); |
7195 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { | 7289 | } else if (likely(cpu == (badcpu = smp_processor_id()))) { |
7196 | req->dest_cpu = RCU_MIGRATION_GOT_QS; | 7290 | req->dest_cpu = RCU_MIGRATION_GOT_QS; |
7197 | spin_unlock(&rq->lock); | 7291 | raw_spin_unlock(&rq->lock); |
7198 | } else { | 7292 | } else { |
7199 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; | 7293 | req->dest_cpu = RCU_MIGRATION_MUST_SYNC; |
7200 | spin_unlock(&rq->lock); | 7294 | raw_spin_unlock(&rq->lock); |
7201 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); | 7295 | WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu); |
7202 | } | 7296 | } |
7203 | local_irq_enable(); | 7297 | local_irq_enable(); |
@@ -7227,37 +7321,10 @@ static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu) | |||
7227 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) | 7321 | static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p) |
7228 | { | 7322 | { |
7229 | int dest_cpu; | 7323 | int dest_cpu; |
7230 | const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu)); | ||
7231 | 7324 | ||
7232 | again: | 7325 | again: |
7233 | /* Look for allowed, online CPU in same node. */ | 7326 | dest_cpu = select_fallback_rq(dead_cpu, p); |
7234 | for_each_cpu_and(dest_cpu, nodemask, cpu_online_mask) | ||
7235 | if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed)) | ||
7236 | goto move; | ||
7237 | |||
7238 | /* Any allowed, online CPU? */ | ||
7239 | dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_online_mask); | ||
7240 | if (dest_cpu < nr_cpu_ids) | ||
7241 | goto move; | ||
7242 | |||
7243 | /* No more Mr. Nice Guy. */ | ||
7244 | if (dest_cpu >= nr_cpu_ids) { | ||
7245 | cpuset_cpus_allowed_locked(p, &p->cpus_allowed); | ||
7246 | dest_cpu = cpumask_any_and(cpu_online_mask, &p->cpus_allowed); | ||
7247 | 7327 | ||
7248 | /* | ||
7249 | * Don't tell them about moving exiting tasks or | ||
7250 | * kernel threads (both mm NULL), since they never | ||
7251 | * leave kernel. | ||
7252 | */ | ||
7253 | if (p->mm && printk_ratelimit()) { | ||
7254 | printk(KERN_INFO "process %d (%s) no " | ||
7255 | "longer affine to cpu%d\n", | ||
7256 | task_pid_nr(p), p->comm, dead_cpu); | ||
7257 | } | ||
7258 | } | ||
7259 | |||
7260 | move: | ||
7261 | /* It can have affinity changed while we were choosing. */ | 7328 | /* It can have affinity changed while we were choosing. */ |
7262 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) | 7329 | if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu))) |
7263 | goto again; | 7330 | goto again; |
@@ -7272,7 +7339,7 @@ move: | |||
7272 | */ | 7339 | */ |
7273 | static void migrate_nr_uninterruptible(struct rq *rq_src) | 7340 | static void migrate_nr_uninterruptible(struct rq *rq_src) |
7274 | { | 7341 | { |
7275 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_online_mask)); | 7342 | struct rq *rq_dest = cpu_rq(cpumask_any(cpu_active_mask)); |
7276 | unsigned long flags; | 7343 | unsigned long flags; |
7277 | 7344 | ||
7278 | local_irq_save(flags); | 7345 | local_irq_save(flags); |
@@ -7320,14 +7387,14 @@ void sched_idle_next(void) | |||
7320 | * Strictly not necessary since rest of the CPUs are stopped by now | 7387 | * Strictly not necessary since rest of the CPUs are stopped by now |
7321 | * and interrupts disabled on the current cpu. | 7388 | * and interrupts disabled on the current cpu. |
7322 | */ | 7389 | */ |
7323 | spin_lock_irqsave(&rq->lock, flags); | 7390 | raw_spin_lock_irqsave(&rq->lock, flags); |
7324 | 7391 | ||
7325 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); | 7392 | __setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1); |
7326 | 7393 | ||
7327 | update_rq_clock(rq); | 7394 | update_rq_clock(rq); |
7328 | activate_task(rq, p, 0); | 7395 | activate_task(rq, p, 0); |
7329 | 7396 | ||
7330 | spin_unlock_irqrestore(&rq->lock, flags); | 7397 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7331 | } | 7398 | } |
7332 | 7399 | ||
7333 | /* | 7400 | /* |
@@ -7363,9 +7430,9 @@ static void migrate_dead(unsigned int dead_cpu, struct task_struct *p) | |||
7363 | * that's OK. No task can be added to this CPU, so iteration is | 7430 | * that's OK. No task can be added to this CPU, so iteration is |
7364 | * fine. | 7431 | * fine. |
7365 | */ | 7432 | */ |
7366 | spin_unlock_irq(&rq->lock); | 7433 | raw_spin_unlock_irq(&rq->lock); |
7367 | move_task_off_dead_cpu(dead_cpu, p); | 7434 | move_task_off_dead_cpu(dead_cpu, p); |
7368 | spin_lock_irq(&rq->lock); | 7435 | raw_spin_lock_irq(&rq->lock); |
7369 | 7436 | ||
7370 | put_task_struct(p); | 7437 | put_task_struct(p); |
7371 | } | 7438 | } |
@@ -7406,17 +7473,16 @@ static struct ctl_table sd_ctl_dir[] = { | |||
7406 | .procname = "sched_domain", | 7473 | .procname = "sched_domain", |
7407 | .mode = 0555, | 7474 | .mode = 0555, |
7408 | }, | 7475 | }, |
7409 | {0, }, | 7476 | {} |
7410 | }; | 7477 | }; |
7411 | 7478 | ||
7412 | static struct ctl_table sd_ctl_root[] = { | 7479 | static struct ctl_table sd_ctl_root[] = { |
7413 | { | 7480 | { |
7414 | .ctl_name = CTL_KERN, | ||
7415 | .procname = "kernel", | 7481 | .procname = "kernel", |
7416 | .mode = 0555, | 7482 | .mode = 0555, |
7417 | .child = sd_ctl_dir, | 7483 | .child = sd_ctl_dir, |
7418 | }, | 7484 | }, |
7419 | {0, }, | 7485 | {} |
7420 | }; | 7486 | }; |
7421 | 7487 | ||
7422 | static struct ctl_table *sd_alloc_ctl_entry(int n) | 7488 | static struct ctl_table *sd_alloc_ctl_entry(int n) |
@@ -7526,7 +7592,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) | |||
7526 | static struct ctl_table_header *sd_sysctl_header; | 7592 | static struct ctl_table_header *sd_sysctl_header; |
7527 | static void register_sched_domain_sysctl(void) | 7593 | static void register_sched_domain_sysctl(void) |
7528 | { | 7594 | { |
7529 | int i, cpu_num = num_online_cpus(); | 7595 | int i, cpu_num = num_possible_cpus(); |
7530 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); | 7596 | struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1); |
7531 | char buf[32]; | 7597 | char buf[32]; |
7532 | 7598 | ||
@@ -7536,7 +7602,7 @@ static void register_sched_domain_sysctl(void) | |||
7536 | if (entry == NULL) | 7602 | if (entry == NULL) |
7537 | return; | 7603 | return; |
7538 | 7604 | ||
7539 | for_each_online_cpu(i) { | 7605 | for_each_possible_cpu(i) { |
7540 | snprintf(buf, 32, "cpu%d", i); | 7606 | snprintf(buf, 32, "cpu%d", i); |
7541 | entry->procname = kstrdup(buf, GFP_KERNEL); | 7607 | entry->procname = kstrdup(buf, GFP_KERNEL); |
7542 | entry->mode = 0555; | 7608 | entry->mode = 0555; |
@@ -7632,13 +7698,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7632 | 7698 | ||
7633 | /* Update our root-domain */ | 7699 | /* Update our root-domain */ |
7634 | rq = cpu_rq(cpu); | 7700 | rq = cpu_rq(cpu); |
7635 | spin_lock_irqsave(&rq->lock, flags); | 7701 | raw_spin_lock_irqsave(&rq->lock, flags); |
7636 | if (rq->rd) { | 7702 | if (rq->rd) { |
7637 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7703 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7638 | 7704 | ||
7639 | set_rq_online(rq); | 7705 | set_rq_online(rq); |
7640 | } | 7706 | } |
7641 | spin_unlock_irqrestore(&rq->lock, flags); | 7707 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7642 | break; | 7708 | break; |
7643 | 7709 | ||
7644 | #ifdef CONFIG_HOTPLUG_CPU | 7710 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -7663,14 +7729,13 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7663 | put_task_struct(rq->migration_thread); | 7729 | put_task_struct(rq->migration_thread); |
7664 | rq->migration_thread = NULL; | 7730 | rq->migration_thread = NULL; |
7665 | /* Idle task back to normal (off runqueue, low prio) */ | 7731 | /* Idle task back to normal (off runqueue, low prio) */ |
7666 | spin_lock_irq(&rq->lock); | 7732 | raw_spin_lock_irq(&rq->lock); |
7667 | update_rq_clock(rq); | 7733 | update_rq_clock(rq); |
7668 | deactivate_task(rq, rq->idle, 0); | 7734 | deactivate_task(rq, rq->idle, 0); |
7669 | rq->idle->static_prio = MAX_PRIO; | ||
7670 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); | 7735 | __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); |
7671 | rq->idle->sched_class = &idle_sched_class; | 7736 | rq->idle->sched_class = &idle_sched_class; |
7672 | migrate_dead_tasks(cpu); | 7737 | migrate_dead_tasks(cpu); |
7673 | spin_unlock_irq(&rq->lock); | 7738 | raw_spin_unlock_irq(&rq->lock); |
7674 | cpuset_unlock(); | 7739 | cpuset_unlock(); |
7675 | migrate_nr_uninterruptible(rq); | 7740 | migrate_nr_uninterruptible(rq); |
7676 | BUG_ON(rq->nr_running != 0); | 7741 | BUG_ON(rq->nr_running != 0); |
@@ -7680,30 +7745,30 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
7680 | * they didn't take sched_hotcpu_mutex. Just wake up | 7745 | * they didn't take sched_hotcpu_mutex. Just wake up |
7681 | * the requestors. | 7746 | * the requestors. |
7682 | */ | 7747 | */ |
7683 | spin_lock_irq(&rq->lock); | 7748 | raw_spin_lock_irq(&rq->lock); |
7684 | while (!list_empty(&rq->migration_queue)) { | 7749 | while (!list_empty(&rq->migration_queue)) { |
7685 | struct migration_req *req; | 7750 | struct migration_req *req; |
7686 | 7751 | ||
7687 | req = list_entry(rq->migration_queue.next, | 7752 | req = list_entry(rq->migration_queue.next, |
7688 | struct migration_req, list); | 7753 | struct migration_req, list); |
7689 | list_del_init(&req->list); | 7754 | list_del_init(&req->list); |
7690 | spin_unlock_irq(&rq->lock); | 7755 | raw_spin_unlock_irq(&rq->lock); |
7691 | complete(&req->done); | 7756 | complete(&req->done); |
7692 | spin_lock_irq(&rq->lock); | 7757 | raw_spin_lock_irq(&rq->lock); |
7693 | } | 7758 | } |
7694 | spin_unlock_irq(&rq->lock); | 7759 | raw_spin_unlock_irq(&rq->lock); |
7695 | break; | 7760 | break; |
7696 | 7761 | ||
7697 | case CPU_DYING: | 7762 | case CPU_DYING: |
7698 | case CPU_DYING_FROZEN: | 7763 | case CPU_DYING_FROZEN: |
7699 | /* Update our root-domain */ | 7764 | /* Update our root-domain */ |
7700 | rq = cpu_rq(cpu); | 7765 | rq = cpu_rq(cpu); |
7701 | spin_lock_irqsave(&rq->lock, flags); | 7766 | raw_spin_lock_irqsave(&rq->lock, flags); |
7702 | if (rq->rd) { | 7767 | if (rq->rd) { |
7703 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); | 7768 | BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); |
7704 | set_rq_offline(rq); | 7769 | set_rq_offline(rq); |
7705 | } | 7770 | } |
7706 | spin_unlock_irqrestore(&rq->lock, flags); | 7771 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7707 | break; | 7772 | break; |
7708 | #endif | 7773 | #endif |
7709 | } | 7774 | } |
@@ -7740,6 +7805,16 @@ early_initcall(migration_init); | |||
7740 | 7805 | ||
7741 | #ifdef CONFIG_SCHED_DEBUG | 7806 | #ifdef CONFIG_SCHED_DEBUG |
7742 | 7807 | ||
7808 | static __read_mostly int sched_domain_debug_enabled; | ||
7809 | |||
7810 | static int __init sched_domain_debug_setup(char *str) | ||
7811 | { | ||
7812 | sched_domain_debug_enabled = 1; | ||
7813 | |||
7814 | return 0; | ||
7815 | } | ||
7816 | early_param("sched_debug", sched_domain_debug_setup); | ||
7817 | |||
7743 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | 7818 | static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, |
7744 | struct cpumask *groupmask) | 7819 | struct cpumask *groupmask) |
7745 | { | 7820 | { |
@@ -7826,6 +7901,9 @@ static void sched_domain_debug(struct sched_domain *sd, int cpu) | |||
7826 | cpumask_var_t groupmask; | 7901 | cpumask_var_t groupmask; |
7827 | int level = 0; | 7902 | int level = 0; |
7828 | 7903 | ||
7904 | if (!sched_domain_debug_enabled) | ||
7905 | return; | ||
7906 | |||
7829 | if (!sd) { | 7907 | if (!sd) { |
7830 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); | 7908 | printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu); |
7831 | return; | 7909 | return; |
@@ -7905,6 +7983,8 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
7905 | 7983 | ||
7906 | static void free_rootdomain(struct root_domain *rd) | 7984 | static void free_rootdomain(struct root_domain *rd) |
7907 | { | 7985 | { |
7986 | synchronize_sched(); | ||
7987 | |||
7908 | cpupri_cleanup(&rd->cpupri); | 7988 | cpupri_cleanup(&rd->cpupri); |
7909 | 7989 | ||
7910 | free_cpumask_var(rd->rto_mask); | 7990 | free_cpumask_var(rd->rto_mask); |
@@ -7918,7 +7998,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7918 | struct root_domain *old_rd = NULL; | 7998 | struct root_domain *old_rd = NULL; |
7919 | unsigned long flags; | 7999 | unsigned long flags; |
7920 | 8000 | ||
7921 | spin_lock_irqsave(&rq->lock, flags); | 8001 | raw_spin_lock_irqsave(&rq->lock, flags); |
7922 | 8002 | ||
7923 | if (rq->rd) { | 8003 | if (rq->rd) { |
7924 | old_rd = rq->rd; | 8004 | old_rd = rq->rd; |
@@ -7944,7 +8024,7 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) | |||
7944 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) | 8024 | if (cpumask_test_cpu(rq->cpu, cpu_active_mask)) |
7945 | set_rq_online(rq); | 8025 | set_rq_online(rq); |
7946 | 8026 | ||
7947 | spin_unlock_irqrestore(&rq->lock, flags); | 8027 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
7948 | 8028 | ||
7949 | if (old_rd) | 8029 | if (old_rd) |
7950 | free_rootdomain(old_rd); | 8030 | free_rootdomain(old_rd); |
@@ -8045,6 +8125,7 @@ static cpumask_var_t cpu_isolated_map; | |||
8045 | /* Setup the mask of cpus configured for isolated domains */ | 8125 | /* Setup the mask of cpus configured for isolated domains */ |
8046 | static int __init isolated_cpu_setup(char *str) | 8126 | static int __init isolated_cpu_setup(char *str) |
8047 | { | 8127 | { |
8128 | alloc_bootmem_cpumask_var(&cpu_isolated_map); | ||
8048 | cpulist_parse(str, cpu_isolated_map); | 8129 | cpulist_parse(str, cpu_isolated_map); |
8049 | return 1; | 8130 | return 1; |
8050 | } | 8131 | } |
@@ -8229,14 +8310,14 @@ enum s_alloc { | |||
8229 | */ | 8310 | */ |
8230 | #ifdef CONFIG_SCHED_SMT | 8311 | #ifdef CONFIG_SCHED_SMT |
8231 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); | 8312 | static DEFINE_PER_CPU(struct static_sched_domain, cpu_domains); |
8232 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_cpus); | 8313 | static DEFINE_PER_CPU(struct static_sched_group, sched_groups); |
8233 | 8314 | ||
8234 | static int | 8315 | static int |
8235 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | 8316 | cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, |
8236 | struct sched_group **sg, struct cpumask *unused) | 8317 | struct sched_group **sg, struct cpumask *unused) |
8237 | { | 8318 | { |
8238 | if (sg) | 8319 | if (sg) |
8239 | *sg = &per_cpu(sched_group_cpus, cpu).sg; | 8320 | *sg = &per_cpu(sched_groups, cpu).sg; |
8240 | return cpu; | 8321 | return cpu; |
8241 | } | 8322 | } |
8242 | #endif /* CONFIG_SCHED_SMT */ | 8323 | #endif /* CONFIG_SCHED_SMT */ |
@@ -8881,7 +8962,7 @@ static int build_sched_domains(const struct cpumask *cpu_map) | |||
8881 | return __build_sched_domains(cpu_map, NULL); | 8962 | return __build_sched_domains(cpu_map, NULL); |
8882 | } | 8963 | } |
8883 | 8964 | ||
8884 | static struct cpumask *doms_cur; /* current sched domains */ | 8965 | static cpumask_var_t *doms_cur; /* current sched domains */ |
8885 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ | 8966 | static int ndoms_cur; /* number of sched domains in 'doms_cur' */ |
8886 | static struct sched_domain_attr *dattr_cur; | 8967 | static struct sched_domain_attr *dattr_cur; |
8887 | /* attribues of custom domains in 'doms_cur' */ | 8968 | /* attribues of custom domains in 'doms_cur' */ |
@@ -8903,6 +8984,31 @@ int __attribute__((weak)) arch_update_cpu_topology(void) | |||
8903 | return 0; | 8984 | return 0; |
8904 | } | 8985 | } |
8905 | 8986 | ||
8987 | cpumask_var_t *alloc_sched_domains(unsigned int ndoms) | ||
8988 | { | ||
8989 | int i; | ||
8990 | cpumask_var_t *doms; | ||
8991 | |||
8992 | doms = kmalloc(sizeof(*doms) * ndoms, GFP_KERNEL); | ||
8993 | if (!doms) | ||
8994 | return NULL; | ||
8995 | for (i = 0; i < ndoms; i++) { | ||
8996 | if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) { | ||
8997 | free_sched_domains(doms, i); | ||
8998 | return NULL; | ||
8999 | } | ||
9000 | } | ||
9001 | return doms; | ||
9002 | } | ||
9003 | |||
9004 | void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms) | ||
9005 | { | ||
9006 | unsigned int i; | ||
9007 | for (i = 0; i < ndoms; i++) | ||
9008 | free_cpumask_var(doms[i]); | ||
9009 | kfree(doms); | ||
9010 | } | ||
9011 | |||
8906 | /* | 9012 | /* |
8907 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | 9013 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. |
8908 | * For now this just excludes isolated cpus, but could be used to | 9014 | * For now this just excludes isolated cpus, but could be used to |
@@ -8914,12 +9020,12 @@ static int arch_init_sched_domains(const struct cpumask *cpu_map) | |||
8914 | 9020 | ||
8915 | arch_update_cpu_topology(); | 9021 | arch_update_cpu_topology(); |
8916 | ndoms_cur = 1; | 9022 | ndoms_cur = 1; |
8917 | doms_cur = kmalloc(cpumask_size(), GFP_KERNEL); | 9023 | doms_cur = alloc_sched_domains(ndoms_cur); |
8918 | if (!doms_cur) | 9024 | if (!doms_cur) |
8919 | doms_cur = fallback_doms; | 9025 | doms_cur = &fallback_doms; |
8920 | cpumask_andnot(doms_cur, cpu_map, cpu_isolated_map); | 9026 | cpumask_andnot(doms_cur[0], cpu_map, cpu_isolated_map); |
8921 | dattr_cur = NULL; | 9027 | dattr_cur = NULL; |
8922 | err = build_sched_domains(doms_cur); | 9028 | err = build_sched_domains(doms_cur[0]); |
8923 | register_sched_domain_sysctl(); | 9029 | register_sched_domain_sysctl(); |
8924 | 9030 | ||
8925 | return err; | 9031 | return err; |
@@ -8969,19 +9075,19 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8969 | * doms_new[] to the current sched domain partitioning, doms_cur[]. | 9075 | * doms_new[] to the current sched domain partitioning, doms_cur[]. |
8970 | * It destroys each deleted domain and builds each new domain. | 9076 | * It destroys each deleted domain and builds each new domain. |
8971 | * | 9077 | * |
8972 | * 'doms_new' is an array of cpumask's of length 'ndoms_new'. | 9078 | * 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'. |
8973 | * The masks don't intersect (don't overlap.) We should setup one | 9079 | * The masks don't intersect (don't overlap.) We should setup one |
8974 | * sched domain for each mask. CPUs not in any of the cpumasks will | 9080 | * sched domain for each mask. CPUs not in any of the cpumasks will |
8975 | * not be load balanced. If the same cpumask appears both in the | 9081 | * not be load balanced. If the same cpumask appears both in the |
8976 | * current 'doms_cur' domains and in the new 'doms_new', we can leave | 9082 | * current 'doms_cur' domains and in the new 'doms_new', we can leave |
8977 | * it as it is. | 9083 | * it as it is. |
8978 | * | 9084 | * |
8979 | * The passed in 'doms_new' should be kmalloc'd. This routine takes | 9085 | * The passed in 'doms_new' should be allocated using |
8980 | * ownership of it and will kfree it when done with it. If the caller | 9086 | * alloc_sched_domains. This routine takes ownership of it and will |
8981 | * failed the kmalloc call, then it can pass in doms_new == NULL && | 9087 | * free_sched_domains it when done with it. If the caller failed the |
8982 | * ndoms_new == 1, and partition_sched_domains() will fallback to | 9088 | * alloc call, then it can pass in doms_new == NULL && ndoms_new == 1, |
8983 | * the single partition 'fallback_doms', it also forces the domains | 9089 | * and partition_sched_domains() will fallback to the single partition |
8984 | * to be rebuilt. | 9090 | * 'fallback_doms', it also forces the domains to be rebuilt. |
8985 | * | 9091 | * |
8986 | * If doms_new == NULL it will be replaced with cpu_online_mask. | 9092 | * If doms_new == NULL it will be replaced with cpu_online_mask. |
8987 | * ndoms_new == 0 is a special case for destroying existing domains, | 9093 | * ndoms_new == 0 is a special case for destroying existing domains, |
@@ -8989,8 +9095,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur, | |||
8989 | * | 9095 | * |
8990 | * Call with hotplug lock held | 9096 | * Call with hotplug lock held |
8991 | */ | 9097 | */ |
8992 | /* FIXME: Change to struct cpumask *doms_new[] */ | 9098 | void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[], |
8993 | void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | ||
8994 | struct sched_domain_attr *dattr_new) | 9099 | struct sched_domain_attr *dattr_new) |
8995 | { | 9100 | { |
8996 | int i, j, n; | 9101 | int i, j, n; |
@@ -9009,40 +9114,40 @@ void partition_sched_domains(int ndoms_new, struct cpumask *doms_new, | |||
9009 | /* Destroy deleted domains */ | 9114 | /* Destroy deleted domains */ |
9010 | for (i = 0; i < ndoms_cur; i++) { | 9115 | for (i = 0; i < ndoms_cur; i++) { |
9011 | for (j = 0; j < n && !new_topology; j++) { | 9116 | for (j = 0; j < n && !new_topology; j++) { |
9012 | if (cpumask_equal(&doms_cur[i], &doms_new[j]) | 9117 | if (cpumask_equal(doms_cur[i], doms_new[j]) |
9013 | && dattrs_equal(dattr_cur, i, dattr_new, j)) | 9118 | && dattrs_equal(dattr_cur, i, dattr_new, j)) |
9014 | goto match1; | 9119 | goto match1; |
9015 | } | 9120 | } |
9016 | /* no match - a current sched domain not in new doms_new[] */ | 9121 | /* no match - a current sched domain not in new doms_new[] */ |
9017 | detach_destroy_domains(doms_cur + i); | 9122 | detach_destroy_domains(doms_cur[i]); |
9018 | match1: | 9123 | match1: |
9019 | ; | 9124 | ; |
9020 | } | 9125 | } |
9021 | 9126 | ||
9022 | if (doms_new == NULL) { | 9127 | if (doms_new == NULL) { |
9023 | ndoms_cur = 0; | 9128 | ndoms_cur = 0; |
9024 | doms_new = fallback_doms; | 9129 | doms_new = &fallback_doms; |
9025 | cpumask_andnot(&doms_new[0], cpu_online_mask, cpu_isolated_map); | 9130 | cpumask_andnot(doms_new[0], cpu_active_mask, cpu_isolated_map); |
9026 | WARN_ON_ONCE(dattr_new); | 9131 | WARN_ON_ONCE(dattr_new); |
9027 | } | 9132 | } |
9028 | 9133 | ||
9029 | /* Build new domains */ | 9134 | /* Build new domains */ |
9030 | for (i = 0; i < ndoms_new; i++) { | 9135 | for (i = 0; i < ndoms_new; i++) { |
9031 | for (j = 0; j < ndoms_cur && !new_topology; j++) { | 9136 | for (j = 0; j < ndoms_cur && !new_topology; j++) { |
9032 | if (cpumask_equal(&doms_new[i], &doms_cur[j]) | 9137 | if (cpumask_equal(doms_new[i], doms_cur[j]) |
9033 | && dattrs_equal(dattr_new, i, dattr_cur, j)) | 9138 | && dattrs_equal(dattr_new, i, dattr_cur, j)) |
9034 | goto match2; | 9139 | goto match2; |
9035 | } | 9140 | } |
9036 | /* no match - add a new doms_new */ | 9141 | /* no match - add a new doms_new */ |
9037 | __build_sched_domains(doms_new + i, | 9142 | __build_sched_domains(doms_new[i], |
9038 | dattr_new ? dattr_new + i : NULL); | 9143 | dattr_new ? dattr_new + i : NULL); |
9039 | match2: | 9144 | match2: |
9040 | ; | 9145 | ; |
9041 | } | 9146 | } |
9042 | 9147 | ||
9043 | /* Remember the new sched domains */ | 9148 | /* Remember the new sched domains */ |
9044 | if (doms_cur != fallback_doms) | 9149 | if (doms_cur != &fallback_doms) |
9045 | kfree(doms_cur); | 9150 | free_sched_domains(doms_cur, ndoms_cur); |
9046 | kfree(dattr_cur); /* kfree(NULL) is safe */ | 9151 | kfree(dattr_cur); /* kfree(NULL) is safe */ |
9047 | doms_cur = doms_new; | 9152 | doms_cur = doms_new; |
9048 | dattr_cur = dattr_new; | 9153 | dattr_cur = dattr_new; |
@@ -9153,8 +9258,10 @@ static int update_sched_domains(struct notifier_block *nfb, | |||
9153 | switch (action) { | 9258 | switch (action) { |
9154 | case CPU_ONLINE: | 9259 | case CPU_ONLINE: |
9155 | case CPU_ONLINE_FROZEN: | 9260 | case CPU_ONLINE_FROZEN: |
9156 | case CPU_DEAD: | 9261 | case CPU_DOWN_PREPARE: |
9157 | case CPU_DEAD_FROZEN: | 9262 | case CPU_DOWN_PREPARE_FROZEN: |
9263 | case CPU_DOWN_FAILED: | ||
9264 | case CPU_DOWN_FAILED_FROZEN: | ||
9158 | partition_sched_domains(1, NULL, NULL); | 9265 | partition_sched_domains(1, NULL, NULL); |
9159 | return NOTIFY_OK; | 9266 | return NOTIFY_OK; |
9160 | 9267 | ||
@@ -9201,7 +9308,7 @@ void __init sched_init_smp(void) | |||
9201 | #endif | 9308 | #endif |
9202 | get_online_cpus(); | 9309 | get_online_cpus(); |
9203 | mutex_lock(&sched_domains_mutex); | 9310 | mutex_lock(&sched_domains_mutex); |
9204 | arch_init_sched_domains(cpu_online_mask); | 9311 | arch_init_sched_domains(cpu_active_mask); |
9205 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); | 9312 | cpumask_andnot(non_isolated_cpus, cpu_possible_mask, cpu_isolated_map); |
9206 | if (cpumask_empty(non_isolated_cpus)) | 9313 | if (cpumask_empty(non_isolated_cpus)) |
9207 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); | 9314 | cpumask_set_cpu(smp_processor_id(), non_isolated_cpus); |
@@ -9274,13 +9381,13 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
9274 | #ifdef CONFIG_SMP | 9381 | #ifdef CONFIG_SMP |
9275 | rt_rq->rt_nr_migratory = 0; | 9382 | rt_rq->rt_nr_migratory = 0; |
9276 | rt_rq->overloaded = 0; | 9383 | rt_rq->overloaded = 0; |
9277 | plist_head_init(&rt_rq->pushable_tasks, &rq->lock); | 9384 | plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock); |
9278 | #endif | 9385 | #endif |
9279 | 9386 | ||
9280 | rt_rq->rt_time = 0; | 9387 | rt_rq->rt_time = 0; |
9281 | rt_rq->rt_throttled = 0; | 9388 | rt_rq->rt_throttled = 0; |
9282 | rt_rq->rt_runtime = 0; | 9389 | rt_rq->rt_runtime = 0; |
9283 | spin_lock_init(&rt_rq->rt_runtime_lock); | 9390 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); |
9284 | 9391 | ||
9285 | #ifdef CONFIG_RT_GROUP_SCHED | 9392 | #ifdef CONFIG_RT_GROUP_SCHED |
9286 | rt_rq->rt_nr_boosted = 0; | 9393 | rt_rq->rt_nr_boosted = 0; |
@@ -9364,10 +9471,6 @@ void __init sched_init(void) | |||
9364 | #ifdef CONFIG_CPUMASK_OFFSTACK | 9471 | #ifdef CONFIG_CPUMASK_OFFSTACK |
9365 | alloc_size += num_possible_cpus() * cpumask_size(); | 9472 | alloc_size += num_possible_cpus() * cpumask_size(); |
9366 | #endif | 9473 | #endif |
9367 | /* | ||
9368 | * As sched_init() is called before page_alloc is setup, | ||
9369 | * we use alloc_bootmem(). | ||
9370 | */ | ||
9371 | if (alloc_size) { | 9474 | if (alloc_size) { |
9372 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); | 9475 | ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); |
9373 | 9476 | ||
@@ -9444,7 +9547,7 @@ void __init sched_init(void) | |||
9444 | struct rq *rq; | 9547 | struct rq *rq; |
9445 | 9548 | ||
9446 | rq = cpu_rq(i); | 9549 | rq = cpu_rq(i); |
9447 | spin_lock_init(&rq->lock); | 9550 | raw_spin_lock_init(&rq->lock); |
9448 | rq->nr_running = 0; | 9551 | rq->nr_running = 0; |
9449 | rq->calc_load_active = 0; | 9552 | rq->calc_load_active = 0; |
9450 | rq->calc_load_update = jiffies + LOAD_FREQ; | 9553 | rq->calc_load_update = jiffies + LOAD_FREQ; |
@@ -9504,7 +9607,7 @@ void __init sched_init(void) | |||
9504 | #elif defined CONFIG_USER_SCHED | 9607 | #elif defined CONFIG_USER_SCHED |
9505 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); | 9608 | init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL); |
9506 | init_tg_rt_entry(&init_task_group, | 9609 | init_tg_rt_entry(&init_task_group, |
9507 | &per_cpu(init_rt_rq, i), | 9610 | &per_cpu(init_rt_rq_var, i), |
9508 | &per_cpu(init_sched_rt_entity, i), i, 1, | 9611 | &per_cpu(init_sched_rt_entity, i), i, 1, |
9509 | root_task_group.rt_se[i]); | 9612 | root_task_group.rt_se[i]); |
9510 | #endif | 9613 | #endif |
@@ -9522,6 +9625,8 @@ void __init sched_init(void) | |||
9522 | rq->cpu = i; | 9625 | rq->cpu = i; |
9523 | rq->online = 0; | 9626 | rq->online = 0; |
9524 | rq->migration_thread = NULL; | 9627 | rq->migration_thread = NULL; |
9628 | rq->idle_stamp = 0; | ||
9629 | rq->avg_idle = 2*sysctl_sched_migration_cost; | ||
9525 | INIT_LIST_HEAD(&rq->migration_queue); | 9630 | INIT_LIST_HEAD(&rq->migration_queue); |
9526 | rq_attach_root(rq, &def_root_domain); | 9631 | rq_attach_root(rq, &def_root_domain); |
9527 | #endif | 9632 | #endif |
@@ -9540,7 +9645,7 @@ void __init sched_init(void) | |||
9540 | #endif | 9645 | #endif |
9541 | 9646 | ||
9542 | #ifdef CONFIG_RT_MUTEXES | 9647 | #ifdef CONFIG_RT_MUTEXES |
9543 | plist_head_init(&init_task.pi_waiters, &init_task.pi_lock); | 9648 | plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock); |
9544 | #endif | 9649 | #endif |
9545 | 9650 | ||
9546 | /* | 9651 | /* |
@@ -9571,7 +9676,9 @@ void __init sched_init(void) | |||
9571 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); | 9676 | zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); |
9572 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); | 9677 | alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); |
9573 | #endif | 9678 | #endif |
9574 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9679 | /* May be allocated at isolcpus cmdline parse time */ |
9680 | if (cpu_isolated_map == NULL) | ||
9681 | zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | ||
9575 | #endif /* SMP */ | 9682 | #endif /* SMP */ |
9576 | 9683 | ||
9577 | perf_event_init(); | 9684 | perf_event_init(); |
@@ -9582,7 +9689,7 @@ void __init sched_init(void) | |||
9582 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 9689 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP |
9583 | static inline int preempt_count_equals(int preempt_offset) | 9690 | static inline int preempt_count_equals(int preempt_offset) |
9584 | { | 9691 | { |
9585 | int nested = preempt_count() & ~PREEMPT_ACTIVE; | 9692 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
9586 | 9693 | ||
9587 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); | 9694 | return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); |
9588 | } | 9695 | } |
@@ -9663,13 +9770,13 @@ void normalize_rt_tasks(void) | |||
9663 | continue; | 9770 | continue; |
9664 | } | 9771 | } |
9665 | 9772 | ||
9666 | spin_lock(&p->pi_lock); | 9773 | raw_spin_lock(&p->pi_lock); |
9667 | rq = __task_rq_lock(p); | 9774 | rq = __task_rq_lock(p); |
9668 | 9775 | ||
9669 | normalize_task(rq, p); | 9776 | normalize_task(rq, p); |
9670 | 9777 | ||
9671 | __task_rq_unlock(rq); | 9778 | __task_rq_unlock(rq); |
9672 | spin_unlock(&p->pi_lock); | 9779 | raw_spin_unlock(&p->pi_lock); |
9673 | } while_each_thread(g, p); | 9780 | } while_each_thread(g, p); |
9674 | 9781 | ||
9675 | read_unlock_irqrestore(&tasklist_lock, flags); | 9782 | read_unlock_irqrestore(&tasklist_lock, flags); |
@@ -9765,13 +9872,15 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
9765 | se = kzalloc_node(sizeof(struct sched_entity), | 9872 | se = kzalloc_node(sizeof(struct sched_entity), |
9766 | GFP_KERNEL, cpu_to_node(i)); | 9873 | GFP_KERNEL, cpu_to_node(i)); |
9767 | if (!se) | 9874 | if (!se) |
9768 | goto err; | 9875 | goto err_free_rq; |
9769 | 9876 | ||
9770 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); | 9877 | init_tg_cfs_entry(tg, cfs_rq, se, i, 0, parent->se[i]); |
9771 | } | 9878 | } |
9772 | 9879 | ||
9773 | return 1; | 9880 | return 1; |
9774 | 9881 | ||
9882 | err_free_rq: | ||
9883 | kfree(cfs_rq); | ||
9775 | err: | 9884 | err: |
9776 | return 0; | 9885 | return 0; |
9777 | } | 9886 | } |
@@ -9853,13 +9962,15 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
9853 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), | 9962 | rt_se = kzalloc_node(sizeof(struct sched_rt_entity), |
9854 | GFP_KERNEL, cpu_to_node(i)); | 9963 | GFP_KERNEL, cpu_to_node(i)); |
9855 | if (!rt_se) | 9964 | if (!rt_se) |
9856 | goto err; | 9965 | goto err_free_rq; |
9857 | 9966 | ||
9858 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); | 9967 | init_tg_rt_entry(tg, rt_rq, rt_se, i, 0, parent->rt_se[i]); |
9859 | } | 9968 | } |
9860 | 9969 | ||
9861 | return 1; | 9970 | return 1; |
9862 | 9971 | ||
9972 | err_free_rq: | ||
9973 | kfree(rt_rq); | ||
9863 | err: | 9974 | err: |
9864 | return 0; | 9975 | return 0; |
9865 | } | 9976 | } |
@@ -9993,7 +10104,7 @@ void sched_move_task(struct task_struct *tsk) | |||
9993 | 10104 | ||
9994 | #ifdef CONFIG_FAIR_GROUP_SCHED | 10105 | #ifdef CONFIG_FAIR_GROUP_SCHED |
9995 | if (tsk->sched_class->moved_group) | 10106 | if (tsk->sched_class->moved_group) |
9996 | tsk->sched_class->moved_group(tsk); | 10107 | tsk->sched_class->moved_group(tsk, on_rq); |
9997 | #endif | 10108 | #endif |
9998 | 10109 | ||
9999 | if (unlikely(running)) | 10110 | if (unlikely(running)) |
@@ -10028,9 +10139,9 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
10028 | struct rq *rq = cfs_rq->rq; | 10139 | struct rq *rq = cfs_rq->rq; |
10029 | unsigned long flags; | 10140 | unsigned long flags; |
10030 | 10141 | ||
10031 | spin_lock_irqsave(&rq->lock, flags); | 10142 | raw_spin_lock_irqsave(&rq->lock, flags); |
10032 | __set_se_shares(se, shares); | 10143 | __set_se_shares(se, shares); |
10033 | spin_unlock_irqrestore(&rq->lock, flags); | 10144 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10034 | } | 10145 | } |
10035 | 10146 | ||
10036 | static DEFINE_MUTEX(shares_mutex); | 10147 | static DEFINE_MUTEX(shares_mutex); |
@@ -10215,18 +10326,18 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
10215 | if (err) | 10326 | if (err) |
10216 | goto unlock; | 10327 | goto unlock; |
10217 | 10328 | ||
10218 | spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10329 | raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10219 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); | 10330 | tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period); |
10220 | tg->rt_bandwidth.rt_runtime = rt_runtime; | 10331 | tg->rt_bandwidth.rt_runtime = rt_runtime; |
10221 | 10332 | ||
10222 | for_each_possible_cpu(i) { | 10333 | for_each_possible_cpu(i) { |
10223 | struct rt_rq *rt_rq = tg->rt_rq[i]; | 10334 | struct rt_rq *rt_rq = tg->rt_rq[i]; |
10224 | 10335 | ||
10225 | spin_lock(&rt_rq->rt_runtime_lock); | 10336 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10226 | rt_rq->rt_runtime = rt_runtime; | 10337 | rt_rq->rt_runtime = rt_runtime; |
10227 | spin_unlock(&rt_rq->rt_runtime_lock); | 10338 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10228 | } | 10339 | } |
10229 | spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 10340 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
10230 | unlock: | 10341 | unlock: |
10231 | read_unlock(&tasklist_lock); | 10342 | read_unlock(&tasklist_lock); |
10232 | mutex_unlock(&rt_constraints_mutex); | 10343 | mutex_unlock(&rt_constraints_mutex); |
@@ -10331,15 +10442,15 @@ static int sched_rt_global_constraints(void) | |||
10331 | if (sysctl_sched_rt_runtime == 0) | 10442 | if (sysctl_sched_rt_runtime == 0) |
10332 | return -EBUSY; | 10443 | return -EBUSY; |
10333 | 10444 | ||
10334 | spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); | 10445 | raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); |
10335 | for_each_possible_cpu(i) { | 10446 | for_each_possible_cpu(i) { |
10336 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; | 10447 | struct rt_rq *rt_rq = &cpu_rq(i)->rt; |
10337 | 10448 | ||
10338 | spin_lock(&rt_rq->rt_runtime_lock); | 10449 | raw_spin_lock(&rt_rq->rt_runtime_lock); |
10339 | rt_rq->rt_runtime = global_rt_runtime(); | 10450 | rt_rq->rt_runtime = global_rt_runtime(); |
10340 | spin_unlock(&rt_rq->rt_runtime_lock); | 10451 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
10341 | } | 10452 | } |
10342 | spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); | 10453 | raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); |
10343 | 10454 | ||
10344 | return 0; | 10455 | return 0; |
10345 | } | 10456 | } |
@@ -10630,9 +10741,9 @@ static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) | |||
10630 | /* | 10741 | /* |
10631 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. | 10742 | * Take rq->lock to make 64-bit read safe on 32-bit platforms. |
10632 | */ | 10743 | */ |
10633 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10744 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10634 | data = *cpuusage; | 10745 | data = *cpuusage; |
10635 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10746 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10636 | #else | 10747 | #else |
10637 | data = *cpuusage; | 10748 | data = *cpuusage; |
10638 | #endif | 10749 | #endif |
@@ -10648,9 +10759,9 @@ static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) | |||
10648 | /* | 10759 | /* |
10649 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. | 10760 | * Take rq->lock to make 64-bit write safe on 32-bit platforms. |
10650 | */ | 10761 | */ |
10651 | spin_lock_irq(&cpu_rq(cpu)->lock); | 10762 | raw_spin_lock_irq(&cpu_rq(cpu)->lock); |
10652 | *cpuusage = val; | 10763 | *cpuusage = val; |
10653 | spin_unlock_irq(&cpu_rq(cpu)->lock); | 10764 | raw_spin_unlock_irq(&cpu_rq(cpu)->lock); |
10654 | #else | 10765 | #else |
10655 | *cpuusage = val; | 10766 | *cpuusage = val; |
10656 | #endif | 10767 | #endif |
@@ -10884,9 +10995,9 @@ void synchronize_sched_expedited(void) | |||
10884 | init_completion(&req->done); | 10995 | init_completion(&req->done); |
10885 | req->task = NULL; | 10996 | req->task = NULL; |
10886 | req->dest_cpu = RCU_MIGRATION_NEED_QS; | 10997 | req->dest_cpu = RCU_MIGRATION_NEED_QS; |
10887 | spin_lock_irqsave(&rq->lock, flags); | 10998 | raw_spin_lock_irqsave(&rq->lock, flags); |
10888 | list_add(&req->list, &rq->migration_queue); | 10999 | list_add(&req->list, &rq->migration_queue); |
10889 | spin_unlock_irqrestore(&rq->lock, flags); | 11000 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10890 | wake_up_process(rq->migration_thread); | 11001 | wake_up_process(rq->migration_thread); |
10891 | } | 11002 | } |
10892 | for_each_online_cpu(cpu) { | 11003 | for_each_online_cpu(cpu) { |
@@ -10894,13 +11005,14 @@ void synchronize_sched_expedited(void) | |||
10894 | req = &per_cpu(rcu_migration_req, cpu); | 11005 | req = &per_cpu(rcu_migration_req, cpu); |
10895 | rq = cpu_rq(cpu); | 11006 | rq = cpu_rq(cpu); |
10896 | wait_for_completion(&req->done); | 11007 | wait_for_completion(&req->done); |
10897 | spin_lock_irqsave(&rq->lock, flags); | 11008 | raw_spin_lock_irqsave(&rq->lock, flags); |
10898 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) | 11009 | if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC)) |
10899 | need_full_sync = 1; | 11010 | need_full_sync = 1; |
10900 | req->dest_cpu = RCU_MIGRATION_IDLE; | 11011 | req->dest_cpu = RCU_MIGRATION_IDLE; |
10901 | spin_unlock_irqrestore(&rq->lock, flags); | 11012 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
10902 | } | 11013 | } |
10903 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; | 11014 | rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE; |
11015 | synchronize_sched_expedited_count++; | ||
10904 | mutex_unlock(&rcu_sched_expedited_mutex); | 11016 | mutex_unlock(&rcu_sched_expedited_mutex); |
10905 | put_online_cpus(); | 11017 | put_online_cpus(); |
10906 | if (need_full_sync) | 11018 | if (need_full_sync) |