Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c | 425
 1 file changed, 181 insertions(+), 244 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 99e6d850ecab..04160d277e7a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -571,8 +571,10 @@ struct rq {
 #endif
 
 #ifdef CONFIG_SCHED_HRTICK
-	unsigned long hrtick_flags;
-	ktime_t hrtick_expire;
+#ifdef CONFIG_SMP
+	int hrtick_csd_pending;
+	struct call_single_data hrtick_csd;
+#endif
 	struct hrtimer hrtick_timer;
 #endif
 
@@ -983,13 +985,6 @@ static struct rq *this_rq_lock(void)
 	return rq;
 }
 
-static void __resched_task(struct task_struct *p, int tif_bit);
-
-static inline void resched_task(struct task_struct *p)
-{
-	__resched_task(p, TIF_NEED_RESCHED);
-}
-
 #ifdef CONFIG_SCHED_HRTICK
 /*
  * Use HR-timers to deliver accurate preemption points.
@@ -1001,25 +996,6 @@ static inline void resched_task(struct task_struct *p)
  * When we get rescheduled we reprogram the hrtick_timer outside of the
  * rq->lock.
  */
-static inline void resched_hrt(struct task_struct *p)
-{
-	__resched_task(p, TIF_HRTICK_RESCHED);
-}
-
-static inline void resched_rq(struct rq *rq)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&rq->lock, flags);
-	resched_task(rq->curr);
-	spin_unlock_irqrestore(&rq->lock, flags);
-}
-
-enum {
-	HRTICK_SET,		/* re-programm hrtick_timer */
-	HRTICK_RESET,		/* not a new slice */
-	HRTICK_BLOCK,		/* stop hrtick operations */
-};
 
 /*
  * Use hrtick when:
@@ -1030,40 +1006,11 @@ static inline int hrtick_enabled(struct rq *rq)
 {
 	if (!sched_feat(HRTICK))
 		return 0;
-	if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
+	if (!cpu_active(cpu_of(rq)))
 		return 0;
 	return hrtimer_is_hres_active(&rq->hrtick_timer);
 }
 
-/*
- * Called to set the hrtick timer state.
- *
- * called with rq->lock held and irqs disabled
- */
-static void hrtick_start(struct rq *rq, u64 delay, int reset)
-{
-	assert_spin_locked(&rq->lock);
-
-	/*
-	 * preempt at: now + delay
-	 */
-	rq->hrtick_expire =
-		ktime_add_ns(rq->hrtick_timer.base->get_time(), delay);
-	/*
-	 * indicate we need to program the timer
-	 */
-	__set_bit(HRTICK_SET, &rq->hrtick_flags);
-	if (reset)
-		__set_bit(HRTICK_RESET, &rq->hrtick_flags);
-
-	/*
-	 * New slices are called from the schedule path and don't need a
-	 * forced reschedule.
-	 */
-	if (reset)
-		resched_hrt(rq->curr);
-}
-
 static void hrtick_clear(struct rq *rq)
 {
 	if (hrtimer_active(&rq->hrtick_timer))
@@ -1071,32 +1018,6 @@ static void hrtick_clear(struct rq *rq)
 }
 
 /*
- * Update the timer from the possible pending state.
- */
-static void hrtick_set(struct rq *rq)
-{
-	ktime_t time;
-	int set, reset;
-	unsigned long flags;
-
-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
-
-	spin_lock_irqsave(&rq->lock, flags);
-	set = __test_and_clear_bit(HRTICK_SET, &rq->hrtick_flags);
-	reset = __test_and_clear_bit(HRTICK_RESET, &rq->hrtick_flags);
-	time = rq->hrtick_expire;
-	clear_thread_flag(TIF_HRTICK_RESCHED);
-	spin_unlock_irqrestore(&rq->lock, flags);
-
-	if (set) {
-		hrtimer_start(&rq->hrtick_timer, time, HRTIMER_MODE_ABS);
-		if (reset && !hrtimer_active(&rq->hrtick_timer))
-			resched_rq(rq);
-	} else
-		hrtick_clear(rq);
-}
-
-/*
  * High-resolution timer tick.
  * Runs from hardirq context with interrupts disabled.
  */
@@ -1115,27 +1036,37 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 }
 
 #ifdef CONFIG_SMP
-static void hotplug_hrtick_disable(int cpu)
+/*
+ * called from hardirq (IPI) context
+ */
+static void __hrtick_start(void *arg)
 {
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
-
-	spin_lock_irqsave(&rq->lock, flags);
-	rq->hrtick_flags = 0;
-	__set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	struct rq *rq = arg;
 
-	hrtick_clear(rq);
+	spin_lock(&rq->lock);
+	hrtimer_restart(&rq->hrtick_timer);
+	rq->hrtick_csd_pending = 0;
+	spin_unlock(&rq->lock);
 }
 
-static void hotplug_hrtick_enable(int cpu)
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
 {
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long flags;
+	struct hrtimer *timer = &rq->hrtick_timer;
+	ktime_t time = ktime_add_ns(timer->base->get_time(), delay);
 
-	spin_lock_irqsave(&rq->lock, flags);
-	__clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
-	spin_unlock_irqrestore(&rq->lock, flags);
+	timer->expires = time;
+
+	if (rq == this_rq()) {
+		hrtimer_restart(timer);
+	} else if (!rq->hrtick_csd_pending) {
+		__smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
+		rq->hrtick_csd_pending = 1;
+	}
 }
 
 static int
@@ -1150,16 +1081,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_PREPARE_FROZEN:
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		hotplug_hrtick_disable(cpu);
-		return NOTIFY_OK;
-
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
-	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
-		hotplug_hrtick_enable(cpu);
+		hrtick_clear(cpu_rq(cpu));
 		return NOTIFY_OK;
 	}
 
@@ -1170,46 +1092,45 @@ static void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
-#endif /* CONFIG_SMP */
+#else
+/*
+ * Called to set the hrtick timer state.
+ *
+ * called with rq->lock held and irqs disabled
+ */
+static void hrtick_start(struct rq *rq, u64 delay)
+{
+	hrtimer_start(&rq->hrtick_timer, ns_to_ktime(delay), HRTIMER_MODE_REL);
+}
 
-static void init_rq_hrtick(struct rq *rq)
+static void init_hrtick(void)
 {
-	rq->hrtick_flags = 0;
-	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
-	rq->hrtick_timer.function = hrtick;
-	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
+#endif /* CONFIG_SMP */
 
-void hrtick_resched(void)
+static void init_rq_hrtick(struct rq *rq)
 {
-	struct rq *rq;
-	unsigned long flags;
+#ifdef CONFIG_SMP
+	rq->hrtick_csd_pending = 0;
 
-	if (!test_thread_flag(TIF_HRTICK_RESCHED))
-		return;
+	rq->hrtick_csd.flags = 0;
+	rq->hrtick_csd.func = __hrtick_start;
+	rq->hrtick_csd.info = rq;
+#endif
 
-	local_irq_save(flags);
-	rq = cpu_rq(smp_processor_id());
-	hrtick_set(rq);
-	local_irq_restore(flags);
+	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	rq->hrtick_timer.function = hrtick;
+	rq->hrtick_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
 #else
 static inline void hrtick_clear(struct rq *rq)
 {
 }
 
-static inline void hrtick_set(struct rq *rq)
-{
-}
-
 static inline void init_rq_hrtick(struct rq *rq)
 {
 }
 
-void hrtick_resched(void)
-{
-}
-
 static inline void init_hrtick(void)
 {
 }
@@ -1228,16 +1149,16 @@ static inline void init_hrtick(void)
 #define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
 #endif
 
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
 	int cpu;
 
 	assert_spin_locked(&task_rq(p)->lock);
 
-	if (unlikely(test_tsk_thread_flag(p, tif_bit)))
+	if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED)))
 		return;
 
-	set_tsk_thread_flag(p, tif_bit);
+	set_tsk_thread_flag(p, TIF_NEED_RESCHED);
 
 	cpu = task_cpu(p);
 	if (cpu == smp_processor_id())
@@ -1303,10 +1224,10 @@ void wake_up_idle_cpu(int cpu)
 #endif /* CONFIG_NO_HZ */
 
 #else /* !CONFIG_SMP */
-static void __resched_task(struct task_struct *p, int tif_bit)
+static void resched_task(struct task_struct *p)
 {
 	assert_spin_locked(&task_rq(p)->lock);
-	set_tsk_thread_flag(p, tif_bit);
+	set_tsk_need_resched(p);
 }
 #endif /* CONFIG_SMP */
 
@@ -1946,16 +1867,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
+ * If @match_state is nonzero, it's the @p->state value just checked and
+ * not expected to change. If it changes, i.e. @p might have woken up,
+ * then return zero. When we succeed in waiting for @p to be off its CPU,
+ * we return a positive number (its total switch count). If a second call
+ * a short while later returns the same number, the caller can be sure that
+ * @p has remained unscheduled the whole time.
+ *
  * The caller must ensure that the task *will* unschedule sometime soon,
  * else this function might spin for a *long* time. This function can't
  * be called with interrupts off, or it may introduce deadlock with
  * smp_call_function() if an IPI is sent by the same process we are
  * waiting to become inactive.
  */
-void wait_task_inactive(struct task_struct *p)
+unsigned long wait_task_inactive(struct task_struct *p, long match_state)
 {
 	unsigned long flags;
 	int running, on_rq;
+	unsigned long ncsw;
 	struct rq *rq;
 
 	for (;;) {
@@ -1978,8 +1907,11 @@ void wait_task_inactive(struct task_struct *p)
 		 * return false if the runqueue has changed and p
 		 * is actually now running somewhere else!
 		 */
-		while (task_running(rq, p))
+		while (task_running(rq, p)) {
+			if (match_state && unlikely(p->state != match_state))
+				return 0;
 			cpu_relax();
+		}
 
 		/*
 		 * Ok, time to look more closely! We need the rq
@@ -1989,9 +1921,21 @@ void wait_task_inactive(struct task_struct *p)
 		rq = task_rq_lock(p, &flags);
 		running = task_running(rq, p);
 		on_rq = p->se.on_rq;
+		ncsw = 0;
+		if (!match_state || p->state == match_state) {
+			ncsw = p->nivcsw + p->nvcsw;
+			if (unlikely(!ncsw))
+				ncsw = 1;
+		}
 		task_rq_unlock(rq, &flags);
 
 		/*
+		 * If it changed from the expected state, bail out now.
+		 */
+		if (unlikely(!ncsw))
+			break;
+
+		/*
 		 * Was it really running after all now that we
 		 * checked with the proper locks actually held?
 		 *
@@ -2023,6 +1967,8 @@ void wait_task_inactive(struct task_struct *p)
 		 */
 		break;
 	}
+
+	return ncsw;
 }
 
 /***
@@ -2108,7 +2054,7 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
 		/* Tally up the load of all CPUs in the group */
 		avg_load = 0;
 
-		for_each_cpu_mask(i, group->cpumask) {
+		for_each_cpu_mask_nr(i, group->cpumask) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
 				load = source_load(i, load_idx);
@@ -2150,7 +2096,7 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu,
 	/* Traverse only the allowed CPUs */
 	cpus_and(*tmp, group->cpumask, p->cpus_allowed);
 
-	for_each_cpu_mask(i, *tmp) {
+	for_each_cpu_mask_nr(i, *tmp) {
 		load = weighted_cpuload(i);
 
 		if (load < min_load || (load == min_load && i == this_cpu)) {
@@ -2881,7 +2827,7 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
 
 	rq = task_rq_lock(p, &flags);
 	if (!cpu_isset(dest_cpu, p->cpus_allowed)
-	    || unlikely(cpu_is_offline(dest_cpu)))
+	    || unlikely(!cpu_active(dest_cpu)))
 		goto out;
 
 	/* force the process onto the specified CPU */
@@ -3168,7 +3114,7 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		max_cpu_load = 0;
 		min_cpu_load = ~0UL;
 
-		for_each_cpu_mask(i, group->cpumask) {
+		for_each_cpu_mask_nr(i, group->cpumask) {
 			struct rq *rq;
 
 			if (!cpu_isset(i, *cpus))
@@ -3447,7 +3393,7 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 	unsigned long max_load = 0;
 	int i;
 
-	for_each_cpu_mask(i, group->cpumask) {
+	for_each_cpu_mask_nr(i, group->cpumask) {
 		unsigned long wl;
 
 		if (!cpu_isset(i, *cpus))
@@ -3849,7 +3795,7 @@ int select_nohz_load_balancer(int stop_tick)
 		/*
 		 * If we are going offline and still the leader, give up!
 		 */
-		if (cpu_is_offline(cpu) &&
+		if (!cpu_active(cpu) &&
 		    atomic_read(&nohz.load_balancer) == cpu) {
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
@@ -3989,7 +3935,7 @@ static void run_rebalance_domains(struct softirq_action *h)
 		int balance_cpu;
 
 		cpu_clear(this_cpu, cpus);
-		for_each_cpu_mask(balance_cpu, cpus) {
+		for_each_cpu_mask_nr(balance_cpu, cpus) {
 			/*
 			 * If this cpu gets work to do, stop the load balancing
 			 * work being done for other cpus. Next load
@@ -4125,6 +4071,8 @@ void account_user_time(struct task_struct *p, cputime_t cputime)
 		cpustat->nice = cputime64_add(cpustat->nice, tmp);
 	else
 		cpustat->user = cputime64_add(cpustat->user, tmp);
+	/* Account for user time used */
+	acct_update_integrals(p);
 }
 
 /*
@@ -4395,7 +4343,7 @@ asmlinkage void __sched schedule(void)
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
-	int cpu, hrtick = sched_feat(HRTICK);
+	int cpu;
 
 need_resched:
 	preempt_disable();
@@ -4410,7 +4358,7 @@ need_resched_nonpreemptible:
 
 	schedule_debug(prev);
 
-	if (hrtick)
+	if (sched_feat(HRTICK))
 		hrtick_clear(rq);
 
 	/*
@@ -4457,9 +4405,6 @@ need_resched_nonpreemptible:
 	} else
 		spin_unlock_irq(&rq->lock);
 
-	if (hrtick)
-		hrtick_set(rq);
-
 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
 
@@ -5059,19 +5004,21 @@ recheck:
 			return -EPERM;
 	}
 
+	if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
-	/*
-	 * Do not allow realtime tasks into groups that have no runtime
-	 * assigned.
-	 */
-	if (user
-	    && rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
-		return -EPERM;
+		/*
+		 * Do not allow realtime tasks into groups that have no runtime
+		 * assigned.
+		 */
+		if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+			return -EPERM;
 #endif
 
-	retval = security_task_setscheduler(p, policy, param);
-	if (retval)
-		return retval;
+		retval = security_task_setscheduler(p, policy, param);
+		if (retval)
+			return retval;
+	}
+
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
@@ -5876,7 +5823,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
 	struct rq *rq_dest, *rq_src;
 	int ret = 0, on_rq;
 
-	if (unlikely(cpu_is_offline(dest_cpu)))
+	if (unlikely(!cpu_active(dest_cpu)))
 		return ret;
 
 	rq_src = cpu_rq(src_cpu);
@@ -6469,7 +6416,7 @@ static struct notifier_block __cpuinitdata migration_notifier = {
 	.priority = 10
 };
 
-void __init migration_init(void)
+static int __init migration_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
 	int err;
@@ -6479,7 +6426,10 @@ void __init migration_init(void)
 	BUG_ON(err == NOTIFY_BAD);
 	migration_call(&migration_notifier, CPU_ONLINE, cpu);
 	register_cpu_notifier(&migration_notifier);
+
+	return err;
 }
+early_initcall(migration_init);
 #endif
 
 #ifdef CONFIG_SMP
@@ -6768,7 +6718,8 @@ static cpumask_t cpu_isolated_map = CPU_MASK_NONE;
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
-	int ints[NR_CPUS], i;
+	static int __initdata ints[NR_CPUS];
+	int i;
 
 	str = get_options(str, ARRAY_SIZE(ints), ints);
 	cpus_clear(cpu_isolated_map);
@@ -6802,7 +6753,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 
 	cpus_clear(*covered);
 
-	for_each_cpu_mask(i, *span) {
+	for_each_cpu_mask_nr(i, *span) {
 		struct sched_group *sg;
 		int group = group_fn(i, cpu_map, &sg, tmpmask);
 		int j;
@@ -6813,7 +6764,7 @@ init_sched_build_groups(const cpumask_t *span, const cpumask_t *cpu_map,
 		cpus_clear(sg->cpumask);
 		sg->__cpu_power = 0;
 
-		for_each_cpu_mask(j, *span) {
+		for_each_cpu_mask_nr(j, *span) {
 			if (group_fn(j, cpu_map, NULL, tmpmask) != group)
 				continue;
 
@@ -7013,7 +6964,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 	if (!sg)
 		return;
 	do {
-		for_each_cpu_mask(j, sg->cpumask) {
+		for_each_cpu_mask_nr(j, sg->cpumask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(phys_domains, j);
@@ -7038,7 +6989,7 @@ static void free_sched_groups(const cpumask_t *cpu_map, cpumask_t *nodemask)
 {
 	int cpu, i;
 
-	for_each_cpu_mask(cpu, *cpu_map) {
+	for_each_cpu_mask_nr(cpu, *cpu_map) {
 		struct sched_group **sched_group_nodes
 			= sched_group_nodes_bycpu[cpu];
 
@@ -7277,7 +7228,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 	/*
 	 * Set up domains for cpus specified by the cpu_map.
 	 */
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd = NULL, *p;
 		SCHED_CPUMASK_VAR(nodemask, allmasks);
 
@@ -7344,7 +7295,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_SMT
 	/* Set up CPU (sibling) groups */
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		SCHED_CPUMASK_VAR(this_sibling_map, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
@@ -7361,7 +7312,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 #ifdef CONFIG_SCHED_MC
 	/* Set up multi-core groups */
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		SCHED_CPUMASK_VAR(this_core_map, allmasks);
 		SCHED_CPUMASK_VAR(send_covered, allmasks);
 
@@ -7428,7 +7379,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 			goto error;
 		}
 		sched_group_nodes[i] = sg;
-		for_each_cpu_mask(j, *nodemask) {
+		for_each_cpu_mask_nr(j, *nodemask) {
 			struct sched_domain *sd;
 
 			sd = &per_cpu(node_domains, j);
@@ -7474,21 +7425,21 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 
 	/* Calculate CPU power for physical packages and nodes */
 #ifdef CONFIG_SCHED_SMT
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd = &per_cpu(cpu_domains, i);
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 #ifdef CONFIG_SCHED_MC
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd = &per_cpu(core_domains, i);
 
 		init_sched_groups_power(i, sd);
 	}
 #endif
 
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd = &per_cpu(phys_domains, i);
 
 		init_sched_groups_power(i, sd);
@@ -7508,7 +7459,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map,
 #endif
 
 	/* Attach the domains */
-	for_each_cpu_mask(i, *cpu_map) {
+	for_each_cpu_mask_nr(i, *cpu_map) {
 		struct sched_domain *sd;
 #ifdef CONFIG_SCHED_SMT
 		sd = &per_cpu(cpu_domains, i);
@@ -7553,18 +7504,6 @@ void __attribute__((weak)) arch_update_cpu_topology(void)
 }
 
 /*
- * Free current domain masks.
- * Called after all cpus are attached to NULL domain.
- */
-static void free_sched_domains(void)
-{
-	ndoms_cur = 0;
-	if (doms_cur != &fallback_doms)
-		kfree(doms_cur);
-	doms_cur = &fallback_doms;
-}
-
-/*
  * Set up scheduler domains and groups. Callers must hold the hotplug lock.
  * For now this just excludes isolated cpus, but could be used to
  * exclude other special cases in the future.
@@ -7603,7 +7542,7 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 
 	unregister_sched_domain_sysctl();
 
-	for_each_cpu_mask(i, *cpu_map)
+	for_each_cpu_mask_nr(i, *cpu_map)
 		cpu_attach_domain(NULL, &def_root_domain, i);
 	synchronize_sched();
 	arch_destroy_sched_domains(cpu_map, &tmpmask);
@@ -7642,7 +7581,7 @@ static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
  * ownership of it and will kfree it when done with it. If the caller
  * failed the kmalloc call, then it can pass in doms_new == NULL,
  * and partition_sched_domains() will fallback to the single partition
- * 'fallback_doms'.
+ * 'fallback_doms', it also forces the domains to be rebuilt.
  *
  * Call with hotplug lock held
  */
@@ -7656,12 +7595,8 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
 	/* always unregister in case we don't destroy any domains */
 	unregister_sched_domain_sysctl();
 
-	if (doms_new == NULL) {
-		ndoms_new = 1;
-		doms_new = &fallback_doms;
-		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
-		dattr_new = NULL;
-	}
+	if (doms_new == NULL)
+		ndoms_new = 0;
 
 	/* Destroy deleted domains */
 	for (i = 0; i < ndoms_cur; i++) {
@@ -7676,6 +7611,14 @@ match1:
 		;
 	}
 
+	if (doms_new == NULL) {
+		ndoms_cur = 0;
+		ndoms_new = 1;
+		doms_new = &fallback_doms;
+		cpus_andnot(doms_new[0], cpu_online_map, cpu_isolated_map);
+		dattr_new = NULL;
+	}
+
 	/* Build new domains */
 	for (i = 0; i < ndoms_new; i++) {
 		for (j = 0; j < ndoms_cur; j++) {
@@ -7706,17 +7649,10 @@ match2:
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 int arch_reinit_sched_domains(void)
 {
-	int err;
-
 	get_online_cpus();
-	mutex_lock(&sched_domains_mutex);
-	detach_destroy_domains(&cpu_online_map);
-	free_sched_domains();
-	err = arch_init_sched_domains(&cpu_online_map);
-	mutex_unlock(&sched_domains_mutex);
+	rebuild_sched_domains();
 	put_online_cpus();
-
-	return err;
+	return 0;
 }
 
 static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
@@ -7737,30 +7673,34 @@ static ssize_t sched_power_savings_store(const char *buf, size_t count, int smt)
 }
 
 #ifdef CONFIG_SCHED_MC
-static ssize_t sched_mc_power_savings_show(struct sys_device *dev, char *page)
+static ssize_t sched_mc_power_savings_show(struct sysdev_class *class,
+					   char *page)
 {
 	return sprintf(page, "%u\n", sched_mc_power_savings);
 }
-static ssize_t sched_mc_power_savings_store(struct sys_device *dev,
+static ssize_t sched_mc_power_savings_store(struct sysdev_class *class,
 					    const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 0);
 }
-static SYSDEV_ATTR(sched_mc_power_savings, 0644, sched_mc_power_savings_show,
-		   sched_mc_power_savings_store);
+static SYSDEV_CLASS_ATTR(sched_mc_power_savings, 0644,
+			 sched_mc_power_savings_show,
+			 sched_mc_power_savings_store);
 #endif
 
 #ifdef CONFIG_SCHED_SMT
-static ssize_t sched_smt_power_savings_show(struct sys_device *dev, char *page)
+static ssize_t sched_smt_power_savings_show(struct sysdev_class *dev,
+					    char *page)
 {
 	return sprintf(page, "%u\n", sched_smt_power_savings);
 }
-static ssize_t sched_smt_power_savings_store(struct sys_device *dev,
+static ssize_t sched_smt_power_savings_store(struct sysdev_class *dev,
 					     const char *buf, size_t count)
 {
 	return sched_power_savings_store(buf, count, 1);
 }
-static SYSDEV_ATTR(sched_smt_power_savings, 0644, sched_smt_power_savings_show,
+static SYSDEV_CLASS_ATTR(sched_smt_power_savings, 0644,
+		   sched_smt_power_savings_show,
 		   sched_smt_power_savings_store);
 #endif
 
@@ -7782,59 +7722,49 @@ int sched_create_sysfs_power_savings_entries(struct sysdev_class *cls)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
+#ifndef CONFIG_CPUSETS
 /*
- * Force a reinitialization of the sched domains hierarchy. The domains
- * and groups cannot be updated in place without racing with the balancing
- * code, so we temporarily attach all running cpus to the NULL domain
- * which will prevent rebalancing while the sched domains are recalculated.
+ * Add online and remove offline CPUs from the scheduler domains.
+ * When cpusets are enabled they take over this function.
  */
 static int update_sched_domains(struct notifier_block *nfb,
 				unsigned long action, void *hcpu)
 {
+	switch (action) {
+	case CPU_ONLINE:
+	case CPU_ONLINE_FROZEN:
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
+		partition_sched_domains(0, NULL, NULL);
+		return NOTIFY_OK;
+
+	default:
+		return NOTIFY_DONE;
+	}
+}
+#endif
+
+static int update_runtime(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
 	int cpu = (int)(long)hcpu;
 
 	switch (action) {
 	case CPU_DOWN_PREPARE:
 	case CPU_DOWN_PREPARE_FROZEN:
 		disable_runtime(cpu_rq(cpu));
-		/* fall-through */
-	case CPU_UP_PREPARE:
-	case CPU_UP_PREPARE_FROZEN:
-		detach_destroy_domains(&cpu_online_map);
-		free_sched_domains();
 		return NOTIFY_OK;
 
-
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
 		enable_runtime(cpu_rq(cpu));
-		/* fall-through */
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
-		/*
-		 * Fall through and re-initialise the domains.
-		 */
-		break;
+		return NOTIFY_OK;
+
 	default:
 		return NOTIFY_DONE;
 	}
-
-#ifndef CONFIG_CPUSETS
-	/*
-	 * Create default domain partitioning if cpusets are disabled.
-	 * Otherwise we let cpusets rebuild the domains based on the
-	 * current setup.
-	 */
-
-	/* The hotplug lock is already held by cpu_up/cpu_down */
-	arch_init_sched_domains(&cpu_online_map);
-#endif
-
-	return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)
@@ -7854,8 +7784,15 @@ void __init sched_init_smp(void)
 	cpu_set(smp_processor_id(), non_isolated_cpus);
 	mutex_unlock(&sched_domains_mutex);
 	put_online_cpus();
+
+#ifndef CONFIG_CPUSETS
 	/* XXX: Theoretical race here - CPU may be hotplugged now */
 	hotcpu_notifier(update_sched_domains, 0);
+#endif
+
+	/* RT runtime code needs to handle some hotplug events */
+	hotcpu_notifier(update_runtime, 0);
+
 	init_hrtick();
 
 	/* Move init over to a non-isolated CPU */