Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c  217
 1 file changed, 121 insertions(+), 96 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 927c9307cd00..669c49aa57f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
 
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
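
For illustration only, not part of the diff: sysctl_sched_rt_runtime is the value behind /proc/sys/kernel/sched_rt_runtime_us, and writing -1 to that file disables RT throttling, which is exactly the case the new rt_bandwidth_enabled() helper reports as false. A minimal userspace sketch of the same check (the file path is the real sysctl; everything else is just for illustration):

/* Sketch: report whether RT bandwidth throttling is currently enabled,
 * mirroring the new rt_bandwidth_enabled() test on sysctl_sched_rt_runtime. */
#include <stdio.h>

int main(void)
{
	long runtime_us;
	FILE *f = fopen("/proc/sys/kernel/sched_rt_runtime_us", "r");

	if (!f || fscanf(f, "%ld", &runtime_us) != 1) {
		perror("sched_rt_runtime_us");
		return 1;
	}
	fclose(f);

	/* a negative value (-1 by convention) means throttling is disabled */
	printf("rt bandwidth %s (sched_rt_runtime_us = %ld)\n",
	       runtime_us >= 0 ? "enabled" : "disabled", runtime_us);
	return 0;
}
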
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -1087,7 +1092,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_DONE;
 }
 
-static void init_hrtick(void)
+static __init void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
@@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-
-	if (rq->nr_running)
-		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
-
-	return rq->avg_load_per_task;
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
  * Iterate the full tree, calling @down when first entering a node and @up when
  * leaving it for the final time.
  */
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
 {
 	struct task_group *parent, *child;
+	int ret;
 
 	rcu_read_lock();
 	parent = &root_task_group;
 down:
-	(*down)(parent, cpu, sd);
+	ret = (*down)(parent, data);
+	if (ret)
+		goto out_unlock;
 	list_for_each_entry_rcu(child, &parent->children, siblings) {
 		parent = child;
 		goto down;
@@ -1419,15 +1410,43 @@ down:
 up:
 		continue;
 	}
-	(*up)(parent, cpu, sd);
+	ret = (*up)(parent, data);
+	if (ret)
+		goto out_unlock;
 
 	child = parent;
 	parent = parent->parent;
 	if (parent)
 		goto up;
+out_unlock:
 	rcu_read_unlock();
+
+	return ret;
 }
 
+static int tg_nop(struct task_group *tg, void *data)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->nr_running)
+		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+	return rq->avg_load_per_task;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
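
A note on the reworked iterator, for illustration only: walk_tg_tree() now takes two int-returning tg_visitor callbacks plus an opaque void *data cookie; a non-zero return from either callback aborts the walk and is propagated to the caller, and tg_nop() is the do-nothing visitor for the direction a caller does not care about. A self-contained userspace sketch of the same down/up pattern, written recursively instead of with the kernel's gotos and using made-up node names:

/* Sketch of a down/up tree walk with early abort, modelled on the new
 * walk_tg_tree() contract: visitors return int, non-zero stops the walk. */
#include <stdio.h>

struct node {
	const char *name;
	struct node *child[2];		/* at most two children, for brevity */
};

typedef int (*visitor)(struct node *, void *);

static int walk(struct node *n, visitor down, visitor up, void *data)
{
	int ret, i;

	ret = down(n, data);		/* like ret = (*down)(parent, data) */
	if (ret)
		return ret;		/* abort and propagate the error */

	for (i = 0; i < 2 && n->child[i]; i++) {
		ret = walk(n->child[i], down, up, data);
		if (ret)
			return ret;
	}

	return up(n, data);		/* like ret = (*up)(parent, data) */
}

static int print_down(struct node *n, void *data)
{
	printf("down: %s\n", n->name);
	return 0;
}

static int nop(struct node *n, void *data)
{
	return 0;			/* counterpart of tg_nop() */
}

int main(void)
{
	struct node a = { "child-a", { NULL, NULL } };
	struct node b = { "child-b", { NULL, NULL } };
	struct node root = { "root", { &a, &b } };

	return walk(&root, print_down, nop, NULL);
}
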
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
  * This needs to be done in a bottom-up fashion because the rq weight of a
  * parent group depends on the shares of its child groups.
  */
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
 {
 	unsigned long rq_weight = 0;
 	unsigned long shares = 0;
+	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
 		__update_group_shares_cpu(tg, i, shares, rq_weight);
 		spin_unlock_irqrestore(&rq->lock, flags);
 	}
+
+	return 0;
 }
 
 /*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
 {
 	unsigned long load;
+	long cpu = (long)data;
 
 	if (!tg->parent) {
 		load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
 	}
 
 	tg->cfs_rq[cpu]->h_load = load;
-}
 
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+	return 0;
 }
 
 static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
 		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+		walk_tg_tree(tg_nop, tg_shares_up, sd);
 	}
 }
 
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 	spin_lock(&rq->lock);
 }
 
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
 {
-	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
 #else
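
For illustration: with the generic walker there is no dedicated cpu argument any more, so update_h_load() packs the CPU number into the void *data cookie as (void *)cpu and tg_load_down() unpacks it with (long)data; taking the parameter as long keeps the integer/pointer round trip the same width on the usual 32-bit and 64-bit Linux targets. A tiny standalone sketch of that round trip:

/* Illustration only: round-tripping a CPU number through a void * cookie,
 * the way update_h_load() and tg_load_down() now pass it. */
#include <assert.h>

static void visit(void *data)
{
	long cpu = (long)data;		/* unpack, as in tg_load_down() */
	assert(cpu == 3);
}

int main(void)
{
	long cpu = 3;			/* hypothetical CPU id */
	visit((void *)cpu);		/* pack, as in update_h_load() */
	return 0;
}
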
@@ -5171,7 +5189,8 @@ recheck:
 	 * Do not allow realtime tasks into groups that have no runtime
 	 * assigned.
 	 */
-	if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+	if (rt_bandwidth_enabled() && rt_policy(policy) &&
+			task_group(p)->rt_bandwidth.rt_runtime == 0)
 		return -EPERM;
 #endif
 
@@ -8808,73 +8827,77 @@ static DEFINE_MUTEX(rt_constraints_mutex);
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 16;
+		return 1ULL << 20;
 
-	return div64_u64(runtime << 16, period);
+	return div64_u64(runtime << 20, period);
 }
 
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_group *tgi, *parent = tg->parent;
-	unsigned long total = 0;
+	struct task_struct *g, *p;
 
-	if (!parent) {
-		if (global_rt_period() < period)
-			return 0;
+	do_each_thread(g, p) {
+		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+			return 1;
+	} while_each_thread(g, p);
 
-		return to_ratio(period, runtime) <
-			to_ratio(global_rt_period(), global_rt_runtime());
-	}
+	return 0;
+}
 
-	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-		return 0;
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-		if (tgi == tg)
-			continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) <=
-		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-				parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi;
-	unsigned long total = 0;
-	unsigned long global_ratio =
-		to_ratio(global_rt_period(), global_rt_runtime());
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list) {
-		if (tgi == tg)
-			continue;
+	total = to_ratio(period, runtime);
+
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) < global_ratio;
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
 }
-#endif
 
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
-	struct task_struct *g, *p;
-	do_each_thread(g, p) {
-		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-			return 1;
-	} while_each_thread(g, p);
-	return 0;
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
 static int tg_set_bandwidth(struct task_group *tg,
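
A worked example of the precision bump in to_ratio(), assuming the stock defaults of sched_rt_period_us = 1000000 and sched_rt_runtime_us = 950000: with 16 fractional bits the ratio was 950000 * 65536 / 1000000 = 62259, with 20 bits it becomes 950000 * 1048576 / 1000000 = 996147, so the children's ratios that tg_schedulable() sums and compares against each group's own quota are resolved sixteen times more finely. A minimal userspace check of that arithmetic (the RUNTIME_INF case is omitted):

/* Sketch of the patched to_ratio() with the default 1s period and 0.95s
 * runtime (microseconds here; the kernel uses nanoseconds, which scales
 * both operands equally and leaves the ratio unchanged). */
#include <stdio.h>
#include <stdint.h>

static unsigned long to_ratio(uint64_t period, uint64_t runtime)
{
	return (runtime << 20) / period;	/* was "<< 16" before this diff */
}

int main(void)
{
	uint64_t period = 1000000, runtime = 950000;

	printf("20-bit ratio: %lu\n", to_ratio(period, runtime));	/* 996147 */
	printf("16-bit ratio: %lu\n",
	       (unsigned long)((runtime << 16) / period));		/* 62259 */
	return 0;
}
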
@@ -8884,14 +8907,9 @@ static int tg_set_bandwidth(struct task_group *tg,
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-		err = -EBUSY;
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
 		goto unlock;
-	}
-	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-		err = -EINVAL;
-		goto unlock;
-	}
 
 	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8964,12 +8982,16 @@ static int sched_rt_global_constraints(void)
 	u64 rt_runtime, rt_period;
 	int ret = 0;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
 	mutex_lock(&rt_constraints_mutex);
-	if (!__rt_schedulable(tg, rt_period, rt_runtime))
-		ret = -EINVAL;
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(tg, rt_period, rt_runtime);
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;
@@ -8980,6 +9002,9 @@ static int sched_rt_global_constraints(void)
 	unsigned long flags;
 	int i;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;