Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--  kernel/sched.c  217
 1 file changed, 121 insertions, 96 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 927c9307cd00..669c49aa57f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -204,11 +204,16 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	rt_b->rt_period_timer.cb_mode = HRTIMER_CB_IRQSAFE_NO_SOFTIRQ;
 }
 
+static inline int rt_bandwidth_enabled(void)
+{
+	return sysctl_sched_rt_runtime >= 0;
+}
+
 static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
 {
 	ktime_t now;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (rt_bandwidth_enabled() && rt_b->rt_runtime == RUNTIME_INF)
 		return;
 
 	if (hrtimer_active(&rt_b->rt_period_timer))
@@ -298,9 +303,9 @@ static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 static DEFINE_PER_CPU(struct sched_rt_entity, init_sched_rt_entity);
 static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
 #endif /* CONFIG_RT_GROUP_SCHED */
-#else /* !CONFIG_FAIR_GROUP_SCHED */
+#else /* !CONFIG_USER_SCHED */
 #define root_task_group init_task_group
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_USER_SCHED */
 
 /* task_group_lock serializes add/remove of task groups and also changes to
  * a task group's cpu shares.
@@ -1087,7 +1092,7 @@ hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_DONE;
 }
 
-static void init_hrtick(void)
+static __init void init_hrtick(void)
 {
 	hotcpu_notifier(hotplug_hrtick, 0);
 }
@@ -1380,38 +1385,24 @@ static inline void dec_cpu_load(struct rq *rq, unsigned long load)
 	update_load_sub(&rq->load, load);
 }
 
-#ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
-static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
-
-static unsigned long cpu_avg_load_per_task(int cpu)
-{
-	struct rq *rq = cpu_rq(cpu);
-
-	if (rq->nr_running)
-		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
-
-	return rq->avg_load_per_task;
-}
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
-
-typedef void (*tg_visitor)(struct task_group *, int, struct sched_domain *);
+#if (defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)) || defined(CONFIG_RT_GROUP_SCHED)
+typedef int (*tg_visitor)(struct task_group *, void *);
 
 /*
  * Iterate the full tree, calling @down when first entering a node and @up when
  * leaving it for the final time.
  */
-static void
-walk_tg_tree(tg_visitor down, tg_visitor up, int cpu, struct sched_domain *sd)
+static int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
 {
 	struct task_group *parent, *child;
+	int ret;
 
 	rcu_read_lock();
 	parent = &root_task_group;
 down:
-	(*down)(parent, cpu, sd);
+	ret = (*down)(parent, data);
+	if (ret)
+		goto out_unlock;
 	list_for_each_entry_rcu(child, &parent->children, siblings) {
 		parent = child;
 		goto down;
@@ -1419,15 +1410,43 @@ down:
 up:
 		continue;
 	}
-	(*up)(parent, cpu, sd);
+	ret = (*up)(parent, data);
+	if (ret)
+		goto out_unlock;
 
 	child = parent;
 	parent = parent->parent;
 	if (parent)
 		goto up;
+out_unlock:
 	rcu_read_unlock();
+
+	return ret;
 }
 
+static int tg_nop(struct task_group *tg, void *data)
+{
+	return 0;
+}
+#endif
+
+#ifdef CONFIG_SMP
+static unsigned long source_load(int cpu, int type);
+static unsigned long target_load(int cpu, int type);
+static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
+
+static unsigned long cpu_avg_load_per_task(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	if (rq->nr_running)
+		rq->avg_load_per_task = rq->load.weight / rq->nr_running;
+
+	return rq->avg_load_per_task;
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
 static void __set_se_shares(struct sched_entity *se, unsigned long shares);
 
 /*
@@ -1486,11 +1505,11 @@ __update_group_shares_cpu(struct task_group *tg, int cpu,
  * This needs to be done in a bottom-up fashion because the rq weight of a
  * parent group depends on the shares of its child groups.
  */
-static void
-tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_shares_up(struct task_group *tg, void *data)
 {
 	unsigned long rq_weight = 0;
 	unsigned long shares = 0;
+	struct sched_domain *sd = data;
 	int i;
 
 	for_each_cpu_mask(i, sd->span) {
@@ -1515,6 +1534,8 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
 		__update_group_shares_cpu(tg, i, shares, rq_weight);
 		spin_unlock_irqrestore(&rq->lock, flags);
 	}
+
+	return 0;
 }
 
 /*
@@ -1522,10 +1543,10 @@ tg_shares_up(struct task_group *tg, int cpu, struct sched_domain *sd)
  * This needs to be done in a top-down fashion because the load of a child
  * group is a fraction of its parents load.
  */
-static void
-tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
+static int tg_load_down(struct task_group *tg, void *data)
 {
 	unsigned long load;
+	long cpu = (long)data;
 
 	if (!tg->parent) {
 		load = cpu_rq(cpu)->load.weight;
@@ -1536,11 +1557,8 @@ tg_load_down(struct task_group *tg, int cpu, struct sched_domain *sd)
 	}
 
 	tg->cfs_rq[cpu]->h_load = load;
-}
 
-static void
-tg_nop(struct task_group *tg, int cpu, struct sched_domain *sd)
-{
+	return 0;
 }
 
 static void update_shares(struct sched_domain *sd)
@@ -1550,7 +1568,7 @@ static void update_shares(struct sched_domain *sd)
 
 	if (elapsed >= (s64)(u64)sysctl_sched_shares_ratelimit) {
 		sd->last_update = now;
-		walk_tg_tree(tg_nop, tg_shares_up, 0, sd);
+		walk_tg_tree(tg_nop, tg_shares_up, sd);
 	}
 }
 
@@ -1561,9 +1579,9 @@ static void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 	spin_lock(&rq->lock);
 }
 
-static void update_h_load(int cpu)
+static void update_h_load(long cpu)
 {
-	walk_tg_tree(tg_load_down, tg_nop, cpu, NULL);
+	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 }
 
 #else
@@ -5171,7 +5189,8 @@ recheck:
 	 * Do not allow realtime tasks into groups that have no runtime
 	 * assigned.
 	 */
-	if (rt_policy(policy) && task_group(p)->rt_bandwidth.rt_runtime == 0)
+	if (rt_bandwidth_enabled() && rt_policy(policy) &&
+			task_group(p)->rt_bandwidth.rt_runtime == 0)
 		return -EPERM;
 #endif
 
@@ -8808,73 +8827,77 @@ static DEFINE_MUTEX(rt_constraints_mutex);
 static unsigned long to_ratio(u64 period, u64 runtime)
 {
 	if (runtime == RUNTIME_INF)
-		return 1ULL << 16;
+		return 1ULL << 20;
 
-	return div64_u64(runtime << 16, period);
+	return div64_u64(runtime << 20, period);
 }
 
-#ifdef CONFIG_CGROUP_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
+/* Must be called with tasklist_lock held */
+static inline int tg_has_rt_tasks(struct task_group *tg)
 {
-	struct task_group *tgi, *parent = tg->parent;
-	unsigned long total = 0;
+	struct task_struct *g, *p;
 
-	if (!parent) {
-		if (global_rt_period() < period)
-			return 0;
+	do_each_thread(g, p) {
+		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
+			return 1;
+	} while_each_thread(g, p);
 
-		return to_ratio(period, runtime) <
-			to_ratio(global_rt_period(), global_rt_runtime());
-	}
+	return 0;
+}
 
-	if (ktime_to_ns(parent->rt_bandwidth.rt_period) < period)
-		return 0;
+struct rt_schedulable_data {
+	struct task_group *tg;
+	u64 rt_period;
+	u64 rt_runtime;
+};
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &parent->children, siblings) {
-		if (tgi == tg)
-			continue;
+static int tg_schedulable(struct task_group *tg, void *data)
+{
+	struct rt_schedulable_data *d = data;
+	struct task_group *child;
+	unsigned long total, sum = 0;
+	u64 period, runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
+	runtime = tg->rt_bandwidth.rt_runtime;
+
+	if (tg == d->tg) {
+		period = d->rt_period;
+		runtime = d->rt_runtime;
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) <=
-		to_ratio(ktime_to_ns(parent->rt_bandwidth.rt_period),
-				parent->rt_bandwidth.rt_runtime);
-}
-#elif defined CONFIG_USER_SCHED
-static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
-{
-	struct task_group *tgi;
-	unsigned long total = 0;
-	unsigned long global_ratio =
-		to_ratio(global_rt_period(), global_rt_runtime());
+	if (rt_bandwidth_enabled() && !runtime && tg_has_rt_tasks(tg))
+		return -EBUSY;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(tgi, &task_groups, list) {
-		if (tgi == tg)
-			continue;
+	total = to_ratio(period, runtime);
+
+	list_for_each_entry_rcu(child, &tg->children, siblings) {
+		period = ktime_to_ns(child->rt_bandwidth.rt_period);
+		runtime = child->rt_bandwidth.rt_runtime;
 
-		total += to_ratio(ktime_to_ns(tgi->rt_bandwidth.rt_period),
-				tgi->rt_bandwidth.rt_runtime);
+		if (child == d->tg) {
+			period = d->rt_period;
+			runtime = d->rt_runtime;
+		}
+
+		sum += to_ratio(period, runtime);
 	}
-	rcu_read_unlock();
 
-	return total + to_ratio(period, runtime) < global_ratio;
+	if (sum > total)
+		return -EINVAL;
+
+	return 0;
 }
-#endif
 
-/* Must be called with tasklist_lock held */
-static inline int tg_has_rt_tasks(struct task_group *tg)
+static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 {
-	struct task_struct *g, *p;
-	do_each_thread(g, p) {
-		if (rt_task(p) && rt_rq_of_se(&p->rt)->tg == tg)
-			return 1;
-	} while_each_thread(g, p);
-	return 0;
+	struct rt_schedulable_data data = {
+		.tg = tg,
+		.rt_period = period,
+		.rt_runtime = runtime,
+	};
+
+	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
 static int tg_set_bandwidth(struct task_group *tg,
@@ -8884,14 +8907,9 @@ static int tg_set_bandwidth(struct task_group *tg,
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
-		err = -EBUSY;
-		goto unlock;
-	}
-	if (!__rt_schedulable(tg, rt_period, rt_runtime)) {
-		err = -EINVAL;
-		goto unlock;
-	}
+	err = __rt_schedulable(tg, rt_period, rt_runtime);
+	if (err)
+		goto unlock;
 
 	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
@@ -8964,12 +8982,16 @@ static int sched_rt_global_constraints(void)
 	u64 rt_runtime, rt_period;
 	int ret = 0;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
 	rt_runtime = tg->rt_bandwidth.rt_runtime;
 
 	mutex_lock(&rt_constraints_mutex);
-	if (!__rt_schedulable(tg, rt_period, rt_runtime))
-		ret = -EINVAL;
+	read_lock(&tasklist_lock);
+	ret = __rt_schedulable(tg, rt_period, rt_runtime);
+	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
 
 	return ret;
@@ -8980,6 +9002,9 @@ static int sched_rt_global_constraints(void)
 	unsigned long flags;
 	int i;
 
+	if (sysctl_sched_rt_period <= 0)
+		return -EINVAL;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;