path: root/kernel/sched_rt.c
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c  66
1 file changed, 56 insertions(+), 10 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1113157b2058..d9ba9d5f99d6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
+	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
-	if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
-		struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-
-		enqueue_rt_entity(rt_se);
+	if (rt_rq->rt_nr_running) {
+		if (rt_se && !on_rt_rq(rt_se))
+			enqueue_rt_entity(rt_se);
 		if (rt_rq->highest_prio < curr->prio)
 			resched_task(curr);
 	}
@@ -231,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
+/*
+ * We ran out of runtime, see if we can borrow some from our neighbours.
+ */
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -250,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		/*
+		 * Either all rqs have inf runtime and there's nothing to steal,
+		 * or __disable_runtime() below sets a specific rq to inf to
+		 * indicate it has been disabled and disallow stealing.
+		 */
 		if (iter->rt_runtime == RUNTIME_INF)
 			goto next;
 
+		/*
+		 * From runqueues with spare time, take 1/n part of their
+		 * spare time, but no more than our period.
+		 */
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			diff = div_u64((u64)diff, weight);
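
The two comments added to do_balance_runtime() describe the borrowing rule: from each neighbour with spare runtime, take a 1/n share of that spare time, and never let the borrower's allowance grow past its own period. The arithmetic is easier to see with the locking stripped away; here is a standalone sketch with made-up numbers (all values and the userspace framing are illustrative, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t rt_period    = 1000000000;	/* our period: 1 s, in ns */
	int64_t rt_runtime   =  950000000;	/* our current allowance */
	int64_t iter_runtime =  950000000;	/* a neighbour's allowance */
	int64_t iter_time    =  500000000;	/* runtime the neighbour has used */
	int weight = 4;				/* CPUs in the root domain span */

	int64_t diff = iter_runtime - iter_time;	/* neighbour's spare time */
	if (diff > 0) {
		diff /= weight;				/* take only a 1/n share */
		if (rt_runtime + diff > rt_period)	/* never exceed our own period */
			diff = rt_period - rt_runtime;
		iter_runtime -= diff;			/* the neighbour lends... */
		rt_runtime   += diff;			/* ...and we borrow */
	}

	printf("borrowed %lld ns, runtime now %lld ns\n",
	       (long long)diff, (long long)rt_runtime);
	return 0;
}

With these numbers the neighbour has 450 ms spare, a 1/4 share is 112.5 ms, and the period cap trims the transfer to 50 ms.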
@@ -274,6 +286,9 @@ next:
 	return more;
 }
 
+/*
+ * Ensure this RQ takes back all the runtime it lent to its neighbours.
+ */
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
@@ -289,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
 
 		spin_lock(&rt_b->rt_runtime_lock);
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * Either we're all inf and nobody needs to borrow, or we're
+		 * already disabled and thus have nothing to do, or we have
+		 * exactly the right amount of runtime to take out.
+		 */
 		if (rt_rq->rt_runtime == RUNTIME_INF ||
 		    rt_rq->rt_runtime == rt_b->rt_runtime)
 			goto balanced;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 
+		/*
+		 * Calculate the difference between what we started out with
+		 * and what we currently have; that's the amount of runtime
+		 * we lent out and now have to reclaim.
+		 */
 		want = rt_b->rt_runtime - rt_rq->rt_runtime;
 
+		/*
+		 * Greedy reclaim, take back as much as we can.
+		 */
 		for_each_cpu_mask(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
+			/*
+			 * Can't reclaim from ourselves or disabled runqueues.
+			 */
 			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 				continue;
 
@@ -319,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
 		}
 
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * We cannot be left wanting - that would mean some runtime
+		 * leaked out of the system.
+		 */
 		BUG_ON(want);
 balanced:
+		/*
+		 * Disable all the borrow logic by pretending we have inf
+		 * runtime - in which case borrowing doesn't make sense.
+		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 		spin_unlock(&rt_b->rt_runtime_lock);
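
Taken together, the commented steps in __disable_runtime() amount to a greedy reclaim: compute how much runtime was lent out ("want"), walk the other runqueues pulling runtime back until want reaches zero, and finally mark this runqueue RUNTIME_INF so nobody borrows from it again. A simplified, lock-free model of that loop (array layout and values are invented for illustration, this is not the kernel code):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define NR_RQS		4
#define RUNTIME_INF	((int64_t)-1)

int main(void)
{
	int64_t base_runtime = 950000000;	/* rt_b->rt_runtime: the base share */
	int64_t runtime[NR_RQS] = {		/* per-rq rt_runtime, in ns */
		850000000,	/* rq 0: lent out 100 ms and wants it back */
		1000000000,	/* rq 1: currently holds borrowed time */
		1000000000,	/* rq 2: currently holds borrowed time */
		RUNTIME_INF,	/* rq 3: already disabled, must be skipped */
	};

	int64_t want = base_runtime - runtime[0];	/* amount rq 0 lent out */

	for (int i = 1; i < NR_RQS && want > 0; i++) {
		if (runtime[i] == RUNTIME_INF)		/* can't reclaim here */
			continue;
		/* greedy: take as much as this rq has, up to what we still want */
		int64_t diff = runtime[i] < want ? runtime[i] : want;
		runtime[i] -= diff;
		want       -= diff;
	}

	assert(want == 0);		/* mirrors BUG_ON(want): nothing may leak */
	runtime[0] = RUNTIME_INF;	/* pretend inf runtime: borrowing is now moot */
	printf("reclaimed everything, rq 0 now ignores the borrow logic\n");
	return 0;
}

Note the reclaim is greedy: a neighbour can be pulled below its own base share, which is fine because the period timer will top it up again.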
@@ -343,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
 	if (unlikely(!scheduler_running))
 		return;
 
+	/*
+	 * Reset each runqueue's bandwidth settings.
+	 */
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
@@ -389,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	int i, idle = 1;
 	cpumask_t span;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
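
The new rt_bandwidth_enabled() guard lets the period timer (and, below, update_curr_rt()) bail out early when RT throttling is switched off globally. The helper itself is introduced elsewhere in this series, in sched.c rather than this file; as far as I can tell it simply tests the global sysctl, roughly like the following sketch (treat this as an assumption, not the authoritative definition):

extern int sysctl_sched_rt_runtime;	/* -1 means "no RT bandwidth limit" */

static inline int rt_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}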
@@ -484,9 +526,14 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	if (!rt_bandwidth_enabled())
+		return;
+
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
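
The added account_group_exec_runtime() call feeds the task's runtime into a thread-group total used for process-wide CPU timers; it belongs to the same group-cputime rework that renames it_sched_expires further down. The real helper lives in the scheduler headers, not here; the userspace model below only illustrates the idea, and its struct and field names are invented for the sketch:

#include <stdio.h>

struct group_times {
	unsigned long long sum_exec_runtime;	/* total for the whole thread group */
};

struct task {
	unsigned long long sum_exec_runtime;	/* this thread only */
	struct group_times *group;		/* shared group accounting */
};

static void account_group_exec_runtime(struct task *t, unsigned long long ns)
{
	/* per-task time is accounted separately, as update_curr_rt() does */
	if (t->group)
		t->group->sum_exec_runtime += ns;
}

int main(void)
{
	struct group_times g = { 0 };
	struct task t1 = { 0, &g }, t2 = { 0, &g };

	t1.sum_exec_runtime += 1000000;		/* like curr->se.sum_exec_runtime += delta_exec */
	account_group_exec_runtime(&t1, 1000000);
	t2.sum_exec_runtime += 2500000;
	account_group_exec_runtime(&t2, 2500000);

	printf("group total: %llu ns\n", g.sum_exec_runtime);	/* 3500000 */
	return 0;
}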
@@ -784,7 +831,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
 {
 	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
@@ -1413,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
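
The watchdog hunk keeps the same tick conversion and only changes where the expiry is stored: the per-task cputime_expires.sched_exp field replaces the old it_sched_expires, following the group-cputime rework, so the posix CPU timer path can fire the RLIMIT_RTTIME signal. The conversion itself deserves a worked example: the rlimits are in microseconds while p->rt.timeout counts scheduler ticks, so the smaller limit is rounded up to ticks. HZ and the limits below are assumed example values:

#include <stdio.h>

#define HZ		1000
#define USEC_PER_SEC	1000000UL
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long soft = 950500;	/* RLIMIT_RTTIME soft limit, in µs */
	unsigned long hard = 1000000;	/* RLIMIT_RTTIME hard limit, in µs */
	unsigned long min_limit = soft < hard ? soft : hard;
	unsigned long next = DIV_ROUND_UP(min_limit, USEC_PER_SEC / HZ);

	/* 950500 µs at HZ=1000 -> ceil(950500 / 1000) = 951 ticks */
	printf("timeout threshold: %lu ticks\n", next);
	return 0;
}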
@@ -1457,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
 	.enqueue_task		= enqueue_task_rt,
 	.dequeue_task		= dequeue_task_rt,
 	.yield_task		= yield_task_rt,
-#ifdef CONFIG_SMP
-	.select_task_rq		= select_task_rq_rt,
-#endif /* CONFIG_SMP */
 
 	.check_preempt_curr	= check_preempt_curr_rt,
 
@@ -1467,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
 	.put_prev_task		= put_prev_task_rt,
 
 #ifdef CONFIG_SMP
+	.select_task_rq		= select_task_rq_rt,
+
 	.load_balance		= load_balance_rt,
 	.move_one_task		= move_one_task_rt,
 	.set_cpus_allowed	= set_cpus_allowed_rt,
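
The last two hunks are a pure move, not a behaviour change: .select_task_rq loses its own small #ifdef CONFIG_SMP island near the top of rt_sched_class and joins the existing SMP-only block further down, so every SMP callback sits under a single conditional. Reconstructed from the two hunks (members the diff does not show are elided with comments):

static const struct sched_class rt_sched_class = {
	/* ... */
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	/* ... */
	.put_prev_task		= put_prev_task_rt,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_rt,

	.load_balance		= load_balance_rt,
	.move_one_task		= move_one_task_rt,
	.set_cpus_allowed	= set_cpus_allowed_rt,
	/* ... */
#endif
	/* ... */
};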