Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c  75
1 file changed, 62 insertions(+), 13 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 998ba54b4543..b446dc87494f 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -102,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
 
 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
+	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
 	struct sched_rt_entity *rt_se = rt_rq->rt_se;
 
-	if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
-		struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
-
-		enqueue_rt_entity(rt_se);
+	if (rt_rq->rt_nr_running) {
+		if (rt_se && !on_rt_rq(rt_se))
+			enqueue_rt_entity(rt_se);
 		if (rt_rq->highest_prio < curr->prio)
 			resched_task(curr);
 	}
@@ -199,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
 
 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 {
+	if (rt_rq->rt_nr_running)
+		resched_task(rq_of_rt_rq(rt_rq)->curr);
 }
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -229,6 +231,9 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 #endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
+/*
+ * We ran out of runtime, see if we can borrow some from our neighbours.
+ */
 static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
@@ -248,9 +253,18 @@ static int do_balance_runtime(struct rt_rq *rt_rq)
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		/*
+		 * Either all rqs have inf runtime and there's nothing to steal,
+		 * or __disable_runtime() below sets a specific rq to inf to
+		 * indicate it's been disabled and disallow stealing.
+		 */
 		if (iter->rt_runtime == RUNTIME_INF)
 			goto next;
 
+		/*
+		 * From runqueues with spare time, take 1/n part of their
+		 * spare time, but no more than our period.
+		 */
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			diff = div_u64((u64)diff, weight);
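
The 1/n split in this hunk is easiest to check with concrete numbers. Below is a small standalone sketch of just the arithmetic; fake_rt_rq, borrow(), and all the numeric values are illustrative stand-ins (plain 64-bit division replaces div_u64(), and the locking and rq-iteration around it are omitted):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the kernel's struct rt_rq (illustration only). */
struct fake_rt_rq {
	int64_t rt_runtime;	/* allotted runtime, in ns */
	int64_t rt_time;	/* runtime consumed so far, in ns */
};

/*
 * Mirrors the borrowing step: take a 1/weight share of a neighbour's
 * spare runtime, capped so the borrower never exceeds its period.
 * Returns the amount actually moved.
 */
static int64_t borrow(struct fake_rt_rq *rt_rq, struct fake_rt_rq *iter,
		      int weight, int64_t rt_period)
{
	int64_t diff = iter->rt_runtime - iter->rt_time;

	if (diff <= 0)
		return 0;

	diff /= weight;			/* 1/n share of the spare time */
	if (rt_rq->rt_runtime + diff > rt_period)
		diff = rt_period - rt_rq->rt_runtime;	/* cap at our period */

	iter->rt_runtime -= diff;
	rt_rq->rt_runtime += diff;
	return diff;
}

int main(void)
{
	/* 4-CPU span: we get 1/4 of the neighbour's 800ms of spare = 200ms. */
	struct fake_rt_rq me   = { .rt_runtime = 700000000, .rt_time = 0 };
	struct fake_rt_rq peer = { .rt_runtime = 950000000, .rt_time = 150000000 };

	printf("borrowed %lld ns\n",
	       (long long)borrow(&me, &peer, 4, 1000000000));
	return 0;
}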
@@ -272,6 +286,9 @@ next:
 	return more;
 }
 
+/*
+ * Ensure this RQ takes back all the runtime it lent to its neighbours.
+ */
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
@@ -287,17 +304,33 @@ static void __disable_runtime(struct rq *rq)
 
 		spin_lock(&rt_b->rt_runtime_lock);
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * Either we're all inf and nobody needs to borrow, or we're
+		 * already disabled and thus have nothing to do, or we have
+		 * exactly the right amount of runtime to take out.
+		 */
 		if (rt_rq->rt_runtime == RUNTIME_INF ||
 		    rt_rq->rt_runtime == rt_b->rt_runtime)
 			goto balanced;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 
+		/*
+		 * Calculate the difference between what we started out with
+		 * and what we currently have; that's the amount of runtime
+		 * we lent out and now have to reclaim.
+		 */
 		want = rt_b->rt_runtime - rt_rq->rt_runtime;
 
+		/*
+		 * Greedy reclaim, take back as much as we can.
+		 */
 		for_each_cpu_mask(i, rd->span) {
 			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
 			s64 diff;
 
+			/*
+			 * Can't reclaim from ourselves or disabled runqueues.
+			 */
 			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
 				continue;
 
@@ -317,8 +350,16 @@ static void __disable_runtime(struct rq *rq)
 		}
 
 		spin_lock(&rt_rq->rt_runtime_lock);
+		/*
+		 * We cannot be left wanting - that would mean some runtime
+		 * leaked out of the system.
+		 */
 		BUG_ON(want);
 balanced:
+		/*
+		 * Disable all the borrow logic by pretending we have inf
+		 * runtime - in which case borrowing doesn't make sense.
+		 */
 		rt_rq->rt_runtime = RUNTIME_INF;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 		spin_unlock(&rt_b->rt_runtime_lock);
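
The "greedy reclaim" these comments describe is a plain walk that drains neighbours until the lent-out amount ("want") reaches zero. A hedged sketch of just that walk, with a flat array of neighbour budgets standing in for the per-CPU rt_rq iteration, and the hypothetical reclaim_runtime() replacing the locked min_t(s64, ...) loop in __disable_runtime():

#include <stdint.h>
#include <assert.h>

/*
 * Walk the neighbours and take runtime back until the full amount we
 * lent out has been recovered. The kernel code does the equivalent
 * under each neighbour's rt_runtime_lock.
 */
static void reclaim_runtime(int64_t *neighbour_runtime, int n, int64_t want)
{
	for (int i = 0; i < n && want > 0; i++) {
		int64_t diff = neighbour_runtime[i] < want ?
			       neighbour_runtime[i] : want;

		neighbour_runtime[i] -= diff;
		want -= diff;
	}

	/* Mirrors BUG_ON(want): no runtime may leak out of the system. */
	assert(want == 0);
}

int main(void)
{
	int64_t peers[] = { 50000000, 300000000, 0, 120000000 };

	/* We lent out 250ms in total; recover 50ms + 200ms. */
	reclaim_runtime(peers, 4, 250000000);
	return 0;
}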
@@ -341,6 +382,9 @@ static void __enable_runtime(struct rq *rq)
 	if (unlikely(!scheduler_running))
 		return;
 
+	/*
+	 * Reset each runqueue's bandwidth settings
+	 */
 	for_each_leaf_rt_rq(rt_rq, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
@@ -348,6 +392,7 @@ static void __enable_runtime(struct rq *rq)
 		spin_lock(&rt_rq->rt_runtime_lock);
 		rt_rq->rt_runtime = rt_b->rt_runtime;
 		rt_rq->rt_time = 0;
+		rt_rq->rt_throttled = 0;
 		spin_unlock(&rt_rq->rt_runtime_lock);
 		spin_unlock(&rt_b->rt_runtime_lock);
 	}
@@ -386,7 +431,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	int i, idle = 1;
 	cpumask_t span;
 
-	if (rt_b->rt_runtime == RUNTIME_INF)
+	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
 		return 1;
 
 	span = sched_rt_period_mask();
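
For reference, the rt_bandwidth_enabled() helper consulted in this new guard is, as far as I can tell, just a test on the sched_rt_runtime_us sysctl; roughly (paraphrased from kernel/sched.c of the same series, not part of this diff):

static inline int rt_bandwidth_enabled(void)
{
	/* RT bandwidth is globally disabled by sched_rt_runtime_us = -1. */
	return sysctl_sched_rt_runtime >= 0;
}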
@@ -438,9 +483,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 {
 	u64 runtime = sched_rt_runtime(rt_rq);
 
-	if (runtime == RUNTIME_INF)
-		return 0;
-
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
@@ -484,16 +526,23 @@ static void update_curr_rt(struct rq *rq)
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	if (!rt_bandwidth_enabled())
+		return;
+
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
 
 		spin_lock(&rt_rq->rt_runtime_lock);
-		rt_rq->rt_time += delta_exec;
-		if (sched_rt_runtime_exceeded(rt_rq))
-			resched_task(curr);
+		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
+			rt_rq->rt_time += delta_exec;
+			if (sched_rt_runtime_exceeded(rt_rq))
+				resched_task(curr);
+		}
 		spin_unlock(&rt_rq->rt_runtime_lock);
 	}
 }
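
With the RUNTIME_INF test hoisted to this caller (and removed from sched_rt_runtime_exceeded() in the hunk at -438 above), the throttle decision that remains reduces to comparing consumed time against the budget. A minimal sketch of that check, assuming time_ns and runtime_ns stand for the group's rt_time and rt_runtime (names and the simplified signature are mine, not the kernel's):

#include <stdint.h>
#include <stdbool.h>

/*
 * Once consumed time outruns the budget, mark the group throttled so
 * it stays dequeued until the period timer refills its runtime.
 */
static bool runtime_exceeded(uint64_t time_ns, uint64_t runtime_ns,
			     bool *throttled)
{
	if (time_ns > runtime_ns) {
		*throttled = true;
		return true;	/* caller then resched_task()s curr */
	}
	return false;
}

int main(void)
{
	bool throttled = false;

	/* 120ms consumed against a 100ms budget: throttle. */
	return runtime_exceeded(120000000ULL, 100000000ULL, &throttled) ? 0 : 1;
}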
@@ -782,7 +831,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 /*
  * Preempt the current task with a newly woken task if needed:
  */
-static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
+static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync)
 {
 	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
@@ -1411,7 +1460,7 @@ static void watchdog(struct rq *rq, struct task_struct *p)
 		p->rt.timeout++;
 		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
 		if (p->rt.timeout > next)
-			p->it_sched_expires = p->se.sum_exec_runtime;
+			p->cputime_expires.sched_exp = p->se.sum_exec_runtime;
 	}
 }
 
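The watchdog line touched here converts the RLIMIT_RTTIME soft/hard limits (in microseconds) into scheduler ticks before comparing them against p->rt.timeout. A quick worked check of that conversion, assuming HZ=250 and the example limits below for illustration:

#include <stdio.h>

#define USEC_PER_SEC	1000000UL
#define HZ		250UL	/* assumed config value, for illustration */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long soft = 10000;	/* 10ms RLIMIT_RTTIME soft limit */
	unsigned long hard = 50000;	/* 50ms hard limit */
	unsigned long min_us = soft < hard ? soft : hard;

	/*
	 * One tick is USEC_PER_SEC/HZ = 4000us at HZ=250, so a 10ms
	 * limit rounds up to 3 ticks; exceeding that arms the posix
	 * CPU timer expiry via cputime_expires.sched_exp.
	 */
	printf("limit = %lu ticks\n", DIV_ROUND_UP(min_us, USEC_PER_SEC / HZ));
	return 0;
}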