author    Peter Zijlstra <peterz@infradead.org>    2015-01-28 09:08:03 -0500
committer Ingo Molnar <mingo@kernel.org>           2015-02-04 01:42:48 -0500
commit    40767b0dc768060266d261b4a330164b4be53f7c (patch)
tree      9a6bfc63f4ad74053eee60243edd22683386ce4d /kernel/sched
parent    3e87523897e18a3e17fc8955ed795188be737ff1 (diff)
sched/deadline: Fix deadline parameter modification handling
Commit 67dfa1b756f2 ("sched/deadline: Implement cancel_dl_timer() to use in
switched_from_dl()") removed the hrtimer_try_to_cancel() call from
init_dl_task_timer(), which gets called from __setparam_dl(). The result is
that we can now re-init the timer while it's active -- this is bad and
corrupts timer state.

Furthermore, changing the parameters of an active deadline task is tricky in
that you want to maintain guarantees, while an immediately effective change
would allow one to circumvent the CBS guarantees -- this too is bad, as one
(bad) task should not be able to affect the others.

Rework things to avoid both problems. We only need to initialize the timer
once, so move that to __sched_fork() for new tasks. Then make sure
__setparam_dl() doesn't affect the current running state but only updates the
parameters used to calculate the next scheduling period -- this guarantees
the CBS functions as expected (albeit slightly pessimistically).

This however means __dl_clear_params() needs to reset the active state,
otherwise new tasks (and tasks flipping between classes) will not properly
(re)compute their first instance.

Todo: close class flipping CBS hole.
Todo: implement delayed BW release.

Reported-by: Luca Abeni <luca.abeni@unitn.it>
Acked-by: Juri Lelli <juri.lelli@arm.com>
Tested-by: Luca Abeni <luca.abeni@unitn.it>
Fixes: 67dfa1b756f2 ("sched/deadline: Implement cancel_dl_timer() to use in switched_from_dl()")
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150128140803.GF23038@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
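A purely illustrative aside on the corruption mechanism (toy userspace C,
not kernel code; the struct and helpers below are made up for the example):
hrtimer_init() unconditionally resets the node that links the timer into the
per-CPU timer queue, so calling it on a timer that is still enqueued leaves
the queue referencing stale state. The toy queue below shows the same
failure mode:

#include <stdio.h>

struct node { struct node *next; };

static struct node queue;		/* stand-in for the hrtimer queue */

static void enqueue(struct node *n)
{
	n->next = queue.next;
	queue.next = n;
}

/* stand-in for hrtimer_init(): blindly resets link state, active or not */
static void init_node(struct node *n)
{
	n->next = NULL;
}

int main(void)
{
	struct node a, b;

	init_node(&a);
	init_node(&b);
	enqueue(&a);
	enqueue(&b);		/* queue is now: head -> b -> a */

	init_node(&b);		/* re-init while still enqueued ... */

	/*
	 * ... the head still points at b, but b no longer points at a:
	 * a has silently fallen off the queue -- corrupted queue state.
	 */
	for (struct node *n = queue.next; n; n = n->next)
		printf("still queued: %p\n", (void *)n);

	return 0;
}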
Diffstat (limited to 'kernel/sched')
 -rw-r--r--  kernel/sched/core.c      | 33 +++++++++++++++++++++++++++-----
 -rw-r--r--  kernel/sched/deadline.c  |  3 ++-
 2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5c86687d22b3..9e838095beb8 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1814,6 +1814,10 @@ void __dl_clear_params(struct task_struct *p)
 	dl_se->dl_period = 0;
 	dl_se->flags = 0;
 	dl_se->dl_bw = 0;
+
+	dl_se->dl_throttled = 0;
+	dl_se->dl_new = 1;
+	dl_se->dl_yielded = 0;
 }
 
 /*
@@ -1839,7 +1843,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 
 	RB_CLEAR_NODE(&p->dl.rb_node);
-	hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+	init_dl_task_timer(&p->dl);
 	__dl_clear_params(p);
 
 	INIT_LIST_HEAD(&p->rt.run_list);
@@ -2049,6 +2053,9 @@ static inline int dl_bw_cpus(int i)
  * allocated bandwidth to reflect the new situation.
  *
  * This function is called while holding p's rq->lock.
+ *
+ * XXX we should delay bw change until the task's 0-lag point, see
+ * __setparam_dl().
  */
 static int dl_overflow(struct task_struct *p, int policy,
 		       const struct sched_attr *attr)
@@ -3251,15 +3258,31 @@ __setparam_dl(struct task_struct *p, const struct sched_attr *attr)
 {
 	struct sched_dl_entity *dl_se = &p->dl;
 
-	init_dl_task_timer(dl_se);
 	dl_se->dl_runtime = attr->sched_runtime;
 	dl_se->dl_deadline = attr->sched_deadline;
 	dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline;
 	dl_se->flags = attr->sched_flags;
 	dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime);
-	dl_se->dl_throttled = 0;
-	dl_se->dl_new = 1;
-	dl_se->dl_yielded = 0;
+
+	/*
+	 * Changing the parameters of a task is 'tricky' and we're not doing
+	 * the correct thing -- also see task_dead_dl() and switched_from_dl().
+	 *
+	 * What we SHOULD do is delay the bandwidth release until the 0-lag
+	 * point. This would include retaining the task_struct until that time
+	 * and change dl_overflow() to not immediately decrement the current
+	 * amount.
+	 *
+	 * Instead we retain the current runtime/deadline and let the new
+	 * parameters take effect after the current reservation period lapses.
+	 * This is safe (albeit pessimistic) because the 0-lag point is always
+	 * before the current scheduling deadline.
+	 *
+	 * We can still have temporary overloads because we do not delay the
+	 * change in bandwidth until that time; so admission control is
+	 * not on the safe side. It does however guarantee tasks will never
+	 * consume more than promised.
+	 */
 }
 
 /*
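For reference, the "0-lag point" these comments refer to is the standard
CBS notion: the earliest instant at which a task, were it to stop consuming
bandwidth now, would hold none of its reserved bandwidth. A sketch of the
arithmetic (notation ours, not taken from the kernel source):

% q = remaining runtime, d = current absolute deadline,
% U = dl_runtime / dl_period (the reserved utilization)
\[
	t_{0\text{-}\mathrm{lag}} = d - \frac{q}{U}
\]

Since q >= 0 and U > 0, t_{0-lag} <= d always holds: the 0-lag point can
never lie past the current scheduling deadline, which is exactly why
deferring the parameter change to the next period is safe, albeit
pessimistic, as the new comment in __setparam_dl() argues.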
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index b52092f2636d..726470d47f87 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1094,6 +1094,7 @@ static void task_dead_dl(struct task_struct *p)
 	 * Since we are TASK_DEAD we won't slip out of the domain!
 	 */
 	raw_spin_lock_irq(&dl_b->lock);
+	/* XXX we should retain the bw until 0-lag */
 	dl_b->total_bw -= p->dl.dl_bw;
 	raw_spin_unlock_irq(&dl_b->lock);
 
@@ -1614,8 +1615,8 @@ static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
 
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
+	/* XXX we should retain the bw until 0-lag */
 	cancel_dl_timer(rq, p);
-
 	__dl_clear_params(p);
 
 	/*
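From userspace, the path this patch fixes is reached by calling
sched_setattr(2) a second time on an already-running SCHED_DEADLINE task.
A minimal sketch of such a parameter update (glibc provides no wrapper for
this syscall, so it goes through syscall(2); the struct layout follows the
sched_setattr(2) man page, kernel >= 3.14 assumed):

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>

#define SCHED_DEADLINE	6	/* not exposed by glibc's <sched.h> */

struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;		/* ns */
	uint64_t sched_deadline;	/* ns */
	uint64_t sched_period;		/* ns */
};

static int sched_setattr(pid_t pid, const struct sched_attr *attr,
			 unsigned int flags)
{
	return syscall(SYS_sched_setattr, pid, attr, flags);
}

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  = 10 * 1000 * 1000;	/* 10 ms */
	attr.sched_deadline = 30 * 1000 * 1000;	/* 30 ms */
	attr.sched_period   = 30 * 1000 * 1000;	/* 30 ms */

	if (sched_setattr(0, &attr, 0))
		perror("sched_setattr (initial)");

	/*
	 * Update the reservation while running. Before this patch the
	 * kernel would re-init the active dl_timer here; after it, the
	 * current runtime/deadline are kept and the new parameters only
	 * take effect once the current reservation period lapses.
	 */
	attr.sched_runtime = 5 * 1000 * 1000;	/* shrink to 5 ms */
	if (sched_setattr(0, &attr, 0))
		perror("sched_setattr (update)");

	return 0;
}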