author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2007-10-15 11:00:05 -0400
committer  Ingo Molnar <mingo@elte.hu>               2007-10-15 11:00:05 -0400
commit     6d0f0ebd063e36cd0ebae9be15973b02c4245a99 (patch)
tree       007ce4f7942afd92cd21d82f89033b38dcb2de59 /kernel/sched_fair.c
parent     4d78e7b656aa6440c337302fe065338ce840a64e (diff)
sched: simplify adaptive latency
simplify adaptive latency.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
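In short, the patch below replaces the sched_granularity()/niced_granularity() pair with a single sched_slice() helper: each entity gets a share of the latency period proportional to its load weight, and the running task is rescheduled once it has used up that share; nice levels no longer need separate granularity scaling because they are already folded into se->load.weight. A minimal userspace sketch of the slice arithmetic follows (illustrative names and numbers only, not the kernel implementation, which works on u64 and uses do_div()):

#include <stdint.h>
#include <stdio.h>

/* slice = period * entity weight / total runqueue weight */
static uint64_t slice_ns(uint64_t period_ns, unsigned long se_weight,
                         unsigned long rq_weight)
{
        uint64_t slice = period_ns * se_weight;

        return slice / rq_weight;       /* the kernel uses do_div() here */
}

int main(void)
{
        /* Two equal-weight tasks sharing a 20 ms period: 10 ms each. */
        printf("%llu ns\n",
               (unsigned long long)slice_ns(20000000ULL, 1024, 2048));
        /* A task holding 3/4 of the runqueue weight gets 15 ms. */
        printf("%llu ns\n",
               (unsigned long long)slice_ns(20000000ULL, 3072, 4096));
        return 0;
}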
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 113
1 file changed, 9 insertions(+), 104 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 95487e3c8b06..3179d1129a80 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -217,77 +217,14 @@ static u64 __sched_period(unsigned long nr_running)
 	return period;
 }
 
-/*
- * Calculate the preemption granularity needed to schedule every
- * runnable task once per sysctl_sched_latency amount of time.
- * (down to a sensible low limit on granularity)
- *
- * For example, if there are 2 tasks running and latency is 10 msecs,
- * we switch tasks every 5 msecs. If we have 3 tasks running, we have
- * to switch tasks every 3.33 msecs to get a 10 msecs observed latency
- * for each task. We do finer and finer scheduling up to until we
- * reach the minimum granularity value.
- *
- * To achieve this we use the following dynamic-granularity rule:
- *
- *    gran = lat/nr - lat/nr/nr
- *
- * This comes out of the following equations:
- *
- *    kA1 + gran = kB1
- *    kB2 + gran = kA2
- *    kA2 = kA1
- *    kB2 = kB1 - d + d/nr
- *    lat = d * nr
- *
- * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running),
- * '1' is start of time, '2' is end of time, 'd' is delay between
- * 1 and 2 (during which task B was running), 'nr' is number of tasks
- * running, 'lat' is the the period of each task. ('lat' is the
- * sched_latency that we aim for.)
- */
-static long
-sched_granularity(struct cfs_rq *cfs_rq)
+static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned int gran = sysctl_sched_latency;
-	unsigned int nr = cfs_rq->nr_running;
-
-	if (nr > 1) {
-		gran = gran/nr - gran/nr/nr;
-		gran = max(gran, sysctl_sched_min_granularity);
-	}
+	u64 period = __sched_period(cfs_rq->nr_running);
 
-	return gran;
-}
+	period *= se->load.weight;
+	do_div(period, cfs_rq->load.weight);
 
-/*
- * We rescale the rescheduling granularity of tasks according to their
- * nice level, but only linearly, not exponentially:
- */
-static long
-niced_granularity(struct sched_entity *curr, unsigned long granularity)
-{
-	u64 tmp;
-
-	if (likely(curr->load.weight == NICE_0_LOAD))
-		return granularity;
-	/*
-	 * Positive nice levels get the same granularity as nice-0:
-	 */
-	if (likely(curr->load.weight < NICE_0_LOAD)) {
-		tmp = curr->load.weight * (u64)granularity;
-		return (long) (tmp >> NICE_0_SHIFT);
-	}
-	/*
-	 * Negative nice level tasks get linearly finer
-	 * granularity:
-	 */
-	tmp = curr->load.inv_weight * (u64)granularity;
-
-	/*
-	 * It will always fit into 'long':
-	 */
-	return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
+	return period;
 }
 
 static inline void
@@ -646,36 +583,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  */
 static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			  struct sched_entity *curr, unsigned long granularity)
+			  struct sched_entity *curr)
 {
-	s64 __delta = curr->fair_key - se->fair_key;
 	unsigned long ideal_runtime, delta_exec;
 
-	/*
-	 * ideal_runtime is compared against sum_exec_runtime, which is
-	 * walltime, hence do not scale.
-	 */
-	ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
-			(unsigned long)sysctl_sched_min_granularity);
-
-	/*
-	 * If we executed more than what the latency constraint suggests,
-	 * reduce the rescheduling granularity. This way the total latency
-	 * of how much a task is not scheduled converges to
-	 * sysctl_sched_latency:
-	 */
+	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 	if (delta_exec > ideal_runtime)
-		granularity = 0;
-
-	/*
-	 * Take scheduling granularity into account - do not
-	 * preempt the current task unless the best task has
-	 * a larger than sched_granularity fairness advantage:
-	 *
-	 * scale granularity as key space is in fair_clock.
-	 */
-	if (__delta > niced_granularity(curr, granularity))
 		resched_task(rq_of(cfs_rq)->curr);
 }
 
@@ -749,8 +663,7 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (next == curr)
 		return;
 
-	__check_preempt_curr_fair(cfs_rq, next, curr,
-			sched_granularity(cfs_rq));
+	__check_preempt_curr_fair(cfs_rq, next, curr);
 }
 
 /**************************************************
@@ -944,7 +857,6 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	unsigned long gran;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -953,15 +865,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
-	gran = sysctl_sched_wakeup_granularity;
-	/*
-	 * Batch tasks prefer throughput over latency:
-	 */
-	if (unlikely(p->policy == SCHED_BATCH))
-		gran = sysctl_sched_batch_wakeup_granularity;
-
 	if (is_same_group(curr, p))
-		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
+		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)