author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2007-10-15 11:00:05 -0400
committer  Ingo Molnar <mingo@elte.hu>               2007-10-15 11:00:05 -0400
commit     6d0f0ebd063e36cd0ebae9be15973b02c4245a99 (patch)
tree       007ce4f7942afd92cd21d82f89033b38dcb2de59 /kernel/sched_fair.c
parent     4d78e7b656aa6440c337302fe065338ce840a64e (diff)
sched: simplify adaptive latency
simplify adaptive latency.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
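In short, the patch below replaces the sched_granularity()/niced_granularity() pair with a single sched_slice() helper: each entity gets a share of the latency period proportional to its load weight, and the running task is rescheduled once it has used up that share; nice levels no longer need separate granularity scaling because they are already folded into se->load.weight. A minimal userspace sketch of the slice arithmetic follows (illustrative names and numbers only, not the kernel implementation, which works on u64 and uses do_div()):

#include <stdint.h>
#include <stdio.h>

/* slice = period * entity weight / total runqueue weight */
static uint64_t slice_ns(uint64_t period_ns, unsigned long se_weight,
                         unsigned long rq_weight)
{
        uint64_t slice = period_ns * se_weight;

        return slice / rq_weight;       /* the kernel uses do_div() here */
}

int main(void)
{
        /* Two equal-weight tasks sharing a 20 ms period: 10 ms each. */
        printf("%llu ns\n",
               (unsigned long long)slice_ns(20000000ULL, 1024, 2048));
        /* A task holding 3/4 of the runqueue weight gets 15 ms. */
        printf("%llu ns\n",
               (unsigned long long)slice_ns(20000000ULL, 3072, 4096));
        return 0;
}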
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 113
1 file changed, 9 insertions(+), 104 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 95487e3c8b06..3179d1129a80 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -217,77 +217,14 @@ static u64 __sched_period(unsigned long nr_running)
 	return period;
 }
 
-/*
- * Calculate the preemption granularity needed to schedule every
- * runnable task once per sysctl_sched_latency amount of time.
- * (down to a sensible low limit on granularity)
- *
- * For example, if there are 2 tasks running and latency is 10 msecs,
- * we switch tasks every 5 msecs. If we have 3 tasks running, we have
- * to switch tasks every 3.33 msecs to get a 10 msecs observed latency
- * for each task. We do finer and finer scheduling up to until we
- * reach the minimum granularity value.
- *
- * To achieve this we use the following dynamic-granularity rule:
- *
- *    gran = lat/nr - lat/nr/nr
- *
- * This comes out of the following equations:
- *
- *    kA1 + gran = kB1
- *    kB2 + gran = kA2
- *    kA2 = kA1
- *    kB2 = kB1 - d + d/nr
- *    lat = d * nr
- *
- * Where 'k' is key, 'A' is task A (waiting), 'B' is task B (running),
- * '1' is start of time, '2' is end of time, 'd' is delay between
- * 1 and 2 (during which task B was running), 'nr' is number of tasks
- * running, 'lat' is the the period of each task. ('lat' is the
- * sched_latency that we aim for.)
- */
-static long
-sched_granularity(struct cfs_rq *cfs_rq)
+static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned int gran = sysctl_sched_latency;
-	unsigned int nr = cfs_rq->nr_running;
-
-	if (nr > 1) {
-		gran = gran/nr - gran/nr/nr;
-		gran = max(gran, sysctl_sched_min_granularity);
-	}
+	u64 period = __sched_period(cfs_rq->nr_running);
 
-	return gran;
-}
+	period *= se->load.weight;
+	do_div(period, cfs_rq->load.weight);
 
-/*
- * We rescale the rescheduling granularity of tasks according to their
- * nice level, but only linearly, not exponentially:
- */
-static long
-niced_granularity(struct sched_entity *curr, unsigned long granularity)
-{
-	u64 tmp;
-
-	if (likely(curr->load.weight == NICE_0_LOAD))
-		return granularity;
-	/*
-	 * Positive nice levels get the same granularity as nice-0:
-	 */
-	if (likely(curr->load.weight < NICE_0_LOAD)) {
-		tmp = curr->load.weight * (u64)granularity;
-		return (long) (tmp >> NICE_0_SHIFT);
-	}
-	/*
-	 * Negative nice level tasks get linearly finer
-	 * granularity:
-	 */
-	tmp = curr->load.inv_weight * (u64)granularity;
-
-	/*
-	 * It will always fit into 'long':
-	 */
-	return (long) (tmp >> (WMULT_SHIFT-NICE_0_SHIFT));
+	return period;
 }
 
 static inline void
@@ -646,36 +583,13 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
  */
 static void
 __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
-			  struct sched_entity *curr, unsigned long granularity)
+			  struct sched_entity *curr)
 {
-	s64 __delta = curr->fair_key - se->fair_key;
 	unsigned long ideal_runtime, delta_exec;
 
-	/*
-	 * ideal_runtime is compared against sum_exec_runtime, which is
-	 * walltime, hence do not scale.
-	 */
-	ideal_runtime = max(sysctl_sched_latency / cfs_rq->nr_running,
-			(unsigned long)sysctl_sched_min_granularity);
-
-	/*
-	 * If we executed more than what the latency constraint suggests,
-	 * reduce the rescheduling granularity. This way the total latency
-	 * of how much a task is not scheduled converges to
-	 * sysctl_sched_latency:
-	 */
+	ideal_runtime = sched_slice(cfs_rq, curr);
 	delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
 	if (delta_exec > ideal_runtime)
-		granularity = 0;
-
-	/*
-	 * Take scheduling granularity into account - do not
-	 * preempt the current task unless the best task has
-	 * a larger than sched_granularity fairness advantage:
-	 *
-	 * scale granularity as key space is in fair_clock.
-	 */
-	if (__delta > niced_granularity(curr, granularity))
 		resched_task(rq_of(cfs_rq)->curr);
 }
 
@@ -749,8 +663,7 @@ static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 	if (next == curr)
 		return;
 
-	__check_preempt_curr_fair(cfs_rq, next, curr,
-			sched_granularity(cfs_rq));
+	__check_preempt_curr_fair(cfs_rq, next, curr);
 }
 
 /**************************************************
@@ -944,7 +857,6 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 {
 	struct task_struct *curr = rq->curr;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
-	unsigned long gran;
 
 	if (unlikely(rt_prio(p->prio))) {
 		update_rq_clock(rq);
@@ -953,15 +865,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
-	gran = sysctl_sched_wakeup_granularity;
-	/*
-	 * Batch tasks prefer throughput over latency:
-	 */
-	if (unlikely(p->policy == SCHED_BATCH))
-		gran = sysctl_sched_batch_wakeup_granularity;
-
 	if (is_same_group(curr, p))
-		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
+		__check_preempt_curr_fair(cfs_rq, &p->se, &curr->se);
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)