path: root/kernel
author     Ingo Molnar <mingo@elte.hu>    2007-10-15 11:00:04 -0400
committer  Ingo Molnar <mingo@elte.hu>    2007-10-15 11:00:04 -0400
commit     e9acbff6484df51fd880e0f5fe0224e8be34c17b (patch)
tree       9ac0275ae7dcd13fb8b628971bf038e9f7318d1e /kernel
parent     08e2388aa1e40cb06f7d04ac621e2ae94e1d8fdc (diff)
sched: introduce se->vruntime
introduce se->vruntime as a sum of weighted delta-exec's, and use that
as the key into the tree.

the idea of using absolute virtual time as the basic metric of
scheduling was first raised by William Lee Irwin, advanced by Tong Li
and first prototyped by Roman Zippel in the "Really Fair Scheduler"
(RFS) patchset.

also see:

   http://lkml.org/lkml/2007/9/2/76

for a simpler variant of this patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
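[Editor's illustration, not part of the patch] A minimal user-space sketch of the accumulation rule the changelog describes: each entity's vruntime grows by its executed time scaled inversely to its load weight, so higher-weight entities accumulate virtual time more slowly and the entity with the smallest vruntime is the next one to run. The names account_exec(), pick_next() and NICE_0_WEIGHT are invented for this sketch; only the idea mirrors the patch.

    #include <stdint.h>

    #define NICE_0_WEIGHT 1024UL

    struct entity {
            uint64_t vruntime;       /* sum of weighted execution time */
            unsigned long weight;    /* load weight, 1024 for nice 0 */
    };

    /* charge delta_exec nanoseconds of real execution to the entity */
    static void account_exec(struct entity *e, uint64_t delta_exec)
    {
            /* nice-0 entities advance at wall-clock rate, others are scaled */
            if (e->weight != NICE_0_WEIGHT)
                    delta_exec = delta_exec * NICE_0_WEIGHT / e->weight;
            e->vruntime += delta_exec;
    }

    /* the tree key: the entity with the smallest vruntime runs next */
    static struct entity *pick_next(struct entity *a, struct entity *b)
    {
            return a->vruntime <= b->vruntime ? a : b;
    }

A heavier entity therefore gains less vruntime per nanosecond of CPU time, so entities with equal vruntime have received equal weighted service.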
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c       |  1
-rw-r--r--  kernel/sched_fair.c  | 81
2 files changed, 49 insertions, 33 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 992a1fae72a7..8f80ebafacc1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -182,6 +182,7 @@ struct cfs_rq {
 
         s64 fair_clock;
         u64 exec_clock;
+        u64 min_vruntime;
         s64 wait_runtime;
         u64 sleeper_bonus;
         unsigned long wait_runtime_overruns, wait_runtime_underruns;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b46f8078e78f..a2af09cb6a70 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -92,14 +92,16 @@ unsigned int sysctl_sched_runtime_limit __read_mostly;
  */
 enum {
         SCHED_FEAT_FAIR_SLEEPERS        = 1,
-        SCHED_FEAT_SLEEPER_AVG          = 2,
-        SCHED_FEAT_SLEEPER_LOAD_AVG     = 4,
-        SCHED_FEAT_START_DEBIT          = 8,
-        SCHED_FEAT_SKIP_INITIAL         = 16,
+        SCHED_FEAT_NEW_FAIR_SLEEPERS    = 2,
+        SCHED_FEAT_SLEEPER_AVG          = 4,
+        SCHED_FEAT_SLEEPER_LOAD_AVG     = 8,
+        SCHED_FEAT_START_DEBIT          = 16,
+        SCHED_FEAT_SKIP_INITIAL         = 32,
 };
 
 const_debug unsigned int sysctl_sched_features =
-                SCHED_FEAT_FAIR_SLEEPERS        *1 |
+                SCHED_FEAT_FAIR_SLEEPERS        *0 |
+                SCHED_FEAT_NEW_FAIR_SLEEPERS    *1 |
                 SCHED_FEAT_SLEEPER_AVG          *0 |
                 SCHED_FEAT_SLEEPER_LOAD_AVG     *1 |
                 SCHED_FEAT_START_DEBIT          *1 |
@@ -145,6 +147,19 @@ static inline struct task_struct *task_of(struct sched_entity *se)
  * Scheduling class tree data structure manipulation methods:
  */
 
+static inline void
+set_leftmost(struct cfs_rq *cfs_rq, struct rb_node *leftmost)
+{
+        struct sched_entity *se;
+
+        cfs_rq->rb_leftmost = leftmost;
+        if (leftmost) {
+                se = rb_entry(leftmost, struct sched_entity, run_node);
+                cfs_rq->min_vruntime = max(se->vruntime,
+                                                cfs_rq->min_vruntime);
+        }
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
@@ -180,7 +195,7 @@ __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
          * used):
          */
         if (leftmost)
-                cfs_rq->rb_leftmost = &se->run_node;
+                set_leftmost(cfs_rq, &se->run_node);
 
         rb_link_node(&se->run_node, parent, link);
         rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -195,7 +210,8 @@ static void
 __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         if (cfs_rq->rb_leftmost == &se->run_node)
-                cfs_rq->rb_leftmost = rb_next(&se->run_node);
+                set_leftmost(cfs_rq, rb_next(&se->run_node));
+
         rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
         update_load_sub(&cfs_rq->load, se->load.weight);
         cfs_rq->nr_running--;
@@ -336,7 +352,7 @@ static inline void
 __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
               unsigned long delta_exec)
 {
-        unsigned long delta, delta_fair, delta_mine;
+        unsigned long delta, delta_fair, delta_mine, delta_exec_weighted;
         struct load_weight *lw = &cfs_rq->load;
         unsigned long load = lw->weight;
 
@@ -344,6 +360,12 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 
         curr->sum_exec_runtime += delta_exec;
         cfs_rq->exec_clock += delta_exec;
+        delta_exec_weighted = delta_exec;
+        if (unlikely(curr->load.weight != NICE_0_LOAD)) {
+                delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
+                                                        &curr->load);
+        }
+        curr->vruntime += delta_exec_weighted;
 
         if (unlikely(!load))
                 return;
@@ -413,8 +435,6 @@ calc_weighted(unsigned long delta, struct sched_entity *se)
  */
 static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-        s64 key;
-
         /*
          * Are we enqueueing a waiting task? (for current tasks
          * a dequeue/enqueue event is a NOP)
@@ -424,28 +444,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
         /*
          * Update the key:
          */
-        key = cfs_rq->fair_clock;
-
-        /*
-         * Optimize the common nice 0 case:
-         */
-        if (likely(se->load.weight == NICE_0_LOAD)) {
-                key -= se->wait_runtime;
-        } else {
-                u64 tmp;
-
-                if (se->wait_runtime < 0) {
-                        tmp = -se->wait_runtime;
-                        key += (tmp * se->load.inv_weight) >>
-                                (WMULT_SHIFT - NICE_0_SHIFT);
-                } else {
-                        tmp = se->wait_runtime;
-                        key -= (tmp * se->load.inv_weight) >>
-                                (WMULT_SHIFT - NICE_0_SHIFT);
-                }
-        }
-
-        se->fair_key = key;
+        se->fair_key = se->vruntime;
 }
 
 /*
@@ -615,8 +614,22 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
          */
         update_curr(cfs_rq);
 
-        if (wakeup)
+        if (wakeup) {
+                u64 min_runtime, latency;
+
+                min_runtime = cfs_rq->min_vruntime;
+                min_runtime += sysctl_sched_latency/2;
+
+                if (sched_feat(NEW_FAIR_SLEEPERS)) {
+                        latency = calc_weighted(sysctl_sched_latency, se);
+                        if (min_runtime > latency)
+                                min_runtime -= latency;
+                }
+
+                se->vruntime = max(se->vruntime, min_runtime);
+
                 enqueue_sleeper(cfs_rq, se);
+        }
 
         update_stats_enqueue(cfs_rq, se);
         __enqueue_entity(cfs_rq, se);
@@ -1155,6 +1168,8 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
         if (sched_feat(START_DEBIT))
                 se->wait_runtime = -(sched_granularity(cfs_rq) / 2);
 
+        se->vruntime = cfs_rq->min_vruntime;
+        update_stats_enqueue(cfs_rq, se);
         __enqueue_entity(cfs_rq, se);
         resched_task(rq->curr);
 }
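[Editor's illustration, not part of the commit] A rough standalone sketch of the wakeup placement the enqueue_entity() hunk above introduces: a waking entity's vruntime is pulled up to roughly the queue's min_vruntime plus half a scheduling latency, with a weighted sleeper credit subtracted when NEW_FAIR_SLEEPERS is enabled. The exact scaling of calc_weighted() is assumed here (latency scaled by weight/1024); place_waking_entity() and its parameters are invented for the sketch.

    #include <stdint.h>

    #define NICE_0_WEIGHT 1024ULL

    static uint64_t place_waking_entity(uint64_t vruntime, uint64_t min_vruntime,
                                        uint64_t sched_latency, uint64_t weight,
                                        int new_fair_sleepers)
    {
            /* start half a latency period ahead of the queue minimum */
            uint64_t min_runtime = min_vruntime + sched_latency / 2;

            if (new_fair_sleepers) {
                    /* assumed weighting of the sleeper credit */
                    uint64_t credit = sched_latency * weight / NICE_0_WEIGHT;

                    if (min_runtime > credit)
                            min_runtime -= credit;
            }

            /* never move an entity backwards in virtual time */
            return vruntime > min_runtime ? vruntime : min_runtime;
    }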