about summary refs log tree commit diff stats
path: root/kernel
diff options
context:
space:
mode:
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-10-17 13:27:04 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-10-20 08:05:04 -0400
commit	f9c0b0950d5fd8c8c5af39bc061f27ea8fddcac3 (patch)
tree	288537eac8d5e03970422ac0c705617e551f544d /kernel
parent	a4c2f00f5cb848af7a8c816426b413c8e41834df (diff)
sched: revert back to per-rq vruntime
Vatsa rightly points out that having the runqueue weight in the vruntime
calculations can cause unfairness in the face of task joins/leaves.

Suppose:

  dv = dt * rw / w

Then take 10 tasks t_n, each of similar weight. If the first will run 1
then its vruntime will increase by 10. Now, if the next 8 tasks leave after
having run their 1, then the last task will get a vruntime increase of 2
after having run 1.

Which will leave us with 2 tasks of equal weight and equal runtime, of
which one will not be scheduled for 8/2=4 units of time.

Ergo, we cannot do that and must use:

  dv = dt / w

This means we cannot have a global vruntime based on effective priority,
but must instead go back to the vruntime per rq model we started out with.

This patch was lightly tested by doing starting while loops on each nice
level and observing their execution time, and a simple group scenario of
1:2:3 pinned to a single cpu.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched_fair.c	32
1 file changed, 15 insertions(+), 17 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0c4bcac54761..a0aa38b10fdd 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -336,7 +336,7 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
 #endif
 
 /*
- * delta *= w / rw
+ * delta *= P[w / rw]
  */
 static inline unsigned long
 calc_delta_weight(unsigned long delta, struct sched_entity *se)
@@ -350,15 +350,13 @@ calc_delta_weight(unsigned long delta, struct sched_entity *se)
 }
 
 /*
- * delta *= rw / w
+ * delta /= w
  */
 static inline unsigned long
 calc_delta_fair(unsigned long delta, struct sched_entity *se)
 {
-	for_each_sched_entity(se) {
-		delta = calc_delta_mine(delta,
-				cfs_rq_of(se)->load.weight, &se->load);
-	}
+	if (unlikely(se->load.weight != NICE_0_LOAD))
+		delta = calc_delta_mine(delta, NICE_0_LOAD, &se->load);
 
 	return delta;
 }
@@ -388,26 +386,26 @@ static u64 __sched_period(unsigned long nr_running)
  * We calculate the wall-time slice from the period by taking a part
  * proportional to the weight.
  *
- * s = p*w/rw
+ * s = p*P[w/rw]
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	return calc_delta_weight(__sched_period(cfs_rq->nr_running), se);
+	unsigned long nr_running = cfs_rq->nr_running;
+
+	if (unlikely(!se->on_rq))
+		nr_running++;
+
+	return calc_delta_weight(__sched_period(nr_running), se);
 }
 
 /*
  * We calculate the vruntime slice of a to be inserted task
  *
- * vs = s*rw/w = p
+ * vs = s/w
  */
-static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	unsigned long nr_running = cfs_rq->nr_running;
-
-	if (!se->on_rq)
-		nr_running++;
-
-	return __sched_period(nr_running);
+	return calc_delta_fair(sched_slice(cfs_rq, se), se);
 }
 
 /*
@@ -629,7 +627,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	 * stays open at the end.
 	 */
 	if (initial && sched_feat(START_DEBIT))
-		vruntime += sched_vslice_add(cfs_rq, se);
+		vruntime += sched_vslice(cfs_rq, se);
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */