diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-19 13:45:00 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:45:00 -0400 |
commit | 8f1bc385cfbab474db6c27b5af1e439614f3025c (patch) | |
tree | e4cfb8255d62621d17bc08ee5f94f42a0cc22677 /kernel | |
parent | 4a55bd5e97b1775913f88f11108a4f144f590e89 (diff) |
sched: fair: weight calculations
In order to level the hierarchy, we need to calculate load based on the
root view. That is, each task's load is in the same unit.
        A
       / \
      B   1
     / \
    2   3
To compute 1's load we do:
$$\frac{\text{weight}(1)}{\text{rq\_weight}(A)}$$
To compute 2's load we do:
$$\frac{\text{weight}(2)}{\text{rq\_weight}(B)} \cdot \frac{\text{weight}(B)}{\text{rq\_weight}(A)}$$
This yields load fractions in comparable units.
The consequence is that it changes virtual time. We used to have:
$$\text{vtime}_{i} = \frac{\text{time}_{i}}{\text{weight}_{i}}$$
$$\text{vtime} = \sum_{i} \text{vtime}_{i} = \frac{\text{time}}{\text{rq\_weight}}$$
But with the new way of load calculation we get that vtime equals time.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 9 | ||||
-rw-r--r-- | kernel/sched_fair.c | 95 |
2 files changed, 65 insertions, 39 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 3202462109f5..6d55dfc56cab 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1320,6 +1320,9 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1320 | */ | 1320 | */ |
1321 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1321 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1322 | 1322 | ||
1323 | /* | ||
1324 | * delta *= weight / lw | ||
1325 | */ | ||
1323 | static unsigned long | 1326 | static unsigned long |
1324 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1327 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1325 | struct load_weight *lw) | 1328 | struct load_weight *lw) |
@@ -1342,12 +1345,6 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1342 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1345 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1343 | } | 1346 | } |
1344 | 1347 | ||
1345 | static inline unsigned long | ||
1346 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1347 | { | ||
1348 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1349 | } | ||
1350 | |||
1351 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1348 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1352 | { | 1349 | { |
1353 | lw->weight += inc; | 1350 | lw->weight += inc; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index ed8ce329899b..d72e8b41b3e4 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -334,6 +334,34 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * delta *= w / rw | ||
338 | */ | ||
339 | static inline unsigned long | ||
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
341 | { | ||
342 | for_each_sched_entity(se) { | ||
343 | delta = calc_delta_mine(delta, | ||
344 | se->load.weight, &cfs_rq_of(se)->load); | ||
345 | } | ||
346 | |||
347 | return delta; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * delta *= rw / w | ||
352 | */ | ||
353 | static inline unsigned long | ||
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
355 | { | ||
356 | for_each_sched_entity(se) { | ||
357 | delta = calc_delta_mine(delta, | ||
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | |||
361 | return delta; | ||
362 | } | ||
363 | |||
364 | /* | ||
337 | * The idea is to set a period in which each task runs once. | 365 | * The idea is to set a period in which each task runs once. |
338 | * | 366 | * |
339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
@@ -362,47 +390,54 @@ static u64 __sched_period(unsigned long nr_running) | |||
362 | */ | 390 | */ |
363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
364 | { | 392 | { |
365 | u64 slice = __sched_period(cfs_rq->nr_running); | 393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); |
366 | |||
367 | for_each_sched_entity(se) { | ||
368 | cfs_rq = cfs_rq_of(se); | ||
369 | |||
370 | slice *= se->load.weight; | ||
371 | do_div(slice, cfs_rq->load.weight); | ||
372 | } | ||
373 | |||
374 | |||
375 | return slice; | ||
376 | } | 394 | } |
377 | 395 | ||
378 | /* | 396 | /* |
379 | * We calculate the vruntime slice of a to be inserted task | 397 | * We calculate the vruntime slice of a to be inserted task |
380 | * | 398 | * |
381 | * vs = s/w = p/rw | 399 | * vs = s*rw/w = p |
382 | */ | 400 | */ |
383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
384 | { | 402 | { |
385 | unsigned long nr_running = cfs_rq->nr_running; | 403 | unsigned long nr_running = cfs_rq->nr_running; |
386 | unsigned long weight; | ||
387 | u64 vslice; | ||
388 | 404 | ||
389 | if (!se->on_rq) | 405 | if (!se->on_rq) |
390 | nr_running++; | 406 | nr_running++; |
391 | 407 | ||
392 | vslice = __sched_period(nr_running); | 408 | return __sched_period(nr_running); |
409 | } | ||
410 | |||
411 | /* | ||
412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
413 | * that it favours >=0 over <0. | ||
414 | * | ||
415 | * -20 | | ||
416 | * | | ||
417 | * 0 --------+------- | ||
418 | * .' | ||
419 | * 19 .' | ||
420 | * | ||
421 | */ | ||
422 | static unsigned long | ||
423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
424 | { | ||
425 | struct load_weight lw = { | ||
426 | .weight = NICE_0_LOAD, | ||
427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
428 | }; | ||
393 | 429 | ||
394 | for_each_sched_entity(se) { | 430 | for_each_sched_entity(se) { |
395 | cfs_rq = cfs_rq_of(se); | 431 | struct load_weight *se_lw = &se->load; |
396 | 432 | ||
397 | weight = cfs_rq->load.weight; | 433 | if (se->load.weight < NICE_0_LOAD) |
398 | if (!se->on_rq) | 434 | se_lw = &lw; |
399 | weight += se->load.weight; | ||
400 | 435 | ||
401 | vslice *= NICE_0_LOAD; | 436 | delta = calc_delta_mine(delta, |
402 | do_div(vslice, weight); | 437 | cfs_rq_of(se)->load.weight, se_lw); |
403 | } | 438 | } |
404 | 439 | ||
405 | return vslice; | 440 | return delta; |
406 | } | 441 | } |
407 | 442 | ||
408 | /* | 443 | /* |
@@ -419,11 +454,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
419 | 454 | ||
420 | curr->sum_exec_runtime += delta_exec; | 455 | curr->sum_exec_runtime += delta_exec; |
421 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 456 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
422 | delta_exec_weighted = delta_exec; | 457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); |
423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
425 | &curr->load); | ||
426 | } | ||
427 | curr->vruntime += delta_exec_weighted; | 458 | curr->vruntime += delta_exec_weighted; |
428 | } | 459 | } |
429 | 460 | ||
@@ -632,8 +663,7 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
632 | /* sleeps upto a single latency don't count. */ | 663 | /* sleeps upto a single latency don't count. */ |
633 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { |
634 | if (sched_feat(NORMALIZED_SLEEPER)) | 665 | if (sched_feat(NORMALIZED_SLEEPER)) |
635 | vruntime -= calc_delta_fair(sysctl_sched_latency, | 666 | vruntime -= calc_delta_weight(sysctl_sched_latency, se); |
636 | &cfs_rq->load); | ||
637 | else | 667 | else |
638 | vruntime -= sysctl_sched_latency; | 668 | vruntime -= sysctl_sched_latency; |
639 | } | 669 | } |
@@ -1132,11 +1162,10 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1132 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1162 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1133 | 1163 | ||
1134 | /* | 1164 | /* |
1135 | * More easily preempt - nice tasks, while not making | 1165 | * More easily preempt - nice tasks, while not making it harder for |
1136 | * it harder for + nice tasks. | 1166 | * + nice tasks. |
1137 | */ | 1167 | */ |
1138 | if (unlikely(se->load.weight > NICE_0_LOAD)) | 1168 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); |
1139 | gran = calc_delta_fair(gran, &se->load); | ||
1140 | 1169 | ||
1141 | return gran; | 1170 | return gran; |
1142 | } | 1171 | } |