diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-05-29 05:23:17 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-05-29 05:24:01 -0400 |
commit | f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3 (patch) | |
tree | e171e568f04bd25c7c2ff99b5ef673b917b6eae6 | |
parent | f26a3988917913b3d11b2bd741601a2c64ab9204 (diff) |
revert ("sched: fair: weight calculations")
Yanmin Zhang reported:
Compared with kernel 2.6.25, sysbench+mysql (oltp, read-only) shows
regressions on several machines with 2.6.26-rc1:
1) 8-core stoakley: 28%;
2) 16-core tigerton: 20%;
3) Itanium Montvale: 50%.
Bisect located this patch:
| 8f1bc385cfbab474db6c27b5af1e439614f3025c is first bad commit
| commit 8f1bc385cfbab474db6c27b5af1e439614f3025c
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date: Sat Apr 19 19:45:00 2008 +0200
|
| sched: fair: weight calculations
Revert it to the 2.6.25 state.
Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | kernel/sched.c | 9 | ||||
-rw-r--r-- | kernel/sched_fair.c | 105 |
2 files changed, 39 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index cfa222a91539..4aac8aa16037 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1368,9 +1368,6 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
1368 | */ | 1368 | */ |
1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
1370 | 1370 | ||
1371 | /* | ||
1372 | * delta *= weight / lw | ||
1373 | */ | ||
1374 | static unsigned long | 1371 | static unsigned long |
1375 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1372 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
1376 | struct load_weight *lw) | 1373 | struct load_weight *lw) |
@@ -1393,6 +1390,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
1393 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1390 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
1394 | } | 1391 | } |
1395 | 1392 | ||
1393 | static inline unsigned long | ||
1394 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
1395 | { | ||
1396 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
1397 | } | ||
1398 | |||
1396 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1399 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
1397 | { | 1400 | { |
1398 | lw->weight += inc; | 1401 | lw->weight += inc; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index e24ecd39c4b8..0eb0ae879542 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
334 | #endif | 334 | #endif |
335 | 335 | ||
336 | /* | 336 | /* |
337 | * delta *= w / rw | ||
338 | */ | ||
339 | static inline unsigned long | ||
340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
341 | { | ||
342 | for_each_sched_entity(se) { | ||
343 | delta = calc_delta_mine(delta, | ||
344 | se->load.weight, &cfs_rq_of(se)->load); | ||
345 | } | ||
346 | |||
347 | return delta; | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * delta *= rw / w | ||
352 | */ | ||
353 | static inline unsigned long | ||
354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
355 | { | ||
356 | for_each_sched_entity(se) { | ||
357 | delta = calc_delta_mine(delta, | ||
358 | cfs_rq_of(se)->load.weight, &se->load); | ||
359 | } | ||
360 | |||
361 | return delta; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * The idea is to set a period in which each task runs once. | 337 | * The idea is to set a period in which each task runs once. |
366 | * | 338 | * |
367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running) | |||
390 | */ | 362 | */ |
391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
392 | { | 364 | { |
393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 365 | u64 slice = __sched_period(cfs_rq->nr_running); |
366 | |||
367 | for_each_sched_entity(se) { | ||
368 | cfs_rq = cfs_rq_of(se); | ||
369 | |||
370 | slice *= se->load.weight; | ||
371 | do_div(slice, cfs_rq->load.weight); | ||
372 | } | ||
373 | |||
374 | |||
375 | return slice; | ||
394 | } | 376 | } |
395 | 377 | ||
396 | /* | 378 | /* |
397 | * We calculate the vruntime slice of a to be inserted task | 379 | * We calculate the vruntime slice of a to be inserted task |
398 | * | 380 | * |
399 | * vs = s*rw/w = p | 381 | * vs = s/w = p/rw |
400 | */ | 382 | */ |
401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
402 | { | 384 | { |
403 | unsigned long nr_running = cfs_rq->nr_running; | 385 | unsigned long nr_running = cfs_rq->nr_running; |
386 | unsigned long weight; | ||
387 | u64 vslice; | ||
404 | 388 | ||
405 | if (!se->on_rq) | 389 | if (!se->on_rq) |
406 | nr_running++; | 390 | nr_running++; |
407 | 391 | ||
408 | return __sched_period(nr_running); | 392 | vslice = __sched_period(nr_running); |
409 | } | ||
410 | |||
411 | /* | ||
412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
413 | * that it favours >=0 over <0. | ||
414 | * | ||
415 | * -20 | | ||
416 | * | | ||
417 | * 0 --------+------- | ||
418 | * .' | ||
419 | * 19 .' | ||
420 | * | ||
421 | */ | ||
422 | static unsigned long | ||
423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
424 | { | ||
425 | struct load_weight lw = { | ||
426 | .weight = NICE_0_LOAD, | ||
427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
428 | }; | ||
429 | 393 | ||
430 | for_each_sched_entity(se) { | 394 | for_each_sched_entity(se) { |
431 | struct load_weight *se_lw = &se->load; | 395 | cfs_rq = cfs_rq_of(se); |
432 | 396 | ||
433 | if (se->load.weight < NICE_0_LOAD) | 397 | weight = cfs_rq->load.weight; |
434 | se_lw = &lw; | 398 | if (!se->on_rq) |
399 | weight += se->load.weight; | ||
435 | 400 | ||
436 | delta = calc_delta_mine(delta, | 401 | vslice *= NICE_0_LOAD; |
437 | cfs_rq_of(se)->load.weight, se_lw); | 402 | do_div(vslice, weight); |
438 | } | 403 | } |
439 | 404 | ||
440 | return delta; | 405 | return vslice; |
441 | } | 406 | } |
442 | 407 | ||
443 | /* | 408 | /* |
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
454 | 419 | ||
455 | curr->sum_exec_runtime += delta_exec; | 420 | curr->sum_exec_runtime += delta_exec; |
456 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 421 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 422 | delta_exec_weighted = delta_exec; |
423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
425 | &curr->load); | ||
426 | } | ||
458 | curr->vruntime += delta_exec_weighted; | 427 | curr->vruntime += delta_exec_weighted; |
459 | } | 428 | } |
460 | 429 | ||
@@ -661,17 +630,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
661 | 630 | ||
662 | if (!initial) { | 631 | if (!initial) { |
663 | /* sleeps upto a single latency don't count. */ | 632 | /* sleeps upto a single latency don't count. */ |
664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 633 | if (sched_feat(NEW_FAIR_SLEEPERS)) |
665 | unsigned long thresh = sysctl_sched_latency; | 634 | vruntime -= sysctl_sched_latency; |
666 | |||
667 | /* | ||
668 | * convert the sleeper threshold into virtual time | ||
669 | */ | ||
670 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
671 | thresh = calc_delta_fair(thresh, se); | ||
672 | |||
673 | vruntime -= thresh; | ||
674 | } | ||
675 | 635 | ||
676 | /* ensure we never gain time by being placed backwards. */ | 636 | /* ensure we never gain time by being placed backwards. */ |
677 | vruntime = max_vruntime(se->vruntime, vruntime); | 637 | vruntime = max_vruntime(se->vruntime, vruntime); |
@@ -1169,10 +1129,11 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1169 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1129 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1170 | 1130 | ||
1171 | /* | 1131 | /* |
1172 | * More easily preempt - nice tasks, while not making it harder for | 1132 | * More easily preempt - nice tasks, while not making |
1173 | * + nice tasks. | 1133 | * it harder for + nice tasks. |
1174 | */ | 1134 | */ |
1175 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); | 1135 | if (unlikely(se->load.weight > NICE_0_LOAD)) |
1136 | gran = calc_delta_fair(gran, &se->load); | ||
1176 | 1137 | ||
1177 | return gran; | 1138 | return gran; |
1178 | } | 1139 | } |