diff options
| author | Ingo Molnar <mingo@elte.hu> | 2008-05-29 05:23:17 -0400 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2008-05-29 05:24:01 -0400 |
| commit | f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3 (patch) | |
| tree | e171e568f04bd25c7c2ff99b5ef673b917b6eae6 | |
| parent | f26a3988917913b3d11b2bd741601a2c64ab9204 (diff) | |
revert ("sched: fair: weight calculations")
Yanmin Zhang reported:
Comparing with kernel 2.6.25, sysbench+mysql(oltp, readonly) has many
regressions with 2.6.26-rc1:
1) 8-core stoakley: 28%;
2) 16-core tigerton: 20%;
3) Itanium Montvale: 50%.
Bisect located this patch:
| 8f1bc385cfbab474db6c27b5af1e439614f3025c is first bad commit
| commit 8f1bc385cfbab474db6c27b5af1e439614f3025c
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date: Sat Apr 19 19:45:00 2008 +0200
|
| sched: fair: weight calculations
Revert it to the 2.6.25 state.
Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | kernel/sched.c | 9 | ||||
| -rw-r--r-- | kernel/sched_fair.c | 105 |
2 files changed, 39 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index cfa222a91539..4aac8aa16037 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -1368,9 +1368,6 @@ static void __resched_task(struct task_struct *p, int tif_bit) | |||
| 1368 | */ | 1368 | */ |
| 1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) | 1369 | #define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) |
| 1370 | 1370 | ||
| 1371 | /* | ||
| 1372 | * delta *= weight / lw | ||
| 1373 | */ | ||
| 1374 | static unsigned long | 1371 | static unsigned long |
| 1375 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, | 1372 | calc_delta_mine(unsigned long delta_exec, unsigned long weight, |
| 1376 | struct load_weight *lw) | 1373 | struct load_weight *lw) |
| @@ -1393,6 +1390,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, | |||
| 1393 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); | 1390 | return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); |
| 1394 | } | 1391 | } |
| 1395 | 1392 | ||
| 1393 | static inline unsigned long | ||
| 1394 | calc_delta_fair(unsigned long delta_exec, struct load_weight *lw) | ||
| 1395 | { | ||
| 1396 | return calc_delta_mine(delta_exec, NICE_0_LOAD, lw); | ||
| 1397 | } | ||
| 1398 | |||
| 1396 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) | 1399 | static inline void update_load_add(struct load_weight *lw, unsigned long inc) |
| 1397 | { | 1400 | { |
| 1398 | lw->weight += inc; | 1401 | lw->weight += inc; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e24ecd39c4b8..0eb0ae879542 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write, | |||
| 334 | #endif | 334 | #endif |
| 335 | 335 | ||
| 336 | /* | 336 | /* |
| 337 | * delta *= w / rw | ||
| 338 | */ | ||
| 339 | static inline unsigned long | ||
| 340 | calc_delta_weight(unsigned long delta, struct sched_entity *se) | ||
| 341 | { | ||
| 342 | for_each_sched_entity(se) { | ||
| 343 | delta = calc_delta_mine(delta, | ||
| 344 | se->load.weight, &cfs_rq_of(se)->load); | ||
| 345 | } | ||
| 346 | |||
| 347 | return delta; | ||
| 348 | } | ||
| 349 | |||
| 350 | /* | ||
| 351 | * delta *= rw / w | ||
| 352 | */ | ||
| 353 | static inline unsigned long | ||
| 354 | calc_delta_fair(unsigned long delta, struct sched_entity *se) | ||
| 355 | { | ||
| 356 | for_each_sched_entity(se) { | ||
| 357 | delta = calc_delta_mine(delta, | ||
| 358 | cfs_rq_of(se)->load.weight, &se->load); | ||
| 359 | } | ||
| 360 | |||
| 361 | return delta; | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * The idea is to set a period in which each task runs once. | 337 | * The idea is to set a period in which each task runs once. |
| 366 | * | 338 | * |
| 367 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch | 339 | * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch |
| @@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running) | |||
| 390 | */ | 362 | */ |
| 391 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) | 363 | static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 392 | { | 364 | { |
| 393 | return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); | 365 | u64 slice = __sched_period(cfs_rq->nr_running); |
| 366 | |||
| 367 | for_each_sched_entity(se) { | ||
| 368 | cfs_rq = cfs_rq_of(se); | ||
| 369 | |||
| 370 | slice *= se->load.weight; | ||
| 371 | do_div(slice, cfs_rq->load.weight); | ||
| 372 | } | ||
| 373 | |||
| 374 | |||
| 375 | return slice; | ||
| 394 | } | 376 | } |
| 395 | 377 | ||
| 396 | /* | 378 | /* |
| 397 | * We calculate the vruntime slice of a to be inserted task | 379 | * We calculate the vruntime slice of a to be inserted task |
| 398 | * | 380 | * |
| 399 | * vs = s*rw/w = p | 381 | * vs = s/w = p/rw |
| 400 | */ | 382 | */ |
| 401 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) | 383 | static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) |
| 402 | { | 384 | { |
| 403 | unsigned long nr_running = cfs_rq->nr_running; | 385 | unsigned long nr_running = cfs_rq->nr_running; |
| 386 | unsigned long weight; | ||
| 387 | u64 vslice; | ||
| 404 | 388 | ||
| 405 | if (!se->on_rq) | 389 | if (!se->on_rq) |
| 406 | nr_running++; | 390 | nr_running++; |
| 407 | 391 | ||
| 408 | return __sched_period(nr_running); | 392 | vslice = __sched_period(nr_running); |
| 409 | } | ||
| 410 | |||
| 411 | /* | ||
| 412 | * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in | ||
| 413 | * that it favours >=0 over <0. | ||
| 414 | * | ||
| 415 | * -20 | | ||
| 416 | * | | ||
| 417 | * 0 --------+------- | ||
| 418 | * .' | ||
| 419 | * 19 .' | ||
| 420 | * | ||
| 421 | */ | ||
| 422 | static unsigned long | ||
| 423 | calc_delta_asym(unsigned long delta, struct sched_entity *se) | ||
| 424 | { | ||
| 425 | struct load_weight lw = { | ||
| 426 | .weight = NICE_0_LOAD, | ||
| 427 | .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT) | ||
| 428 | }; | ||
| 429 | 393 | ||
| 430 | for_each_sched_entity(se) { | 394 | for_each_sched_entity(se) { |
| 431 | struct load_weight *se_lw = &se->load; | 395 | cfs_rq = cfs_rq_of(se); |
| 432 | 396 | ||
| 433 | if (se->load.weight < NICE_0_LOAD) | 397 | weight = cfs_rq->load.weight; |
| 434 | se_lw = &lw; | 398 | if (!se->on_rq) |
| 399 | weight += se->load.weight; | ||
| 435 | 400 | ||
| 436 | delta = calc_delta_mine(delta, | 401 | vslice *= NICE_0_LOAD; |
| 437 | cfs_rq_of(se)->load.weight, se_lw); | 402 | do_div(vslice, weight); |
| 438 | } | 403 | } |
| 439 | 404 | ||
| 440 | return delta; | 405 | return vslice; |
| 441 | } | 406 | } |
| 442 | 407 | ||
| 443 | /* | 408 | /* |
| @@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 454 | 419 | ||
| 455 | curr->sum_exec_runtime += delta_exec; | 420 | curr->sum_exec_runtime += delta_exec; |
| 456 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 421 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
| 457 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 422 | delta_exec_weighted = delta_exec; |
| 423 | if (unlikely(curr->load.weight != NICE_0_LOAD)) { | ||
| 424 | delta_exec_weighted = calc_delta_fair(delta_exec_weighted, | ||
| 425 | &curr->load); | ||
| 426 | } | ||
| 458 | curr->vruntime += delta_exec_weighted; | 427 | curr->vruntime += delta_exec_weighted; |
| 459 | } | 428 | } |
| 460 | 429 | ||
| @@ -661,17 +630,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | |||
| 661 | 630 | ||
| 662 | if (!initial) { | 631 | if (!initial) { |
| 663 | /* sleeps upto a single latency don't count. */ | 632 | /* sleeps upto a single latency don't count. */ |
| 664 | if (sched_feat(NEW_FAIR_SLEEPERS)) { | 633 | if (sched_feat(NEW_FAIR_SLEEPERS)) |
| 665 | unsigned long thresh = sysctl_sched_latency; | 634 | vruntime -= sysctl_sched_latency; |
| 666 | |||
| 667 | /* | ||
| 668 | * convert the sleeper threshold into virtual time | ||
| 669 | */ | ||
| 670 | if (sched_feat(NORMALIZED_SLEEPER)) | ||
| 671 | thresh = calc_delta_fair(thresh, se); | ||
| 672 | |||
| 673 | vruntime -= thresh; | ||
| 674 | } | ||
| 675 | 635 | ||
| 676 | /* ensure we never gain time by being placed backwards. */ | 636 | /* ensure we never gain time by being placed backwards. */ |
| 677 | vruntime = max_vruntime(se->vruntime, vruntime); | 637 | vruntime = max_vruntime(se->vruntime, vruntime); |
| @@ -1169,10 +1129,11 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
| 1169 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1129 | unsigned long gran = sysctl_sched_wakeup_granularity; |
| 1170 | 1130 | ||
| 1171 | /* | 1131 | /* |
| 1172 | * More easily preempt - nice tasks, while not making it harder for | 1132 | * More easily preempt - nice tasks, while not making |
| 1173 | * + nice tasks. | 1133 | * it harder for + nice tasks. |
| 1174 | */ | 1134 | */ |
| 1175 | gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); | 1135 | if (unlikely(se->load.weight > NICE_0_LOAD)) |
| 1136 | gran = calc_delta_fair(gran, &se->load); | ||
| 1176 | 1137 | ||
| 1177 | return gran; | 1138 | return gran; |
| 1178 | } | 1139 | } |
