about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2008-05-29 05:23:17 -0400
committer: Ingo Molnar <mingo@elte.hu> 2008-05-29 05:24:01 -0400
commit: f9305d4a0968201b2818dbed0dc8cb0d4ee7aeb3 (patch)
tree: e171e568f04bd25c7c2ff99b5ef673b917b6eae6
parent: f26a3988917913b3d11b2bd741601a2c64ab9204 (diff)
revert ("sched: fair: weight calculations")
Yanmin Zhang reported:

Comparing with kernel 2.6.25, sysbench+mysql(oltp, readonly) has many
regressions with 2.6.26-rc1:

    1) 8-core stoakley: 28%;
    2) 16-core tigerton: 20%;
    3) Itanium Montvale: 50%.

Bisect located this patch:

| 8f1bc385cfbab474db6c27b5af1e439614f3025c is first bad commit
| commit 8f1bc385cfbab474db6c27b5af1e439614f3025c
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date: Sat Apr 19 19:45:00 2008 +0200
|
|     sched: fair: weight calculations

Revert it to the 2.6.25 state.

Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- kernel/sched.c 9
-rw-r--r-- kernel/sched_fair.c 105
2 files changed, 39 insertions, 75 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index cfa222a91539..4aac8aa16037 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1368,9 +1368,6 @@ static void __resched_task(struct task_struct *p, int tif_bit)
1368 */ 1368 */
1369#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) 1369#define SRR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
1370 1370
1371/*
1372 * delta *= weight / lw
1373 */
1374static unsigned long 1371static unsigned long
1375calc_delta_mine(unsigned long delta_exec, unsigned long weight, 1372calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1376 struct load_weight *lw) 1373 struct load_weight *lw)
@@ -1393,6 +1390,12 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
1393 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); 1390 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
1394} 1391}
1395 1392
1393static inline unsigned long
1394calc_delta_fair(unsigned long delta_exec, struct load_weight *lw)
1395{
1396 return calc_delta_mine(delta_exec, NICE_0_LOAD, lw);
1397}
1398
1396static inline void update_load_add(struct load_weight *lw, unsigned long inc) 1399static inline void update_load_add(struct load_weight *lw, unsigned long inc)
1397{ 1400{
1398 lw->weight += inc; 1401 lw->weight += inc;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index e24ecd39c4b8..0eb0ae879542 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -334,34 +334,6 @@ int sched_nr_latency_handler(struct ctl_table *table, int write,
334#endif 334#endif
335 335
336/* 336/*
337 * delta *= w / rw
338 */
339static inline unsigned long
340calc_delta_weight(unsigned long delta, struct sched_entity *se)
341{
342 for_each_sched_entity(se) {
343 delta = calc_delta_mine(delta,
344 se->load.weight, &cfs_rq_of(se)->load);
345 }
346
347 return delta;
348}
349
350/*
351 * delta *= rw / w
352 */
353static inline unsigned long
354calc_delta_fair(unsigned long delta, struct sched_entity *se)
355{
356 for_each_sched_entity(se) {
357 delta = calc_delta_mine(delta,
358 cfs_rq_of(se)->load.weight, &se->load);
359 }
360
361 return delta;
362}
363
364/*
365 * The idea is to set a period in which each task runs once. 337 * The idea is to set a period in which each task runs once.
366 * 338 *
367 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch 339 * When there are too many tasks (sysctl_sched_nr_latency) we have to stretch
@@ -390,54 +362,47 @@ static u64 __sched_period(unsigned long nr_running)
390 */ 362 */
391static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) 363static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
392{ 364{
393 return calc_delta_weight(__sched_period(cfs_rq->nr_running), se); 365 u64 slice = __sched_period(cfs_rq->nr_running);
366
367 for_each_sched_entity(se) {
368 cfs_rq = cfs_rq_of(se);
369
370 slice *= se->load.weight;
371 do_div(slice, cfs_rq->load.weight);
372 }
373
374
375 return slice;
394} 376}
395 377
396/* 378/*
397 * We calculate the vruntime slice of a to be inserted task 379 * We calculate the vruntime slice of a to be inserted task
398 * 380 *
399 * vs = s*rw/w = p 381 * vs = s/w = p/rw
400 */ 382 */
401static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se) 383static u64 sched_vslice_add(struct cfs_rq *cfs_rq, struct sched_entity *se)
402{ 384{
403 unsigned long nr_running = cfs_rq->nr_running; 385 unsigned long nr_running = cfs_rq->nr_running;
386 unsigned long weight;
387 u64 vslice;
404 388
405 if (!se->on_rq) 389 if (!se->on_rq)
406 nr_running++; 390 nr_running++;
407 391
408 return __sched_period(nr_running); 392 vslice = __sched_period(nr_running);
409}
410
411/*
412 * The goal of calc_delta_asym() is to be asymmetrically around NICE_0_LOAD, in
413 * that it favours >=0 over <0.
414 *
415 * -20 |
416 * |
417 * 0 --------+-------
418 * .'
419 * 19 .'
420 *
421 */
422static unsigned long
423calc_delta_asym(unsigned long delta, struct sched_entity *se)
424{
425 struct load_weight lw = {
426 .weight = NICE_0_LOAD,
427 .inv_weight = 1UL << (WMULT_SHIFT-NICE_0_SHIFT)
428 };
429 393
430 for_each_sched_entity(se) { 394 for_each_sched_entity(se) {
431 struct load_weight *se_lw = &se->load; 395 cfs_rq = cfs_rq_of(se);
432 396
433 if (se->load.weight < NICE_0_LOAD) 397 weight = cfs_rq->load.weight;
434 se_lw = &lw; 398 if (!se->on_rq)
399 weight += se->load.weight;
435 400
436 delta = calc_delta_mine(delta, 401 vslice *= NICE_0_LOAD;
437 cfs_rq_of(se)->load.weight, se_lw); 402 do_div(vslice, weight);
438 } 403 }
439 404
440 return delta; 405 return vslice;
441} 406}
442 407
443/* 408/*
@@ -454,7 +419,11 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
454 419
455 curr->sum_exec_runtime += delta_exec; 420 curr->sum_exec_runtime += delta_exec;
456 schedstat_add(cfs_rq, exec_clock, delta_exec); 421 schedstat_add(cfs_rq, exec_clock, delta_exec);
457 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 422 delta_exec_weighted = delta_exec;
423 if (unlikely(curr->load.weight != NICE_0_LOAD)) {
424 delta_exec_weighted = calc_delta_fair(delta_exec_weighted,
425 &curr->load);
426 }
458 curr->vruntime += delta_exec_weighted; 427 curr->vruntime += delta_exec_weighted;
459} 428}
460 429
@@ -661,17 +630,8 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
661 630
662 if (!initial) { 631 if (!initial) {
663 /* sleeps upto a single latency don't count. */ 632 /* sleeps upto a single latency don't count. */
664 if (sched_feat(NEW_FAIR_SLEEPERS)) { 633 if (sched_feat(NEW_FAIR_SLEEPERS))
665 unsigned long thresh = sysctl_sched_latency; 634 vruntime -= sysctl_sched_latency;
666
667 /*
668 * convert the sleeper threshold into virtual time
669 */
670 if (sched_feat(NORMALIZED_SLEEPER))
671 thresh = calc_delta_fair(thresh, se);
672
673 vruntime -= thresh;
674 }
675 635
676 /* ensure we never gain time by being placed backwards. */ 636 /* ensure we never gain time by being placed backwards. */
677 vruntime = max_vruntime(se->vruntime, vruntime); 637 vruntime = max_vruntime(se->vruntime, vruntime);
@@ -1169,10 +1129,11 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1169 unsigned long gran = sysctl_sched_wakeup_granularity; 1129 unsigned long gran = sysctl_sched_wakeup_granularity;
1170 1130
1171 /* 1131 /*
1172 * More easily preempt - nice tasks, while not making it harder for 1132 * More easily preempt - nice tasks, while not making
1173 * + nice tasks. 1133 * it harder for + nice tasks.
1174 */ 1134 */
1175 gran = calc_delta_asym(sysctl_sched_wakeup_granularity, se); 1135 if (unlikely(se->load.weight > NICE_0_LOAD))
1136 gran = calc_delta_fair(gran, &se->load);
1176 1137
1177 return gran; 1138 return gran;
1178} 1139}