author     Ingo Molnar <mingo@elte.hu>  2008-05-29 05:28:57 -0400
committer  Ingo Molnar <mingo@elte.hu>  2008-05-29 05:28:57 -0400
commit     6363ca57c76b7b83639ca8c83fc285fa26a7880e (patch)
tree       b8630b4af286409efdd648920a546fae24d4db88 /kernel/sched_fair.c
parent     4285f594f84d1f0641fc962d00e6638dec4a19c4 (diff)
revert ("sched: fair-group: SMP-nice for group scheduling")
Yanmin Zhang reported:

  Comparing with 2.6.25, volanoMark has big regression with kernel
  2.6.26-rc1. It's about 50% on my 8-core stoakley, 16-core tigerton,
  and Itanium Montecito.

  With bisect, I located the following patch:

  | 18d95a2832c1392a2d63227a7a6d433cb9f2037e is first bad commit
  | commit 18d95a2832c1392a2d63227a7a6d433cb9f2037e
  | Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
  | Date: Sat Apr 19 19:45:00 2008 +0200
  |
  |     sched: fair-group: SMP-nice for group scheduling

Revert it so that we get v2.6.25 behavior.

Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
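For reference, a minimal standalone sketch (not part of this commit) of the per-runqueue pull limit that the restored load_balance_fair() computes from cfs_rq->load.weight: never pull unless the busiest runqueue is heavier, never more than half the difference, and never more than the remaining load-move budget. The max_pull() helper and the plain integer weights below are illustrative only, not kernel code.

/*
 * Illustrative sketch of the restored imbalance arithmetic; plain
 * unsigned integers stand in for cfs_rq->load.weight values.
 */
#include <stdio.h>

static unsigned long max_pull(unsigned long busy_weight,
			      unsigned long this_weight,
			      unsigned long rem_load_move)
{
	long imbalance = (long)busy_weight - (long)this_weight;

	/* Don't pull if this runqueue already has more load than the busy one. */
	if (imbalance <= 0)
		return 0;

	/* Don't pull more than half the imbalance ... */
	imbalance /= 2;

	/* ... and never more than the remaining load-move budget. */
	return rem_load_move < (unsigned long)imbalance ?
		rem_load_move : (unsigned long)imbalance;
}

int main(void)
{
	/* busiest weight 3072, local weight 1024, budget 2048 -> pull 1024 */
	printf("max pull: %lu\n", max_pull(3072, 1024, 2048));
	return 0;
}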
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c  124
1 file changed, 44 insertions(+), 80 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0eb0ae879542..f0f25fc12d0a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-	cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
 	list_add(&se->group_node, &cfs_rq->tasks);
@@ -540,10 +523,6 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
-	if (!parent_entity(se))
-		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-	if (entity_is_task(se))
-		add_cfs_task_weight(cfs_rq, -se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
 	list_del_init(&se->group_node);
@@ -1327,90 +1306,75 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }
 
-static unsigned long
-__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move, struct sched_domain *sd,
-		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
-		struct cfs_rq *cfs_rq)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 {
-	struct rq_iterator cfs_rq_iterator;
+	struct sched_entity *curr;
+	struct task_struct *p;
 
-	cfs_rq_iterator.start = load_balance_start_fair;
-	cfs_rq_iterator.next = load_balance_next_fair;
-	cfs_rq_iterator.arg = cfs_rq;
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
 
-	return balance_tasks(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &cfs_rq_iterator);
+	p = task_of(curr);
+
+	return p->prio;
 }
+#endif
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		unsigned long max_load_move,
 		struct sched_domain *sd, enum cpu_idle_type idle,
 		int *all_pinned, int *this_best_prio)
 {
+	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
-	int busiest_cpu = cpu_of(busiest);
-	struct task_group *tg;
-
-	rcu_read_lock();
-	list_for_each_entry(tg, &task_groups, list) {
-		long imbalance;
-		unsigned long this_weight, busiest_weight;
-		long rem_load, max_load, moved_load;
-
-		/*
-		 * empty group
-		 */
-		if (!aggregate(tg, sd)->task_weight)
-			continue;
-
-		rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
-		rem_load /= aggregate(tg, sd)->load + 1;
-
-		this_weight = tg->cfs_rq[this_cpu]->task_weight;
-		busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
+	struct rq_iterator cfs_rq_iterator;
 
-		imbalance = (busiest_weight - this_weight) / 2;
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
 
-		if (imbalance < 0)
-			imbalance = busiest_weight;
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		max_load = max(rem_load, imbalance);
-		moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
-				max_load, sd, idle, all_pinned, this_best_prio,
-				tg->cfs_rq[busiest_cpu]);
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		if (!moved_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		move_group_shares(tg, sd, busiest_cpu, this_cpu);
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		moved_load *= aggregate(tg, sd)->load;
-		moved_load /= aggregate(tg, sd)->rq_weight + 1;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		rem_load_move -= moved_load;
-		if (rem_load_move < 0)
+		if (rem_load_move <= 0)
 			break;
 	}
-	rcu_read_unlock();
 
 	return max_load_move - rem_load_move;
 }
-#else
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_load_move,
-		struct sched_domain *sd, enum cpu_idle_type idle,
-		int *all_pinned, int *this_best_prio)
-{
-	return __load_balance_fair(this_rq, this_cpu, busiest,
-			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
-}
-#endif
 
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,