author     Ingo Molnar <mingo@elte.hu>    2008-05-29 05:28:57 -0400
committer  Ingo Molnar <mingo@elte.hu>    2008-05-29 05:28:57 -0400
commit     6363ca57c76b7b83639ca8c83fc285fa26a7880e (patch)
tree       b8630b4af286409efdd648920a546fae24d4db88 /kernel/sched_fair.c
parent     4285f594f84d1f0641fc962d00e6638dec4a19c4 (diff)
revert ("sched: fair-group: SMP-nice for group scheduling")
Yanmin Zhang reported:
Compared with 2.6.25, volanoMark shows a big regression with kernel 2.6.26-rc1.
It's about 50% on my 8-core stoakley, 16-core tigerton, and Itanium Montecito machines.
With bisect, I located the following patch:
| 18d95a2832c1392a2d63227a7a6d433cb9f2037e is first bad commit
| commit 18d95a2832c1392a2d63227a7a6d433cb9f2037e
| Author: Peter Zijlstra <a.p.zijlstra@chello.nl>
| Date: Sat Apr 19 19:45:00 2008 +0200
|
| sched: fair-group: SMP-nice for group scheduling
Revert it so that we get v2.6.25 behavior.
Bisected-by: Yanmin Zhang <yanmin_zhang@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--   kernel/sched_fair.c   124
1 file changed, 44 insertions(+), 80 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 0eb0ae879542..f0f25fc12d0a 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,27 +510,10 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */

-#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
-static void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-        cfs_rq->task_weight += weight;
-}
-#else
-static inline void
-add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
-{
-}
-#endif
-
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         update_load_add(&cfs_rq->load, se->load.weight);
-        if (!parent_entity(se))
-                inc_cpu_load(rq_of(cfs_rq), se->load.weight);
-        if (entity_is_task(se))
-                add_cfs_task_weight(cfs_rq, se->load.weight);
         cfs_rq->nr_running++;
         se->on_rq = 1;
         list_add(&se->group_node, &cfs_rq->tasks);
@@ -540,10 +523,6 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         update_load_sub(&cfs_rq->load, se->load.weight);
-        if (!parent_entity(se))
-                dec_cpu_load(rq_of(cfs_rq), se->load.weight);
-        if (entity_is_task(se))
-                add_cfs_task_weight(cfs_rq, -se->load.weight);
         cfs_rq->nr_running--;
         se->on_rq = 0;
         list_del_init(&se->group_node);
@@ -1327,90 +1306,75 @@ static struct task_struct *load_balance_next_fair(void *arg)
         return __load_balance_iterator(cfs_rq, cfs_rq->balance_iterator);
 }

-static unsigned long
-__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                unsigned long max_load_move, struct sched_domain *sd,
-                enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
-                struct cfs_rq *cfs_rq)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 {
-        struct rq_iterator cfs_rq_iterator;
+        struct sched_entity *curr;
+        struct task_struct *p;

-        cfs_rq_iterator.start = load_balance_start_fair;
-        cfs_rq_iterator.next = load_balance_next_fair;
-        cfs_rq_iterator.arg = cfs_rq;
+        if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+                return MAX_PRIO;
+
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);

-        return balance_tasks(this_rq, this_cpu, busiest,
-                        max_load_move, sd, idle, all_pinned,
-                        this_best_prio, &cfs_rq_iterator);
+        p = task_of(curr);
+
+        return p->prio;
 }
+#endif

-#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
                   unsigned long max_load_move,
                   struct sched_domain *sd, enum cpu_idle_type idle,
                   int *all_pinned, int *this_best_prio)
 {
+        struct cfs_rq *busy_cfs_rq;
         long rem_load_move = max_load_move;
-        int busiest_cpu = cpu_of(busiest);
-        struct task_group *tg;
-
-        rcu_read_lock();
-        list_for_each_entry(tg, &task_groups, list) {
-                long imbalance;
-                unsigned long this_weight, busiest_weight;
-                long rem_load, max_load, moved_load;
-
-                /*
-                 * empty group
-                 */
-                if (!aggregate(tg, sd)->task_weight)
-                        continue;
-
-                rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
-                rem_load /= aggregate(tg, sd)->load + 1;
-
-                this_weight = tg->cfs_rq[this_cpu]->task_weight;
-                busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
+        struct rq_iterator cfs_rq_iterator;

-                imbalance = (busiest_weight - this_weight) / 2;
+        cfs_rq_iterator.start = load_balance_start_fair;
+        cfs_rq_iterator.next = load_balance_next_fair;

-                if (imbalance < 0)
-                        imbalance = busiest_weight;
+        for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+                struct cfs_rq *this_cfs_rq;
+                long imbalance;
+                unsigned long maxload;

-                max_load = max(rem_load, imbalance);
-                moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
-                                max_load, sd, idle, all_pinned, this_best_prio,
-                                tg->cfs_rq[busiest_cpu]);
+                this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);

-                if (!moved_load)
+                imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+                /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+                if (imbalance <= 0)
                         continue;

-                move_group_shares(tg, sd, busiest_cpu, this_cpu);
+                /* Don't pull more than imbalance/2 */
+                imbalance /= 2;
+                maxload = min(rem_load_move, imbalance);

-                moved_load *= aggregate(tg, sd)->load;
-                moved_load /= aggregate(tg, sd)->rq_weight + 1;
+                *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
+#else
+# define maxload rem_load_move
+#endif
+                /*
+                 * pass busy_cfs_rq argument into
+                 * load_balance_[start|next]_fair iterators
+                 */
+                cfs_rq_iterator.arg = busy_cfs_rq;
+                rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+                                               maxload, sd, idle, all_pinned,
+                                               this_best_prio,
+                                               &cfs_rq_iterator);

-                rem_load_move -= moved_load;
-                if (rem_load_move < 0)
+                if (rem_load_move <= 0)
                         break;
         }
-        rcu_read_unlock();

         return max_load_move - rem_load_move;
 }
-#else
-static unsigned long
-load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-                unsigned long max_load_move,
-                struct sched_domain *sd, enum cpu_idle_type idle,
-                int *all_pinned, int *this_best_prio)
-{
-        return __load_balance_fair(this_rq, this_cpu, busiest,
-                        max_load_move, sd, idle, all_pinned,
-                        this_best_prio, &busiest->cfs);
-}
-#endif

 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,