author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-04-19 13:45:00 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-04-19 13:45:00 -0400
commit		18d95a2832c1392a2d63227a7a6d433cb9f2037e (patch)
tree		fa85b700aa3caac5b1309edd8e31d9b957957a83 /kernel/sched_fair.c
parent		1d3504fcf5606579d60b649d19f44b3871c1ddae (diff)
sched: fair-group: SMP-nice for group scheduling
Implement SMP nice support for the full group hierarchy.

On each load-balance action, compile a sched_domain wide view of the full
task_group tree. We compute the domain wide view when walking down the
hierarchy, and readjust the weights when walking back up.

After collecting and readjusting the domain wide view, we try to balance the
tasks within the task_groups. The current approach is to naively balance each
task group until we've moved the targeted amount of load.

Inspired by Srivatsa Vaddagiri's previous code and Abhishek Chandra's H-SMP
paper.

XXX: there will be some numerical issues due to the limited nature of
SCHED_LOAD_SCALE wrt representing a task_group's influence on the total
weight. When the tree is deep enough, or the task weight small enough, we'll
run out of bits.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Abhishek Chandra <chandra@cs.umn.edu>
CC: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
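The arithmetic behind "naively balance each task group" is easiest to see in
isolation: the remaining domain-wide load is first scaled into a group's own
weight domain, tasks are moved within that group, and the moved weight is
scaled back into the domain-wide view. The sketch below is a stand-alone,
user-space illustration of that scaling only; its helper names and sample
weights are invented for the example and are not part of this patch, which
does the same math on aggregate(tg, sd)->rq_weight and ->load inside
load_balance_fair().

/*
 * Illustrative user-space sketch of the per-group apportioning arithmetic.
 * Helper names and numbers are hypothetical; the kernel code operates on
 * aggregate(tg, sd) and tg->cfs_rq[cpu] state under rcu_read_lock().
 */
#include <stdio.h>

/* Scale the domain-wide load still to be moved into one group's own
 * weight domain: rem_load = rem_load_move * rq_weight / (load + 1). */
static long group_rem_load(long rem_load_move, unsigned long rq_weight,
			   unsigned long load)
{
	return rem_load_move * (long)rq_weight / ((long)load + 1);
}

/* Scale the weight actually moved inside the group back into the
 * domain-wide load domain: moved * load / (rq_weight + 1). */
static long domain_moved_load(long moved, unsigned long load,
			      unsigned long rq_weight)
{
	return moved * (long)load / ((long)rq_weight + 1);
}

int main(void)
{
	long rem_load_move = 2048;	/* domain-wide load left to move  */
	unsigned long rq_weight = 3072;	/* group's summed runqueue weight */
	unsigned long load = 1024;	/* group's share of domain load   */

	long rem_load = group_rem_load(rem_load_move, rq_weight, load);
	long moved = domain_moved_load(rem_load, load, rq_weight);

	printf("in-group target %ld accounts for %ld domain-wide\n",
	       rem_load, moved);
	return 0;
}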
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--	kernel/sched_fair.c	124
1 files changed, 80 insertions, 44 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b43748efaa7f..b89fec93a237 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -492,10 +492,27 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
  * Scheduling class queueing methods:
  */
 
+#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED
+static void
+add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
+{
+	cfs_rq->task_weight += weight;
+}
+#else
+static inline void
+add_cfs_task_weight(struct cfs_rq *cfs_rq, unsigned long weight)
+{
+}
+#endif
+
 static void
 account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_add(&cfs_rq->load, se->load.weight);
+	if (!parent_entity(se))
+		inc_cpu_load(rq_of(cfs_rq), se->load.weight);
+	if (entity_is_task(se))
+		add_cfs_task_weight(cfs_rq, se->load.weight);
 	cfs_rq->nr_running++;
 	se->on_rq = 1;
 }
@@ -504,6 +521,10 @@ static void
 account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	update_load_sub(&cfs_rq->load, se->load.weight);
+	if (!parent_entity(se))
+		dec_cpu_load(rq_of(cfs_rq), se->load.weight);
+	if (entity_is_task(se))
+		add_cfs_task_weight(cfs_rq, -se->load.weight);
 	cfs_rq->nr_running--;
 	se->on_rq = 0;
 }
@@ -1286,75 +1307,90 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+static unsigned long
+__load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		unsigned long max_load_move, struct sched_domain *sd,
+		enum cpu_idle_type idle, int *all_pinned, int *this_best_prio,
+		struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *curr;
-	struct task_struct *p;
-
-	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
-		return MAX_PRIO;
-
-	curr = cfs_rq->curr;
-	if (!curr)
-		curr = __pick_next_entity(cfs_rq);
+	struct rq_iterator cfs_rq_iterator;
 
-	p = task_of(curr);
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
+	cfs_rq_iterator.arg = cfs_rq;
 
-	return p->prio;
+	return balance_tasks(this_rq, this_cpu, busiest,
+			max_load_move, sd, idle, all_pinned,
+			this_best_prio, &cfs_rq_iterator);
 }
-#endif
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
-	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
-	struct rq_iterator cfs_rq_iterator;
+	int busiest_cpu = cpu_of(busiest);
+	struct task_group *tg;
 
-	cfs_rq_iterator.start = load_balance_start_fair;
-	cfs_rq_iterator.next = load_balance_next_fair;
-
-	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq;
+	rcu_read_lock();
+	list_for_each_entry(tg, &task_groups, list) {
 		long imbalance;
-		unsigned long maxload;
+		unsigned long this_weight, busiest_weight;
+		long rem_load, max_load, moved_load;
+
+		/*
+		 * empty group
+		 */
+		if (!aggregate(tg, sd)->task_weight)
+			continue;
+
+		rem_load = rem_load_move * aggregate(tg, sd)->rq_weight;
+		rem_load /= aggregate(tg, sd)->load + 1;
 
-		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
+		this_weight = tg->cfs_rq[this_cpu]->task_weight;
+		busiest_weight = tg->cfs_rq[busiest_cpu]->task_weight;
 
-		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
-		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
-		if (imbalance <= 0)
+		imbalance = (busiest_weight - this_weight) / 2;
+
+		if (imbalance < 0)
+			imbalance = busiest_weight;
+
+		max_load = max(rem_load, imbalance);
+		moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
+				max_load, sd, idle, all_pinned, this_best_prio,
+				tg->cfs_rq[busiest_cpu]);
+
+		if (!moved_load)
 			continue;
 
-		/* Don't pull more than imbalance/2 */
-		imbalance /= 2;
-		maxload = min(rem_load_move, imbalance);
+		move_group_shares(tg, sd, busiest_cpu, this_cpu);
 
-		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
-#else
-# define maxload rem_load_move
-#endif
-		/*
-		 * pass busy_cfs_rq argument into
-		 * load_balance_[start|next]_fair iterators
-		 */
-		cfs_rq_iterator.arg = busy_cfs_rq;
-		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
-				maxload, sd, idle, all_pinned,
-				this_best_prio,
-				&cfs_rq_iterator);
+		moved_load *= aggregate(tg, sd)->load;
+		moved_load /= aggregate(tg, sd)->rq_weight + 1;
 
-		if (rem_load_move <= 0)
+		rem_load_move -= moved_load;
+		if (rem_load_move < 0)
 			break;
 	}
+	rcu_read_unlock();
 
 	return max_load_move - rem_load_move;
 }
+#else
+static unsigned long
+load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		  unsigned long max_load_move,
+		  struct sched_domain *sd, enum cpu_idle_type idle,
+		  int *all_pinned, int *this_best_prio)
+{
+	return __load_balance_fair(this_rq, this_cpu, busiest,
+			max_load_move, sd, idle, all_pinned,
+			this_best_prio, &busiest->cfs);
+}
+#endif
 
 static int
 move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,