author    Paul Turner <pjt@google.com>    2011-07-21 12:43:34 -0400
committer Ingo Molnar <mingo@elte.hu>     2011-08-14 06:03:36 -0400
commit    671fd9dabe5239ad218c7eb48b2b9edee50250e6 (patch)
tree      351f59453eb699661bd811210f24d8b7fd554ca4 /kernel/sched_fair.c
parent    85dac906bec3bb41bfaa7ccaa65c4706de5cfdf8 (diff)
sched: Add support for unthrottling group entities
At the start of each period we refresh the global bandwidth pool. At this
time we must also unthrottle any cfs_rq entities who are now within bandwidth
once more (as quota permits).

Unthrottled entities have their corresponding cfs_rq->throttled flag cleared
and their entities re-enqueued.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
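For reference, the throttle/unthrottle machinery this patch completes is driven
from userspace through the CFS bandwidth-control files of the cgroup cpu
controller: a group that consumes its cpu.cfs_quota_us of runtime inside one
cpu.cfs_period_us window is throttled, then unthrottled by the period timer
path added below. A minimal sketch of exercising that interface follows; the
cgroup-v1 mount point /sys/fs/cgroup/cpu and the group name "demo" are
illustrative assumptions, not part of this patch.

/*
 * Hedged sketch: cap a group to 50ms of CPU per 100ms period, so its
 * cfs_rqs get throttled mid-period and unthrottled by the period timer.
 * Assumes a cgroup-v1 cpu controller mounted at /sys/fs/cgroup/cpu;
 * "demo" is a hypothetical group name.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static void write_val(const char *path, long val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                exit(1);
        }
        fprintf(f, "%ld\n", val);
        fclose(f);
}

int main(void)
{
        mkdir("/sys/fs/cgroup/cpu/demo", 0755);
        write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", 100000);
        write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", 50000);
        /* move the current task into the bandwidth-limited group */
        write_val("/sys/fs/cgroup/cpu/demo/tasks", (long)getpid());
        return 0;
}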
Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 4 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72c9d4ed5991..76411950ff3b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
         raw_spin_unlock(&cfs_b->lock);
 }
 
+static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+        struct rq *rq = rq_of(cfs_rq);
+        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+        struct sched_entity *se;
+        int enqueue = 1;
+        long task_delta;
+
+        se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+        cfs_rq->throttled = 0;
+        raw_spin_lock(&cfs_b->lock);
+        list_del_rcu(&cfs_rq->throttled_list);
+        raw_spin_unlock(&cfs_b->lock);
+
+        if (!cfs_rq->load.weight)
+                return;
+
+        task_delta = cfs_rq->h_nr_running;
+        for_each_sched_entity(se) {
+                if (se->on_rq)
+                        enqueue = 0;
+
+                cfs_rq = cfs_rq_of(se);
+                if (enqueue)
+                        enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+                cfs_rq->h_nr_running += task_delta;
+
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+        }
+
+        if (!se)
+                rq->nr_running += task_delta;
+
+        /* determine whether we need to wake up potentially idle cpu */
+        if (rq->curr == rq->idle && rq->cfs.nr_running)
+                resched_task(rq->curr);
+}
+
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
+                u64 remaining, u64 expires)
+{
+        struct cfs_rq *cfs_rq;
+        u64 runtime = remaining;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
+                                throttled_list) {
+                struct rq *rq = rq_of(cfs_rq);
+
+                raw_spin_lock(&rq->lock);
+                if (!cfs_rq_throttled(cfs_rq))
+                        goto next;
+
+                runtime = -cfs_rq->runtime_remaining + 1;
+                if (runtime > remaining)
+                        runtime = remaining;
+                remaining -= runtime;
+
+                cfs_rq->runtime_remaining += runtime;
+                cfs_rq->runtime_expires = expires;
+
+                /* we check whether we're throttled above */
+                if (cfs_rq->runtime_remaining > 0)
+                        unthrottle_cfs_rq(cfs_rq);
+
+next:
+                raw_spin_unlock(&rq->lock);
+
+                if (!remaining)
+                        break;
+        }
+        rcu_read_unlock();
+
+        return remaining;
+}
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-        int idle = 1;
+        u64 runtime, runtime_expires;
+        int idle = 1, throttled;
 
         raw_spin_lock(&cfs_b->lock);
         /* no need to continue the timer with no bandwidth constraint */
         if (cfs_b->quota == RUNTIME_INF)
                 goto out_unlock;
 
-        idle = cfs_b->idle;
+        throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+        /* idle depends on !throttled (for the case of a large deficit) */
+        idle = cfs_b->idle && !throttled;
+
         /* if we're going inactive then everything else can be deferred */
         if (idle)
                 goto out_unlock;
 
         __refill_cfs_bandwidth_runtime(cfs_b);
 
+        if (!throttled) {
+                /* mark as potentially idle for the upcoming period */
+                cfs_b->idle = 1;
+                goto out_unlock;
+        }
+
+        /*
+         * There are throttled entities so we must first use the new bandwidth
+         * to unthrottle them before making it generally available. This
+         * ensures that all existing debts will be paid before a new cfs_rq is
+         * allowed to run.
+         */
+        runtime = cfs_b->runtime;
+        runtime_expires = cfs_b->runtime_expires;
+        cfs_b->runtime = 0;
+
+        /*
+         * This check is repeated as we are holding onto the new bandwidth
+         * while we unthrottle. This can potentially race with an unthrottled
+         * group trying to acquire new bandwidth from the global pool.
+         */
+        while (throttled && runtime > 0) {
+                raw_spin_unlock(&cfs_b->lock);
+                /* we can't nest cfs_b->lock while distributing bandwidth */
+                runtime = distribute_cfs_runtime(cfs_b, runtime,
+                                                 runtime_expires);
+                raw_spin_lock(&cfs_b->lock);
+
+                throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+        }
 
-        /* mark as potentially idle for the upcoming period */
-        cfs_b->idle = 1;
+        /* return (any) remaining runtime */
+        cfs_b->runtime = runtime;
+
+        /*
+         * While we are ensured activity in the period following an
+         * unthrottle, this also covers the case in which the new bandwidth is
+         * insufficient to cover the existing bandwidth deficit. (Forcing the
+         * timer to remain active while there are any throttled entities.)
+         */
+        cfs_b->idle = 0;
 out_unlock:
         if (idle)
                 cfs_b->timer_active = 0;
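
A note on the grant arithmetic in distribute_cfs_runtime() above: a throttled
cfs_rq carries a non-positive runtime_remaining, so -cfs_rq->runtime_remaining
+ 1 is exactly the grant that leaves the group one nanosecond in the black.
The grant is clamped to whatever is left in the pool, and only a group brought
fully positive is unthrottled. A standalone sketch of that calculation, with
made-up deficit and pool values (not taken from the patch):

/*
 * Hedged sketch of distribute_cfs_runtime()'s per-group grant, lifted out
 * of the kernel for illustration. Values are in nanoseconds and invented.
 */
#include <stdio.h>

typedef long long s64;
typedef unsigned long long u64;

static void grant(s64 *runtime_remaining, u64 *remaining)
{
        /* enough to leave the group exactly 1ns of positive runtime */
        u64 runtime = -*runtime_remaining + 1;

        if (runtime > *remaining)       /* pool may fall short */
                runtime = *remaining;
        *remaining -= runtime;
        *runtime_remaining += runtime;
}

int main(void)
{
        u64 pool = 3000000;                             /* 3ms left this period */
        s64 deficits[] = { -2000000, -1500000 };        /* two throttled groups */
        int i;

        for (i = 0; i < 2; i++) {
                grant(&deficits[i], &pool);
                printf("group %d: remaining=%lld -> %s, pool now %llu\n",
                       i, deficits[i],
                       deficits[i] > 0 ? "unthrottle" : "still throttled",
                       pool);
        }
        return 0;
}

With a 3ms pool, the first group's 2ms deficit is fully repaid and it is
unthrottled; the second receives only the remaining ~1ms and stays throttled,
which is exactly the case that forces cfs_b->idle = 0 above so the period
timer keeps running.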