author    Paul Turner <pjt@google.com>    2011-07-21 12:43:34 -0400
committer Ingo Molnar <mingo@elte.hu>     2011-08-14 06:03:36 -0400
commit    671fd9dabe5239ad218c7eb48b2b9edee50250e6 (patch)
tree      351f59453eb699661bd811210f24d8b7fd554ca4 /kernel/sched_fair.c
parent    85dac906bec3bb41bfaa7ccaa65c4706de5cfdf8 (diff)
sched: Add support for unthrottling group entities
At the start of each period we refresh the global bandwidth pool. At this
time we must also unthrottle any cfs_rq entities who are now within bandwidth
once more (as quota permits).

Unthrottled entities have their corresponding cfs_rq->throttled flag cleared
and their entities re-enqueued.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110721184757.574628950@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
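For reference, the throttle/unthrottle machinery this patch completes is driven
from userspace through the CFS bandwidth-control files of the cgroup cpu
controller: a group that consumes its cpu.cfs_quota_us of runtime inside one
cpu.cfs_period_us window is throttled, then unthrottled by the period timer
path added below. A minimal sketch of exercising that interface follows; the
cgroup-v1 mount point /sys/fs/cgroup/cpu and the group name "demo" are
illustrative assumptions, not part of this patch.

/*
 * Hedged sketch: cap a group to 50ms of CPU per 100ms period, so its
 * cfs_rqs get throttled mid-period and unthrottled by the period timer.
 * Assumes a cgroup-v1 cpu controller mounted at /sys/fs/cgroup/cpu;
 * "demo" is a hypothetical group name.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>

static void write_val(const char *path, long val)
{
        FILE *f = fopen(path, "w");

        if (!f) {
                perror(path);
                exit(1);
        }
        fprintf(f, "%ld\n", val);
        fclose(f);
}

int main(void)
{
        mkdir("/sys/fs/cgroup/cpu/demo", 0755);
        write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", 100000);
        write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", 50000);
        /* move the current task into the bandwidth-limited group */
        write_val("/sys/fs/cgroup/cpu/demo/tasks", (long)getpid());
        return 0;
}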
Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 127 ++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 123 insertions(+), 4 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 72c9d4ed5991..76411950ff3b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1439,6 +1439,84 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
         raw_spin_unlock(&cfs_b->lock);
 }
 
+static void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
+{
+        struct rq *rq = rq_of(cfs_rq);
+        struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+        struct sched_entity *se;
+        int enqueue = 1;
+        long task_delta;
+
+        se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+
+        cfs_rq->throttled = 0;
+        raw_spin_lock(&cfs_b->lock);
+        list_del_rcu(&cfs_rq->throttled_list);
+        raw_spin_unlock(&cfs_b->lock);
+
+        if (!cfs_rq->load.weight)
+                return;
+
+        task_delta = cfs_rq->h_nr_running;
+        for_each_sched_entity(se) {
+                if (se->on_rq)
+                        enqueue = 0;
+
+                cfs_rq = cfs_rq_of(se);
+                if (enqueue)
+                        enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
+                cfs_rq->h_nr_running += task_delta;
+
+                if (cfs_rq_throttled(cfs_rq))
+                        break;
+        }
+
+        if (!se)
+                rq->nr_running += task_delta;
+
+        /* determine whether we need to wake up potentially idle cpu */
+        if (rq->curr == rq->idle && rq->cfs.nr_running)
+                resched_task(rq->curr);
+}
+
+static u64 distribute_cfs_runtime(struct cfs_bandwidth *cfs_b,
+                u64 remaining, u64 expires)
+{
+        struct cfs_rq *cfs_rq;
+        u64 runtime = remaining;
+
+        rcu_read_lock();
+        list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
+                                throttled_list) {
+                struct rq *rq = rq_of(cfs_rq);
+
+                raw_spin_lock(&rq->lock);
+                if (!cfs_rq_throttled(cfs_rq))
+                        goto next;
+
+                runtime = -cfs_rq->runtime_remaining + 1;
+                if (runtime > remaining)
+                        runtime = remaining;
+                remaining -= runtime;
+
+                cfs_rq->runtime_remaining += runtime;
+                cfs_rq->runtime_expires = expires;
+
+                /* we check whether we're throttled above */
+                if (cfs_rq->runtime_remaining > 0)
+                        unthrottle_cfs_rq(cfs_rq);
+
+next:
+                raw_spin_unlock(&rq->lock);
+
+                if (!remaining)
+                        break;
+        }
+        rcu_read_unlock();
+
+        return remaining;
+}
+
 /*
  * Responsible for refilling a task_group's bandwidth and unthrottling its
  * cfs_rqs as appropriate. If there has been no activity within the last
@@ -1447,23 +1525,64 @@ static __used void throttle_cfs_rq(struct cfs_rq *cfs_rq)
  */
 static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 {
-        int idle = 1;
+        u64 runtime, runtime_expires;
+        int idle = 1, throttled;
 
         raw_spin_lock(&cfs_b->lock);
         /* no need to continue the timer with no bandwidth constraint */
         if (cfs_b->quota == RUNTIME_INF)
                 goto out_unlock;
 
-        idle = cfs_b->idle;
+        throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+        /* idle depends on !throttled (for the case of a large deficit) */
+        idle = cfs_b->idle && !throttled;
+
         /* if we're going inactive then everything else can be deferred */
         if (idle)
                 goto out_unlock;
 
         __refill_cfs_bandwidth_runtime(cfs_b);
 
+        if (!throttled) {
+                /* mark as potentially idle for the upcoming period */
+                cfs_b->idle = 1;
+                goto out_unlock;
+        }
+
+        /*
+         * There are throttled entities so we must first use the new bandwidth
+         * to unthrottle them before making it generally available. This
+         * ensures that all existing debts will be paid before a new cfs_rq is
+         * allowed to run.
+         */
+        runtime = cfs_b->runtime;
+        runtime_expires = cfs_b->runtime_expires;
+        cfs_b->runtime = 0;
+
+        /*
+         * This check is repeated as we are holding onto the new bandwidth
+         * while we unthrottle. This can potentially race with an unthrottled
+         * group trying to acquire new bandwidth from the global pool.
+         */
+        while (throttled && runtime > 0) {
+                raw_spin_unlock(&cfs_b->lock);
+                /* we can't nest cfs_b->lock while distributing bandwidth */
+                runtime = distribute_cfs_runtime(cfs_b, runtime,
+                                                 runtime_expires);
+                raw_spin_lock(&cfs_b->lock);
+
+                throttled = !list_empty(&cfs_b->throttled_cfs_rq);
+        }
 
-        /* mark as potentially idle for the upcoming period */
-        cfs_b->idle = 1;
+        /* return (any) remaining runtime */
+        cfs_b->runtime = runtime;
+
+        /*
+         * While we are ensured activity in the period following an
+         * unthrottle, this also covers the case in which the new bandwidth is
+         * insufficient to cover the existing bandwidth deficit. (Forcing the
+         * timer to remain active while there are any throttled entities.)
+         */
+        cfs_b->idle = 0;
 out_unlock:
         if (idle)
                 cfs_b->timer_active = 0;
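
A note on the grant arithmetic in distribute_cfs_runtime() above: a throttled
cfs_rq carries a non-positive runtime_remaining, so -cfs_rq->runtime_remaining
+ 1 is exactly the grant that leaves the group one nanosecond in the black.
The grant is clamped to whatever is left in the pool, and only a group brought
fully positive is unthrottled. A standalone sketch of that calculation, with
made-up deficit and pool values (not taken from the patch):

/*
 * Hedged sketch of distribute_cfs_runtime()'s per-group grant, lifted out
 * of the kernel for illustration. Values are in nanoseconds and invented.
 */
#include <stdio.h>

typedef long long s64;
typedef unsigned long long u64;

static void grant(s64 *runtime_remaining, u64 *remaining)
{
        /* enough to leave the group exactly 1ns of positive runtime */
        u64 runtime = -*runtime_remaining + 1;

        if (runtime > *remaining)       /* pool may fall short */
                runtime = *remaining;
        *remaining -= runtime;
        *runtime_remaining += runtime;
}

int main(void)
{
        u64 pool = 3000000;                             /* 3ms left this period */
        s64 deficits[] = { -2000000, -1500000 };        /* two throttled groups */
        int i;

        for (i = 0; i < 2; i++) {
                grant(&deficits[i], &pool);
                printf("group %d: remaining=%lld -> %s, pool now %llu\n",
                       i, deficits[i],
                       deficits[i] > 0 ? "unthrottle" : "still throttled",
                       pool);
        }
        return 0;
}

With a 3ms pool, the first group's 2ms deficit is fully repaid and it is
unthrottled; the second receives only the remaining ~1ms and stays throttled,
which is exactly the case that forces cfs_b->idle = 0 above so the period
timer keeps running.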