Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--   kernel/sched_fair.c   90
1 file changed, 81 insertions(+), 9 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index af73a8a85eef..9d1adbd0b615 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1272,11 +1272,30 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
 }
 
+/*
+ * Replenish runtime according to assigned quota and update expiration time.
+ * We use sched_clock_cpu directly instead of rq->clock to avoid adding
+ * additional synchronization around rq->lock.
+ *
+ * requires cfs_b->lock
+ */
+static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
+{
+	u64 now;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return;
+
+	now = sched_clock_cpu(smp_processor_id());
+	cfs_b->runtime = cfs_b->quota;
+	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+}
+
 static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
 	struct task_group *tg = cfs_rq->tg;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	u64 amount = 0, min_amount;
+	u64 amount = 0, min_amount, expires;
 
 	/* note: this is a positive sum as runtime_remaining <= 0 */
 	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
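The refill helper introduced above resets the global pool to its full quota and pushes the pool's deadline one period past the current clock. A minimal userspace sketch of that bookkeeping, with hypothetical names (struct pool, refill) and illustrative values, 50ms of quota per 100ms period, rather than the kernel's own structures:

#include <stdint.h>
#include <stdio.h>

#define RUNTIME_INF ((uint64_t)~0ULL)
#define NSEC_PER_MSEC 1000000ULL

/* hypothetical stand-in for the refill-relevant cfs_bandwidth fields */
struct pool {
	uint64_t quota;            /* runtime granted per period, or RUNTIME_INF */
	uint64_t period;           /* period length in ns */
	uint64_t runtime;          /* runtime remaining in the global pool */
	uint64_t runtime_expires;  /* deadline after which runtime is stale */
};

/* mirrors __refill_cfs_bandwidth_runtime(): reset the pool and push the
 * deadline one full period past 'now' */
static void refill(struct pool *p, uint64_t now)
{
	if (p->quota == RUNTIME_INF)
		return;
	p->runtime = p->quota;
	p->runtime_expires = now + p->period;
}

int main(void)
{
	struct pool p = { 50 * NSEC_PER_MSEC, 100 * NSEC_PER_MSEC, 0, 0 };

	refill(&p, 1000 * NSEC_PER_MSEC);
	printf("runtime=%llums, expires at t=%llums\n",
	       (unsigned long long)(p.runtime / NSEC_PER_MSEC),
	       (unsigned long long)(p.runtime_expires / NSEC_PER_MSEC));
	return 0;
}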
@@ -1285,9 +1304,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
 	else {
-		/* ensure bandwidth timer remains active under consumption */
-		if (!cfs_b->timer_active)
+		/*
+		 * If the bandwidth pool has become inactive, then at least one
+		 * period must have elapsed since the last consumption.
+		 * Refresh the global state and ensure the bandwidth timer
+		 * becomes active.
+		 */
+		if (!cfs_b->timer_active) {
+			__refill_cfs_bandwidth_runtime(cfs_b);
 			__start_cfs_bandwidth(cfs_b);
+		}
 
 		if (cfs_b->runtime > 0) {
 			amount = min(cfs_b->runtime, min_amount);
@@ -1295,19 +1321,61 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 			cfs_b->idle = 0;
 		}
 	}
+	expires = cfs_b->runtime_expires;
 	raw_spin_unlock(&cfs_b->lock);
 
 	cfs_rq->runtime_remaining += amount;
+	/*
+	 * we may have advanced our local expiration to account for allowed
+	 * spread between our sched_clock and the one on which runtime was
+	 * issued.
+	 */
+	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+		cfs_rq->runtime_expires = expires;
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-		unsigned long delta_exec)
+/*
+ * Note: This depends on the synchronization provided by sched_clock and the
+ * fact that rq->clock snapshots this value.
+ */
+static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled)
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct rq *rq = rq_of(cfs_rq);
+
+	/* if the deadline is ahead of our clock, nothing to do */
+	if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
+		return;
+
+	if (cfs_rq->runtime_remaining < 0)
 		return;
 
+	/*
+	 * If the local deadline has passed we have to consider the
+	 * possibility that our sched_clock is 'fast' and the global deadline
+	 * has not truly expired.
+	 *
+	 * Fortunately we can determine whether this is the case by checking
+	 * whether the global deadline has advanced.
+	 */
+
+	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+		/* extend local deadline, drift is bounded above by 2 ticks */
+		cfs_rq->runtime_expires += TICK_NSEC;
+	} else {
+		/* global deadline is ahead, expiration has passed */
+		cfs_rq->runtime_remaining = 0;
+	}
+}
+
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				     unsigned long delta_exec)
+{
+	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
-	if (cfs_rq->runtime_remaining > 0)
+	expire_cfs_rq_runtime(cfs_rq);
+
+	if (likely(cfs_rq->runtime_remaining > 0))
 		return;
 
 	assign_cfs_rq_runtime(cfs_rq);
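The new expiration path reconciles a locally-expired deadline with the global one: if the local deadline has caught up to the global deadline, the global pool was not refreshed in the meantime, so only sched_clock drift (bounded above by two ticks, per the comment) can explain the expiry and the local deadline is extended; otherwise the runtime truly belongs to an earlier period and is discarded. A self-contained sketch of that decision, using the same wraparound-safe (s64)(a - b) comparison idiom; the names and the 1ms tick are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

#define TICK_NS 1000000LL	/* assumed 1ms tick, for illustration */

/* hypothetical per-cpu state mirroring the cfs_rq fields used here */
struct local_rq {
	int64_t runtime_remaining;
	uint64_t runtime_expires;
};

/*
 * Models expire_cfs_rq_runtime(): the signed difference of two u64
 * timestamps orders them correctly even across wraparound.
 */
static void expire(struct local_rq *rq, uint64_t now, uint64_t global_expires)
{
	/* local deadline still ahead of the clock: nothing to do */
	if ((int64_t)(now - rq->runtime_expires) < 0)
		return;
	if (rq->runtime_remaining < 0)
		return;

	if ((int64_t)(rq->runtime_expires - global_expires) >= 0) {
		/* global deadline has not advanced: our clock is 'fast',
		 * so extend the local deadline by one tick of slack */
		rq->runtime_expires += TICK_NS;
	} else {
		/* global deadline moved on: the runtime really is stale */
		rq->runtime_remaining = 0;
	}
}

int main(void)
{
	struct local_rq rq = { 5 * TICK_NS, 100 * TICK_NS };

	expire(&rq, 100 * TICK_NS, 100 * TICK_NS);	/* drift case */
	printf("after drift: remaining=%lld expires=%llu\n",
	       (long long)rq.runtime_remaining,
	       (unsigned long long)rq.runtime_expires);

	expire(&rq, 101 * TICK_NS, 200 * TICK_NS);	/* true expiry */
	printf("after expiry: remaining=%lld\n",
	       (long long)rq.runtime_remaining);
	return 0;
}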
@@ -1338,7 +1406,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 		goto out_unlock;
 
 	idle = cfs_b->idle;
-	cfs_b->runtime = cfs_b->quota;
+	/* if we're going inactive then everything else can be deferred */
+	if (idle)
+		goto out_unlock;
+
+	__refill_cfs_bandwidth_runtime(cfs_b);
+
 
 	/* mark as potentially idle for the upcoming period */
 	cfs_b->idle = 1;
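The period-timer change defers all work when the pool went idle over the last period: the timer is allowed to lapse, and the next consumer restores the state through the assign path above. A compact model of that handoff, again with hypothetical names:

#include <stdint.h>
#include <stdio.h>

/* hypothetical pool state; only the fields the timer path touches */
struct pool_state {
	uint64_t quota, period;
	uint64_t runtime, runtime_expires;
	int idle;
};

/*
 * Models do_sched_cfs_period_timer(): an idle pool skips the refill and
 * lets the timer lapse; otherwise refresh quota and deadline (as
 * __refill_cfs_bandwidth_runtime does) and mark the pool tentatively
 * idle for the upcoming period.
 */
static int period_tick(struct pool_state *p, uint64_t now)
{
	if (p->idle)
		return 1;	/* going inactive: everything else deferred */

	p->runtime = p->quota;
	p->runtime_expires = now + p->period;

	p->idle = 1;
	return 0;		/* keep the period timer armed */
}

int main(void)
{
	struct pool_state p = { 50, 100, 0, 0, 0 };

	printf("tick1 -> idle? %d\n", period_tick(&p, 100)); /* refills */
	printf("tick2 -> idle? %d\n", period_tick(&p, 200)); /* lapses */
	return 0;
}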
@@ -1557,7 +1630,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 
 	return wl;
 }
-
 #else
 
 static inline unsigned long effective_load(struct task_group *tg, int cpu,