Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--   kernel/sched_fair.c   70
1 file changed, 62 insertions, 8 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a7e50ba185ac..3816f217f119 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
 {
+	struct task_struct *curr = this_rq->curr;
+	struct task_group *tg;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
-	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+			p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
+
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
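
For illustration only (not kernel code): a minimal userspace sketch of the rule this hunk adds to wake_affine(), assuming the common 500000 ns default for sysctl_sched_migration_cost; the helper and variable names below are invented for the sketch.

#include <stdio.h>

/* Assumed default of sysctl_sched_migration_cost: 0.5 ms in ns. */
static const unsigned long long migration_cost_ns = 500000ULL;

/*
 * A sync wakeup hint is only honoured when both the waker and the
 * wakee historically overlap for less than the migration cost;
 * otherwise the hint is dropped, mirroring the new check above.
 */
static int effective_sync(int sync, unsigned long long waker_overlap_ns,
			  unsigned long long wakee_overlap_ns)
{
	if (sync && (waker_overlap_ns > migration_cost_ns ||
		     wakee_overlap_ns > migration_cost_ns))
		return 0;
	return sync;
}

int main(void)
{
	/* Pipe-like pair: tiny overlaps, the sync hint survives. */
	printf("%d\n", effective_sync(1, 20000, 30000));   /* prints 1 */
	/* Waker keeps running long after the wakeup: hint dropped. */
	printf("%d\n", effective_sync(1, 2000000, 30000));  /* prints 0 */
	return 0;
}
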
@@ -1309,16 +1314,63 @@ out:
 }
 #endif /* CONFIG_SMP */
 
-static unsigned long wakeup_gran(struct sched_entity *se)
+/*
+ * Adaptive granularity
+ *
+ * se->avg_wakeup gives the average time a task runs until it does a wakeup,
+ * with the limit of wakeup_gran -- when it never does a wakeup.
+ *
+ * So the smaller avg_wakeup is the faster we want this task to preempt,
+ * but we don't want to treat the preemptee unfairly and therefore allow it
+ * to run for at least the amount of time we'd like to run.
+ *
+ * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
+ *
+ * NOTE: we use *nr_running to scale with load, this nicely matches the
+ *       degrading latency on load.
+ */
+static unsigned long
+adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
+{
+	u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
+	u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
+	u64 gran = 0;
+
+	if (this_run < expected_wakeup)
+		gran = expected_wakeup - this_run;
+
+	return min_t(s64, gran, sysctl_sched_wakeup_granularity);
+}
+
+static unsigned long
+wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 {
 	unsigned long gran = sysctl_sched_wakeup_granularity;
 
+	if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
+		gran = adaptive_gran(curr, se);
+
 	/*
-	 * More easily preempt - nice tasks, while not making it harder for
-	 * + nice tasks.
+	 * Since its curr running now, convert the gran from real-time
+	 * to virtual-time in his units.
 	 */
-	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
-		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
+	if (sched_feat(ASYM_GRAN)) {
+		/*
+		 * By using 'se' instead of 'curr' we penalize light tasks, so
+		 * they get preempted easier. That is, if 'se' < 'curr' then
+		 * the resulting gran will be larger, therefore penalizing the
+		 * lighter, if otoh 'se' > 'curr' then the resulting gran will
+		 * be smaller, again penalizing the lighter task.
+		 *
+		 * This is especially important for buddies when the leftmost
+		 * task is higher priority than the buddy.
+		 */
+		if (unlikely(se->load.weight != NICE_0_LOAD))
+			gran = calc_delta_fair(gran, se);
+	} else {
+		if (unlikely(curr->load.weight != NICE_0_LOAD))
+			gran = calc_delta_fair(gran, curr);
+	}
 
 	return gran;
 }
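
As an aside (not part of the patch): a rough userspace sketch of the arithmetic above, using assumed example values (a 5 ms wakeup granularity and NICE_0_LOAD of 1024) and invented names; calc_delta_fair() is only approximated here by a straight NICE_0_LOAD/weight scaling.

#include <stdio.h>

/* Assumed example values, not authoritative kernel defaults:
 * wakeup granularity 5 ms, NICE_0_LOAD 1024 (nice-0 weight). */
static const unsigned long long wakeup_gran_ns = 5000000ULL;
static const unsigned long long nice_0_load = 1024ULL;

/*
 * Re-statement of the adaptive_gran() clamp: the running task may be
 * preempted once it has run for about twice the wakee's average
 * inter-wakeup interval (scaled by nr_running), capped at the fixed
 * wakeup granularity.
 */
static unsigned long long adaptive_gran_sketch(unsigned long long this_run_ns,
					       unsigned long long avg_wakeup_ns,
					       unsigned int nr_running)
{
	unsigned long long expected = 2 * avg_wakeup_ns * nr_running;
	unsigned long long gran = 0;

	if (this_run_ns < expected)
		gran = expected - this_run_ns;

	return gran < wakeup_gran_ns ? gran : wakeup_gran_ns;
}

/*
 * Rough model of the ASYM_GRAN branch: the granularity is converted
 * into the wakee's virtual time by scaling with NICE_0_LOAD / weight,
 * so heavier (lower-nice) wakees get a smaller gran and preempt sooner.
 */
static unsigned long long scale_by_weight(unsigned long long gran_ns,
					  unsigned long long weight)
{
	return gran_ns * nice_0_load / weight;
}

int main(void)
{
	/* Frequent waker (avg_wakeup 100 us), 2 runnable tasks, ran 150 us:
	 * expected = 400 us, so gran = 250 us, well under the 5 ms cap. */
	unsigned long long gran = adaptive_gran_sketch(150000, 100000, 2);

	printf("gran         = %llu ns\n", gran);
	/* A nice-0 wakee (weight 1024) keeps gran as-is; a heavier wakee
	 * (e.g. weight 2048) halves it and therefore preempts earlier. */
	printf("nice-0 wakee = %llu ns\n", scale_by_weight(gran, 1024));
	printf("heavy wakee  = %llu ns\n", scale_by_weight(gran, 2048));
	return 0;
}
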
@@ -1345,7 +1397,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 	if (vdiff <= 0)
 		return -1;
 
-	gran = wakeup_gran(curr);
+	gran = wakeup_gran(curr, se);
 	if (vdiff > gran)
 		return 1;
 
@@ -1426,7 +1478,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	if (sched_feat(WAKEUP_OVERLAP) && sync) {
+	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+			(se->avg_overlap < sysctl_sched_migration_cost &&
+			 pse->avg_overlap < sysctl_sched_migration_cost))) {
 		resched_task(curr);
 		return;
 	}
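
Again purely as an illustration, a sketch of the widened WAKEUP_OVERLAP test in this last hunk, with the same assumed 500000 ns migration cost and invented names: preemption now also triggers without the sync hint when both tasks have short average overlap.

#include <stdio.h>

/* Assumed default sysctl_sched_migration_cost: 0.5 ms in ns. */
static const unsigned long long migration_cost_ns = 500000ULL;

/*
 * Preempt on a sync wakeup, or when both the current task and the
 * woken task have short average overlap, i.e. they behave like a
 * synchronous pair even without the explicit hint.
 */
static int should_preempt(int sync, unsigned long long curr_overlap_ns,
			  unsigned long long woken_overlap_ns)
{
	return sync || (curr_overlap_ns < migration_cost_ns &&
			woken_overlap_ns < migration_cost_ns);
}

int main(void)
{
	printf("%d\n", should_preempt(0, 100000, 200000)); /* 1: both short */
	printf("%d\n", should_preempt(0, 900000, 200000)); /* 0: waker overlaps too long */
	return 0;
}
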