author	Mike Galbraith <efault@gmx.de>	2010-03-11 11:15:51 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-11 12:32:50 -0500
commit	e12f31d3e5d36328c7fbd0fce40a95e70b59152c (patch)
tree	3eaee7fede5ba830395d2e527fdfe60f1aba73f4
parent	b42e0c41a422a212ddea0666d5a3a0e3c35206db (diff)
sched: Remove avg_overlap
Both avg_overlap and avg_wakeup had an inherent problem in that their accuracy was detrimentally affected by cross-cpu wakeups, because we are missing the necessary call to update_curr(). This can't be fixed without increasing overhead in our already too fat fastpath.

Additionally, with recent load balancing changes making us prefer to place tasks in an idle cache domain (which is good for compute-bound loads), communicating tasks suffer when a sync wakeup, which would enable affine placement, is turned into a non-sync wakeup by SYNC_LESS. With one task on the runqueue, wake_affine() rejects the affine wakeup request, leaving the unfortunate task where it was placed and taking frequent cache misses.

Remove it, and recover some fastpath cycles.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268301121.6785.30.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
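Editor's note on the metric being removed: avg_overlap was an exponential moving average of how long a task kept running past the point where it woke a partner, sampled at dequeue time as sum_exec_runtime - last_wakeup and folded in by update_avg(), which at the time weighted each new sample by 1/8. The standalone sketch below is not kernel code; it is a simplified userspace illustration (with hypothetical sample values, one deliberately skewed) of that averaging step, showing how a single inaccurate sample, such as one taken without the update_curr() call mentioned above, distorts the running estimate.

#include <stdio.h>
#include <stdint.h>

/* Same shape as the kernel's update_avg() of this era: an exponential
 * moving average that gives each new sample a weight of 1/8. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);
	*avg += diff >> 3;
}

int main(void)
{
	/* Hypothetical overlap samples in nanoseconds; the fourth one is
	 * deliberately bogus to stand in for an inaccurate measurement. */
	uint64_t samples[] = { 50000, 52000, 48000, 900000, 51000, 49000 };
	uint64_t avg_overlap = 0;
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_overlap, samples[i]);
		printf("sample %zu: %8llu ns -> avg_overlap = %llu ns\n",
		       i, (unsigned long long)samples[i],
		       (unsigned long long)avg_overlap);
	}
	return 0;
}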
-rw-r--r--	include/linux/sched.h	|  3
-rw-r--r--	kernel/sched.c	| 33
-rw-r--r--	kernel/sched_debug.c	|  1
-rw-r--r--	kernel/sched_fair.c	| 18
-rw-r--r--	kernel/sched_features.h	| 16
5 files changed, 0 insertions, 71 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c560f5ada0..8604884cee87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1180,9 +1180,6 @@ struct sched_entity {
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 
-	u64			last_wakeup;
-	u64			avg_overlap;
-
 	u64			nr_migrations;
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched.c b/kernel/sched.c
index 35a8626ace7d..68ed6f4f3c13 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1887,11 +1887,6 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			   p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
-	}
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
@@ -2452,15 +2447,6 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
-	/*
-	 * Only attribute actual wakeups done by this task.
-	 */
-	if (!in_interrupt()) {
-		struct sched_entity *se = &current->se;
-
-		se->last_wakeup = se->sum_exec_runtime;
-	}
-
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, wake_flags);
@@ -2522,8 +2508,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
-	p->se.last_wakeup = 0;
-	p->se.avg_overlap = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3594,23 +3578,6 @@ static inline void schedule_debug(struct task_struct *prev)
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->state == TASK_RUNNING) {
-		u64 runtime = prev->se.sum_exec_runtime;
-
-		runtime -= prev->se.prev_sum_exec_runtime;
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-
-		/*
-		 * In order to avoid avg_overlap growing stale when we are
-		 * indeed overlapping and hence not getting put to sleep, grow
-		 * the avg_overlap on preemption.
-		 *
-		 * We use the average preemption runtime because that
-		 * correlates to the amount of cache footprint a task can
-		 * build up.
-		 */
-		update_avg(&prev->se.avg_overlap, runtime);
-	}
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 20b95a420fec..8a46a719f367 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -407,7 +407,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
-	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6fc62854422c..c3b69d4b5d65 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1241,7 +1241,6 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = current;
 	unsigned long this_load, load;
 	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
@@ -1256,18 +1255,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (sync) {
-		if (sched_feat(SYNC_LESS) &&
-		    (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-		     p->se.avg_overlap > sysctl_sched_migration_cost))
-			sync = 0;
-	} else {
-		if (sched_feat(SYNC_MORE) &&
-		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-		     p->se.avg_overlap < sysctl_sched_migration_cost))
-			sync = 1;
-	}
-
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1711,11 +1698,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (sched_feat(WAKEUP_SYNC) && sync)
 		goto preempt;
 
-	if (sched_feat(WAKEUP_OVERLAP) &&
-	    se->avg_overlap < sysctl_sched_migration_cost &&
-	    pse->avg_overlap < sysctl_sched_migration_cost)
-		goto preempt;
-
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 96ef5dbc66e1..c545e048dfed 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -42,12 +42,6 @@ SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(WAKEUP_SYNC, 0)
 
 /*
- * Wakeup preempt based on task behaviour. Tasks that do not overlap
- * don't get preempted.
- */
-SCHED_FEAT(WAKEUP_OVERLAP, 0)
-
-/*
  * Use the SYNC wakeup hint, pipes and the likes use this to indicate
  * the remote end is likely to consume the data we just wrote, and
  * therefore has cache benefit from being placed on the same cpu, see
@@ -64,16 +58,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 
 /*
- * Weaken SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_LESS, 1)
-
-/*
- * Add SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_MORE, 0)
-
-/*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we
  * touched, increases cache locality.