author	Mike Galbraith <efault@gmx.de>	2010-03-11 11:15:51 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-11 12:32:50 -0500
commit	e12f31d3e5d36328c7fbd0fce40a95e70b59152c (patch)
tree	3eaee7fede5ba830395d2e527fdfe60f1aba73f4
parent	b42e0c41a422a212ddea0666d5a3a0e3c35206db (diff)
sched: Remove avg_overlap
Both avg_overlap and avg_wakeup had an inherent problem in that their accuracy was detrimentally affected by cross-cpu wakeups, because we are missing the necessary call to update_curr(). This can't be fixed without increasing overhead in our already too fat fastpath.

Additionally, with recent load balancing changes making us prefer to place tasks in an idle cache domain (which is good for compute-bound loads), communicating tasks suffer when a sync wakeup, which would enable affine placement, is turned into a non-sync wakeup by SYNC_LESS. With one task on the runqueue, wake_affine() rejects the affine wakeup request, leaving the unfortunate task where it was placed and taking frequent cache misses.

Remove it, and recover some fastpath cycles.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268301121.6785.30.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
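Editor's note on the metric being removed: avg_overlap was an exponential moving average of how long a task kept running past the point where it woke a partner, sampled at dequeue time as sum_exec_runtime - last_wakeup and folded in by update_avg(), which at the time weighted each new sample by 1/8. The standalone sketch below is not kernel code; it is a simplified userspace illustration (with hypothetical sample values, one deliberately skewed) of that averaging step, showing how a single inaccurate sample, such as one taken without the update_curr() call mentioned above, distorts the running estimate.

#include <stdio.h>
#include <stdint.h>

/* Same shape as the kernel's update_avg() of this era: an exponential
 * moving average that gives each new sample a weight of 1/8. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);
	*avg += diff >> 3;
}

int main(void)
{
	/* Hypothetical overlap samples in nanoseconds; the fourth one is
	 * deliberately bogus to stand in for an inaccurate measurement. */
	uint64_t samples[] = { 50000, 52000, 48000, 900000, 51000, 49000 };
	uint64_t avg_overlap = 0;
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		update_avg(&avg_overlap, samples[i]);
		printf("sample %zu: %8llu ns -> avg_overlap = %llu ns\n",
		       i, (unsigned long long)samples[i],
		       (unsigned long long)avg_overlap);
	}
	return 0;
}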
-rw-r--r--	include/linux/sched.h	|  3
-rw-r--r--	kernel/sched.c	| 33
-rw-r--r--	kernel/sched_debug.c	|  1
-rw-r--r--	kernel/sched_fair.c	| 18
-rw-r--r--	kernel/sched_features.h	| 16
5 files changed, 0 insertions, 71 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c560f5ada0..8604884cee87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1180,9 +1180,6 @@ struct sched_entity {
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 
-	u64			last_wakeup;
-	u64			avg_overlap;
-
 	u64			nr_migrations;
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched.c b/kernel/sched.c
index 35a8626ace7d..68ed6f4f3c13 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1887,11 +1887,6 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			   p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
-	}
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
@@ -2452,15 +2447,6 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
-	/*
-	 * Only attribute actual wakeups done by this task.
-	 */
-	if (!in_interrupt()) {
-		struct sched_entity *se = &current->se;
-
-		se->last_wakeup = se->sum_exec_runtime;
-	}
-
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, wake_flags);
@@ -2522,8 +2508,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
 	p->se.nr_migrations = 0;
-	p->se.last_wakeup = 0;
-	p->se.avg_overlap = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3594,23 +3578,6 @@ static inline void schedule_debug(struct task_struct *prev)
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->state == TASK_RUNNING) {
-		u64 runtime = prev->se.sum_exec_runtime;
-
-		runtime -= prev->se.prev_sum_exec_runtime;
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-
-		/*
-		 * In order to avoid avg_overlap growing stale when we are
-		 * indeed overlapping and hence not getting put to sleep, grow
-		 * the avg_overlap on preemption.
-		 *
-		 * We use the average preemption runtime because that
-		 * correlates to the amount of cache footprint a task can
-		 * build up.
-		 */
-		update_avg(&prev->se.avg_overlap, runtime);
-	}
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 20b95a420fec..8a46a719f367 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -407,7 +407,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
-	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6fc62854422c..c3b69d4b5d65 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1241,7 +1241,6 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = current;
 	unsigned long this_load, load;
 	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
@@ -1256,18 +1255,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (sync) {
-		if (sched_feat(SYNC_LESS) &&
-		    (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-		     p->se.avg_overlap > sysctl_sched_migration_cost))
-			sync = 0;
-	} else {
-		if (sched_feat(SYNC_MORE) &&
-		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-		     p->se.avg_overlap < sysctl_sched_migration_cost))
-			sync = 1;
-	}
-
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1711,11 +1698,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (sched_feat(WAKEUP_SYNC) && sync)
 		goto preempt;
 
-	if (sched_feat(WAKEUP_OVERLAP) &&
-	    se->avg_overlap < sysctl_sched_migration_cost &&
-	    pse->avg_overlap < sysctl_sched_migration_cost)
-		goto preempt;
-
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 96ef5dbc66e1..c545e048dfed 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -42,12 +42,6 @@ SCHED_FEAT(ASYM_GRAN, 1)
 SCHED_FEAT(WAKEUP_SYNC, 0)
 
 /*
- * Wakeup preempt based on task behaviour. Tasks that do not overlap
- * don't get preempted.
- */
-SCHED_FEAT(WAKEUP_OVERLAP, 0)
-
-/*
  * Use the SYNC wakeup hint, pipes and the likes use this to indicate
  * the remote end is likely to consume the data we just wrote, and
  * therefore has cache benefit from being placed on the same cpu, see
@@ -64,16 +58,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 
 /*
- * Weaken SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_LESS, 1)
-
-/*
- * Add SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_MORE, 0)
-
-/*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we
  * touched, increases cache locality.