author     Ingo Molnar <mingo@elte.hu>    2008-03-18 20:42:00 -0400
committer  Ingo Molnar <mingo@elte.hu>    2008-03-18 23:27:53 -0400
commit     4ae7d5cefd4aa3560e359a3b0f03e12adc8b5c86 (patch)
tree       c7517529ec199d3d9936b4049880a820534d2703
parent     6f3d09291b4982991680b61763b2541e53e2a95f (diff)
sched: improve affine wakeups
improve affine wakeups. Maintain the 'overlap' metric based on CFS's
sum_exec_runtime - which means the amount of time a task executes
after it wakes up some other task.

Use the 'overlap' for the wakeup decisions: if the 'overlap' is short,
it means there's strong workload coupling between this task and the
woken up task. If the 'overlap' is large then the workload is decoupled
and the scheduler will move them to separate CPUs more easily.

( Also slightly move the preempt_check within try_to_wake_up() - this has
  no effect on functionality but allows 'early wakeups' (for still-on-rq
  tasks) to be correctly accounted as well.)

Signed-off-by: Ingo Molnar <mingo@elte.hu>
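For illustration only (not part of the patch), here is a minimal self-contained
userspace sketch of the 'overlap' bookkeeping described above: the metric is a
running average with 1/8 weighting on new samples, matching the update_avg()
helper added below. The struct and the 500000 ns threshold are assumptions
standing in for sched_entity and sysctl_sched_migration_cost.

/*
 * Illustration only - a userspace model of the 'overlap' bookkeeping.
 * struct entity and the 500000 ns threshold are assumed stand-ins for
 * sched_entity and sysctl_sched_migration_cost.
 */
#include <stdio.h>
#include <stdint.h>

struct entity {
	uint64_t sum_exec_runtime;	/* total ns this task has executed */
	uint64_t last_wakeup;		/* sum_exec_runtime when it last woke another task */
	uint64_t avg_overlap;		/* running average of post-wakeup execution time */
};

/* same 1/8-weight running average as the patch's update_avg() */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = (int64_t)(sample - *avg);

	*avg += diff >> 3;
}

/* called when the task goes to sleep, mirroring update_avg_stats() */
static void record_overlap(struct entity *se)
{
	if (!se->last_wakeup)
		return;

	update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
	se->last_wakeup = 0;
}

int main(void)
{
	struct entity waker = { 0, 0, 0 };
	const uint64_t migration_cost = 500000;	/* assumed threshold, ns */

	/* the waker wakes a partner, runs 100 us more, then sleeps - repeatedly */
	for (int i = 0; i < 10; i++) {
		waker.last_wakeup = waker.sum_exec_runtime;	/* as in check_preempt_wakeup() */
		waker.sum_exec_runtime += 100000;		/* keeps running briefly */
		record_overlap(&waker);				/* as in dequeue_entity() on sleep */
	}

	printf("avg_overlap = %llu ns -> %s wakeup\n",
	       (unsigned long long)waker.avg_overlap,
	       waker.avg_overlap < migration_cost ? "affine" : "non-affine");
	return 0;
}

With a short, steady post-wakeup runtime the average stays well below the
threshold, so the affine path would keep pulling the woken partner onto the
waker's CPU; a task that keeps running long after its wakeups drifts above
the threshold and is left to normal load balancing.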
-rw-r--r--   include/linux/sched.h  |  3
-rw-r--r--   kernel/sched.c         |  5
-rw-r--r--   kernel/sched_debug.c   |  1
-rw-r--r--   kernel/sched_fair.c    | 58
4 files changed, 50 insertions, 17 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11d8e9a74eff..3625fcaf5d0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -929,6 +929,9 @@ struct sched_entity {
 	u64		vruntime;
 	u64		prev_sum_exec_runtime;
 
+	u64		last_wakeup;
+	u64		avg_overlap;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64		wait_start;
 	u64		wait_max;
diff --git a/kernel/sched.c b/kernel/sched.c
index d1ad69b270ca..adbd475cfd25 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1855,10 +1855,11 @@ out_activate:
 		schedstat_inc(p, se.nr_wakeups_remote);
 	update_rq_clock(rq);
 	activate_task(rq, p, 1);
-	check_preempt_curr(rq, p);
 	success = 1;
 
 out_running:
+	check_preempt_curr(rq, p);
+
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -1892,6 +1893,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.last_wakeup		= 0;
+	p->se.avg_overlap		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4b5e24cf2f4a..ef358ba07683 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -288,6 +288,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
+	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b5a357396b49..87c9d3a2aafa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -556,6 +556,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	account_entity_enqueue(cfs_rq, se);
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (!se->last_wakeup)
+		return;
+
+	update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+	se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -566,6 +581,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -981,12 +997,15 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
+static const struct sched_class fair_sched_class;
+
 static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct task_struct *p,
-	    int prev_cpu, int this_cpu, int sync, int idx,
-	    unsigned long load, unsigned long this_load,
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
 {
+	struct task_struct *curr = this_rq->curr;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
 
@@ -994,10 +1013,15 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct task_struct *p,
 		return 0;
 
 	/*
-	 * Attract cache-cold tasks on sync wakeups:
+	 * If the currently running task will sleep within
+	 * a reasonable amount of time then attract this newly
+	 * woken task:
 	 */
-	if (sync && !task_hot(p, rq->clock, this_sd))
-		return 1;
+	if (sync && curr->sched_class == &fair_sched_class) {
+		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+			p->se.avg_overlap < sysctl_sched_migration_cost)
+			return 1;
+	}
 
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
@@ -1030,18 +1054,16 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	struct sched_domain *sd, *this_sd = NULL;
 	int prev_cpu, this_cpu, new_cpu;
 	unsigned long load, this_load;
+	struct rq *rq, *this_rq;
 	unsigned int imbalance;
-	struct rq *rq;
 	int idx;
 
 	prev_cpu = task_cpu(p);
 	rq = task_rq(p);
 	this_cpu = smp_processor_id();
+	this_rq = cpu_rq(this_cpu);
 	new_cpu = prev_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
-
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1069,11 +1091,12 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 		load = source_load(prev_cpu, idx);
 		this_load = target_load(this_cpu, idx);
 
-		if (wake_affine(rq, this_sd, p, prev_cpu, this_cpu, sync, idx,
-				load, this_load, imbalance)) {
-			new_cpu = this_cpu;
+		if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+				load, this_load, imbalance))
+			return this_cpu;
+
+		if (prev_cpu == this_cpu)
 			goto out;
-		}
 
 		/*
 		 * Start passive balancing when half the imbalance_pct
@@ -1083,8 +1106,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 		if (imbalance*this_load <= 100*load) {
 			schedstat_inc(this_sd, ttwu_move_balance);
 			schedstat_inc(p, se.nr_wakeups_passive);
-			new_cpu = this_cpu;
-			goto out;
+			return this_cpu;
 		}
 	}
 
@@ -1111,6 +1133,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
+	se->last_wakeup = se->sum_exec_runtime;
+	if (unlikely(se == pse))
+		return;
+
 	cfs_rq_of(pse)->next = pse;
 
 	/*