author		Ingo Molnar <mingo@elte.hu>	2008-03-18 20:42:00 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-03-18 23:27:53 -0400
commit		4ae7d5cefd4aa3560e359a3b0f03e12adc8b5c86
tree		c7517529ec199d3d9936b4049880a820534d2703
parent		6f3d09291b4982991680b61763b2541e53e2a95f
sched: improve affine wakeups
Improve affine wakeups: maintain an 'overlap' metric based on CFS's
sum_exec_runtime, i.e. the amount of time a task keeps executing after
it wakes up some other task.
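For reference, the patch tracks this overlap as a running average with a
1/8 weight per new sample (the 'diff >> 3' in update_avg() below). Here
is a minimal user-space sketch of the same update rule; update_avg()
mirrors the patch, everything else (the sample values, main()) is purely
illustrative:

#include <stdint.h>
#include <stdio.h>

/* Same rule as the patch's update_avg(): an exponentially weighted
 * moving average where each new sample contributes 1/8 (diff >> 3),
 * so avg_overlap adapts quickly yet smooths out one-off spikes. */
static void update_avg(uint64_t *avg, uint64_t sample)
{
	int64_t diff = sample - *avg;

	*avg += diff >> 3;
}

int main(void)
{
	/* Hypothetical overlap samples in nanoseconds: the waker keeps
	 * running ~100us after each wakeup, with one 5ms outlier. */
	uint64_t samples[] = { 100000, 100000, 100000, 5000000, 100000 };
	uint64_t avg_overlap = 0;

	for (int i = 0; i < 5; i++) {
		update_avg(&avg_overlap, samples[i]);
		printf("sample %8llu ns -> avg_overlap %7llu ns\n",
		       (unsigned long long)samples[i],
		       (unsigned long long)avg_overlap);
	}
	return 0;
}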
Use the 'overlap' for the wakeup decisions: if the 'overlap' is short,
there is strong workload coupling between this task and the woken-up
task. If the 'overlap' is large, the workloads are decoupled and the
scheduler will move them to separate CPUs more easily.
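In the patch, 'short' means below sysctl_sched_migration_cost (0.5 ms by
default at the time), and the check is applied to both sides of a sync
wakeup in wake_affine(). A simplified, self-contained sketch of that
condition; the helper name and the sample numbers are illustrative, only
the comparison against sysctl_sched_migration_cost comes from the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Default sched_migration_cost of the era: 0.5 ms, in nanoseconds. */
static const uint64_t sysctl_sched_migration_cost = 500000ULL;

/* Condensed form of the new wake_affine() check: on a sync wakeup,
 * pull the woken task onto the waker's CPU only when both tasks
 * historically run for a short time after waking someone up. */
static bool sync_wakeup_is_affine(uint64_t waker_avg_overlap,
				  uint64_t wakee_avg_overlap, bool sync)
{
	if (!sync)
		return false;

	return waker_avg_overlap < sysctl_sched_migration_cost &&
	       wakee_avg_overlap < sysctl_sched_migration_cost;
}

int main(void)
{
	/* Tightly coupled pair (e.g. pipe ping-pong): ~50us overlaps. */
	printf("coupled:   %d\n", sync_wakeup_is_affine(50000, 50000, true));
	/* Decoupled pair: the waker runs on for ~5ms after the wakeup. */
	printf("decoupled: %d\n", sync_wakeup_is_affine(5000000, 50000, true));
	return 0;
}

(The real code additionally requires the currently running task to be in
the fair scheduling class, as shown in the kernel/sched_fair.c hunk
below.)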
(Also, slightly move the check_preempt_curr() call within
try_to_wake_up() - this has no effect on functionality, but allows
'early wakeups' (of still-on-rq tasks) to be accounted correctly as
well.)
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--   include/linux/sched.h |  3
-rw-r--r--   kernel/sched.c        |  5
-rw-r--r--   kernel/sched_debug.c  |  1
-rw-r--r--   kernel/sched_fair.c   | 58
4 files changed, 50 insertions, 17 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11d8e9a74eff..3625fcaf5d0f 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -929,6 +929,9 @@ struct sched_entity {
 	u64 vruntime;
 	u64 prev_sum_exec_runtime;
 
+	u64 last_wakeup;
+	u64 avg_overlap;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64 wait_start;
 	u64 wait_max;
diff --git a/kernel/sched.c b/kernel/sched.c
index d1ad69b270ca..adbd475cfd25 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1855,10 +1855,11 @@ out_activate:
 	schedstat_inc(p, se.nr_wakeups_remote);
 	update_rq_clock(rq);
 	activate_task(rq, p, 1);
-	check_preempt_curr(rq, p);
 	success = 1;
 
 out_running:
+	check_preempt_curr(rq, p);
+
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
@@ -1892,6 +1893,8 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start = 0;
 	p->se.sum_exec_runtime = 0;
 	p->se.prev_sum_exec_runtime = 0;
+	p->se.last_wakeup = 0;
+	p->se.avg_overlap = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 4b5e24cf2f4a..ef358ba07683 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -288,6 +288,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
+	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index b5a357396b49..87c9d3a2aafa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -556,6 +556,21 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 	account_entity_enqueue(cfs_rq, se);
 }
 
+static void update_avg(u64 *avg, u64 sample)
+{
+	s64 diff = sample - *avg;
+	*avg += diff >> 3;
+}
+
+static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (!se->last_wakeup)
+		return;
+
+	update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
+	se->last_wakeup = 0;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -566,6 +581,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 
 	update_stats_dequeue(cfs_rq, se);
 	if (sleep) {
+		update_avg_stats(cfs_rq, se);
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -981,12 +997,15 @@ static inline int wake_idle(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_SMP
 
+static const struct sched_class fair_sched_class;
+
 static int
-wake_affine(struct rq *rq, struct sched_domain *this_sd, struct task_struct *p,
-	    int prev_cpu, int this_cpu, int sync, int idx,
-	    unsigned long load, unsigned long this_load,
+wake_affine(struct rq *rq, struct sched_domain *this_sd, struct rq *this_rq,
+	    struct task_struct *p, int prev_cpu, int this_cpu, int sync,
+	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
 {
+	struct task_struct *curr = this_rq->curr;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
 
@@ -994,10 +1013,15 @@ wake_affine(struct rq *rq, struct sched_domain *this_sd, struct task_struct *p,
 		return 0;
 
 	/*
-	 * Attract cache-cold tasks on sync wakeups:
+	 * If the currently running task will sleep within
+	 * a reasonable amount of time then attract this newly
+	 * woken task:
 	 */
-	if (sync && !task_hot(p, rq->clock, this_sd))
-		return 1;
+	if (sync && curr->sched_class == &fair_sched_class) {
+		if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
+		    p->se.avg_overlap < sysctl_sched_migration_cost)
+			return 1;
+	}
 
 	schedstat_inc(p, se.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
@@ -1030,18 +1054,16 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	struct sched_domain *sd, *this_sd = NULL;
 	int prev_cpu, this_cpu, new_cpu;
 	unsigned long load, this_load;
+	struct rq *rq, *this_rq;
 	unsigned int imbalance;
-	struct rq *rq;
 	int idx;
 
 	prev_cpu = task_cpu(p);
 	rq = task_rq(p);
 	this_cpu = smp_processor_id();
+	this_rq = cpu_rq(this_cpu);
 	new_cpu = prev_cpu;
 
-	if (prev_cpu == this_cpu)
-		goto out;
-
 	/*
 	 * 'this_sd' is the first domain that both
 	 * this_cpu and prev_cpu are present in:
@@ -1069,11 +1091,12 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 	load = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (wake_affine(rq, this_sd, p, prev_cpu, this_cpu, sync, idx,
-			load, this_load, imbalance)) {
-		new_cpu = this_cpu;
+	if (wake_affine(rq, this_sd, this_rq, p, prev_cpu, this_cpu, sync, idx,
+			load, this_load, imbalance))
+		return this_cpu;
+
+	if (prev_cpu == this_cpu)
 		goto out;
-	}
 
 	/*
 	 * Start passive balancing when half the imbalance_pct
@@ -1083,8 +1106,7 @@ static int select_task_rq_fair(struct task_struct *p, int sync)
 		if (imbalance*this_load <= 100*load) {
 			schedstat_inc(this_sd, ttwu_move_balance);
 			schedstat_inc(p, se.nr_wakeups_passive);
-			new_cpu = this_cpu;
-			goto out;
+			return this_cpu;
 		}
 	}
 
@@ -1111,6 +1133,10 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		return;
 	}
 
+	se->last_wakeup = se->sum_exec_runtime;
+	if (unlikely(se == pse))
+		return;
+
 	cfs_rq_of(pse)->next = pse;
 
 	/*