author     Nick Piggin <nickpiggin@yahoo.com.au>      2005-06-25 17:57:13 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-25 19:24:41 -0400
commit     7897986bad8f6cd50d6149345aca7f6480f49464 (patch)
tree       10a5e08e004ae685aaab6823a3774803455b7704 /kernel/sched.c
parent     99b61ccf0bf0e9a85823d39a5db6a1519caeb13d (diff)
[PATCH] sched: balance timers
Do CPU load averaging over a number of different intervals. Allow each
interval to be chosen by passing a parameter to source_load and target_load:
0 is instantaneous, idx > 0 returns a decaying average with the most recent
sample weighted at 1/2^(idx-1), up to a maximum idx of 3 (which could easily
be increased). In general, a higher index results in more conservative
balancing.
Signed-off-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
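
As a rough illustration of the averaging described above (a standalone sketch, not part of the patch: the update rule mirrors the rebalance_tick() change below, everything else is a made-up harness), feeding a constant load shows how the higher-indexed entries converge more slowly:

#include <stdio.h>

/* Stand-in for rq->cpu_load[3]; index 0 tracks the newest sample fastest. */
static unsigned long cpu_load[3];

/* Per-tick update, as in the patched rebalance_tick(). */
static void update_cpu_load(unsigned long this_load)
{
	int i;

	for (i = 0; i < 3; i++) {
		unsigned long old_load = cpu_load[i];
		unsigned long new_load = this_load;
		int scale = 1 << i;

		/* Round up a rising load so the average can reach it. */
		if (new_load > old_load)
			new_load += scale - 1;
		cpu_load[i] = (old_load * (scale - 1) + new_load) / scale;
	}
}

int main(void)
{
	int tick;

	for (tick = 1; tick <= 6; tick++) {
		update_cpu_load(16);	/* constant load of 16 */
		printf("tick %d: %lu %lu %lu\n",
		       tick, cpu_load[0], cpu_load[1], cpu_load[2]);
	}
	return 0;
}

Note that index 0 (scale 1) simply tracks the latest sample, which is why the patched source_load()/target_load() treat a passed-in index of 0 as "instantaneous" and otherwise read cpu_load[idx-1].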
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   138
1 file changed, 74 insertions(+), 64 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f665de34ed82..b597b07e7911 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -206,7 +206,7 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long cpu_load;
+	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
 
@@ -886,23 +886,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	if (type == 0)
+		return load_now;
 
-	return min(rq->cpu_load, load_now);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
 	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	if (type == 0)
+		return load_now;
 
-	return max(rq->cpu_load, load_now);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 #endif
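
To make the under/over-estimate asymmetry in the two helpers above concrete: for the same CPU, source_load() can only return a value at or below the instantaneous load, while target_load() returns one at or above it, so a migration source looks lighter and a target looks heavier than the averaged history alone would suggest. A minimal userspace check (stub values only; SCHED_LOAD_SCALE is assumed to be the kernel's 128):

#include <assert.h>

#define SCHED_LOAD_SCALE 128UL	/* assumed kernel value, for illustration */

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

int main(void)
{
	/* Decayed average says 2 tasks, but 4 are runnable right now. */
	unsigned long cpu_load = 2 * SCHED_LOAD_SCALE;
	unsigned long load_now = 4 * SCHED_LOAD_SCALE;

	assert(min_ul(cpu_load, load_now) == 2 * SCHED_LOAD_SCALE);	/* source_load() */
	assert(max_ul(cpu_load, load_now) == 4 * SCHED_LOAD_SCALE);	/* target_load() */
	return 0;
}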
@@ -967,7 +971,7 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 	runqueue_t *rq;
 #ifdef CONFIG_SMP
 	unsigned long load, this_load;
-	struct sched_domain *sd;
+	struct sched_domain *sd, *this_sd = NULL;
 	int new_cpu;
 #endif
 
@@ -986,72 +990,64 @@ static int try_to_wake_up(task_t * p, unsigned int state, int sync)
 	if (unlikely(task_running(rq, p)))
 		goto out_activate;
 
-#ifdef CONFIG_SCHEDSTATS
+	new_cpu = cpu;
+
 	schedstat_inc(rq, ttwu_cnt);
 	if (cpu == this_cpu) {
 		schedstat_inc(rq, ttwu_local);
-	} else {
-		for_each_domain(this_cpu, sd) {
-			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_wake_remote);
-				break;
-			}
+		goto out_set_cpu;
+	}
+
+	for_each_domain(this_cpu, sd) {
+		if (cpu_isset(cpu, sd->span)) {
+			schedstat_inc(sd, ttwu_wake_remote);
+			this_sd = sd;
+			break;
 		}
 	}
-#endif
 
-	new_cpu = cpu;
-	if (cpu == this_cpu || unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
+	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
-	load = source_load(cpu);
-	this_load = target_load(this_cpu);
-
 	/*
-	 * If sync wakeup then subtract the (maximum possible) effect of
-	 * the currently running task from the load of the current CPU:
+	 * Check for affine wakeup and passive balancing possibilities.
 	 */
-	if (sync)
-		this_load -= SCHED_LOAD_SCALE;
-
-	/* Don't pull the task off an idle CPU to a busy one */
-	if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2)
-		goto out_set_cpu;
+	if (this_sd) {
+		int idx = this_sd->wake_idx;
+		unsigned int imbalance;
 
-	new_cpu = this_cpu; /* Wake to this CPU if we can */
+		load = source_load(cpu, idx);
+		this_load = target_load(this_cpu, idx);
 
-	/*
-	 * Scan domains for affine wakeup and passive balancing
-	 * possibilities.
-	 */
-	for_each_domain(this_cpu, sd) {
-		unsigned int imbalance;
 		/*
-		 * Start passive balancing when half the imbalance_pct
-		 * limit is reached.
+		 * If sync wakeup then subtract the (maximum possible) effect of
+		 * the currently running task from the load of the current CPU:
 		 */
-		imbalance = sd->imbalance_pct + (sd->imbalance_pct - 100) / 2;
+		if (sync)
+			this_load -= SCHED_LOAD_SCALE;
+
+		/* Don't pull the task off an idle CPU to a busy one */
+		if (load < SCHED_LOAD_SCALE/2 && this_load > SCHED_LOAD_SCALE/2)
+			goto out_set_cpu;
 
-		if ((sd->flags & SD_WAKE_AFFINE) &&
-			!task_hot(p, rq->timestamp_last_tick, sd)) {
+		new_cpu = this_cpu; /* Wake to this CPU if we can */
+
+		if ((this_sd->flags & SD_WAKE_AFFINE) &&
+			!task_hot(p, rq->timestamp_last_tick, this_sd)) {
 			/*
 			 * This domain has SD_WAKE_AFFINE and p is cache cold
 			 * in this domain.
 			 */
-			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_move_affine);
-				goto out_set_cpu;
-			}
-		} else if ((sd->flags & SD_WAKE_BALANCE) &&
+			schedstat_inc(this_sd, ttwu_move_affine);
+			goto out_set_cpu;
+		} else if ((this_sd->flags & SD_WAKE_BALANCE) &&
 				imbalance*this_load <= 100*load) {
 			/*
 			 * This domain has SD_WAKE_BALANCE and there is
 			 * an imbalance.
 			 */
-			if (cpu_isset(cpu, sd->span)) {
-				schedstat_inc(sd, ttwu_move_balance);
-				goto out_set_cpu;
-			}
+			schedstat_inc(this_sd, ttwu_move_balance);
+			goto out_set_cpu;
 		}
 	}
 
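
The SD_WAKE_BALANCE test that survives at the end of this hunk compares percentage-scaled loads. Written as a standalone predicate it looks like the sketch below; this is only an illustration, and it borrows the imbalance formula from the removed for_each_domain() loop, since the new hunk declares imbalance without showing an assignment:

#include <stdbool.h>

/*
 * Passive-balancing check, isolated for illustration: pull the waking task
 * to this CPU when the CPU it last ran on is sufficiently busier, i.e. when
 * imbalance * this_load <= 100 * load. With imbalance_pct = 125 the derived
 * threshold is 125 + 25/2 = 137, so the remote (source) load must be at
 * least about 1.37x this CPU's (target) load.
 */
static bool wake_balance(unsigned int imbalance_pct,
			 unsigned long load, unsigned long this_load)
{
	unsigned int imbalance = imbalance_pct + (imbalance_pct - 100) / 2;

	return (unsigned long)imbalance * this_load <= 100UL * load;
}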
@@ -1509,7 +1505,7 @@ static int find_idlest_cpu(struct task_struct *p, int this_cpu,
 	cpus_and(mask, sd->span, p->cpus_allowed);
 
 	for_each_cpu_mask(i, mask) {
-		load = target_load(i);
+		load = target_load(i, sd->wake_idx);
 
 		if (load < min_load) {
 			min_cpu = i;
@@ -1522,7 +1518,7 @@ static int find_idlest_cpu(struct task_struct *p, int this_cpu,
 	}
 
 	/* add +1 to account for the new task */
-	this_load = source_load(this_cpu) + SCHED_LOAD_SCALE;
+	this_load = source_load(this_cpu, sd->wake_idx) + SCHED_LOAD_SCALE;
 
 	/*
 	 * Would with the addition of the new task to the
@@ -1767,8 +1763,15 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 {
 	struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups;
 	unsigned long max_load, avg_load, total_load, this_load, total_pwr;
+	int load_idx;
 
 	max_load = this_load = total_load = total_pwr = 0;
+	if (idle == NOT_IDLE)
+		load_idx = sd->busy_idx;
+	else if (idle == NEWLY_IDLE)
+		load_idx = sd->newidle_idx;
+	else
+		load_idx = sd->idle_idx;
 
 	do {
 		unsigned long load;
@@ -1783,9 +1786,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 		for_each_cpu_mask(i, group->cpumask) {
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = target_load(i);
+				load = target_load(i, load_idx);
 			else
-				load = source_load(i);
+				load = source_load(i, load_idx);
 
 			avg_load += load;
 		}
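
The new if/else chain above picks which decay index to use from the caller's idle state. The same selection could be written as a tiny helper; this is a purely hypothetical refactor for clarity, with minimal stand-in types (the real busy_idx/newidle_idx/idle_idx fields live in struct sched_domain, outside the file shown by this filtered diffstat):

/* Minimal stand-ins for the kernel's enum idle_type and sched_domain fields. */
enum idle_type { SCHED_IDLE, NOT_IDLE, NEWLY_IDLE };

struct domain_idx {
	int busy_idx;
	int newidle_idx;
	int idle_idx;
};

/* Same selection as the chain added to find_busiest_group() above. */
int pick_load_idx(const struct domain_idx *sd, enum idle_type idle)
{
	if (idle == NOT_IDLE)
		return sd->busy_idx;
	if (idle == NEWLY_IDLE)
		return sd->newidle_idx;
	return sd->idle_idx;
}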
@@ -1895,7 +1898,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group)
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -2150,18 +2153,23 @@ static void rebalance_tick(int this_cpu, runqueue_t *this_rq,
 	unsigned long old_load, this_load;
 	unsigned long j = jiffies + CPU_OFFSET(this_cpu);
 	struct sched_domain *sd;
+	int i;
 
-	/* Update our load */
-	old_load = this_rq->cpu_load;
 	this_load = this_rq->nr_running * SCHED_LOAD_SCALE;
-	/*
-	 * Round up the averaging division if load is increasing. This
-	 * prevents us from getting stuck on 9 if the load is 10, for
-	 * example.
-	 */
-	if (this_load > old_load)
-		old_load++;
-	this_rq->cpu_load = (old_load + this_load) / 2;
+	/* Update our load */
+	for (i = 0; i < 3; i++) {
+		unsigned long new_load = this_load;
+		int scale = 1 << i;
+		old_load = this_rq->cpu_load[i];
+		/*
+		 * Round up the averaging division if load is increasing. This
+		 * prevents us from getting stuck on 9 if the load is 10, for
+		 * example.
+		 */
+		if (new_load > old_load)
+			new_load += scale-1;
+		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) / scale;
+	}
 
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
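
The "stuck on 9" comment kept in this hunk is easy to verify with the i = 1 term (scale = 2): plain integer averaging of old_load = 9 toward a steady load of 10 gives (9 + 10) / 2 = 9 forever, while bumping a rising sample by scale - 1 first gives (9 + 11) / 2 = 10. A tiny standalone check (not kernel code):

#include <assert.h>

int main(void)
{
	unsigned long old_load = 9, this_load = 10, new_load;
	int scale = 2;	/* the cpu_load[1] term */

	/* Without the round-up, integer division never reaches the target. */
	assert((old_load * (scale - 1) + this_load) / scale == 9);

	/* With the patch's round-up of a rising load, it does. */
	new_load = this_load + (scale - 1);
	assert((old_load * (scale - 1) + new_load) / scale == 10);
	return 0;
}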
@@ -4921,13 +4929,15 @@ void __init sched_init(void)
 
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
+		rq->nr_running = 0;
 		rq->active = rq->arrays;
 		rq->expired = rq->arrays + 1;
 		rq->best_expired_prio = MAX_PRIO;
 
 #ifdef CONFIG_SMP
 		rq->sd = &sched_domain_dummy;
-		rq->cpu_load = 0;
+		for (j = 1; j < 3; j++)
+			rq->cpu_load[j] = 0;
 		rq->active_balance = 0;
 		rq->push_cpu = 0;
 		rq->migration_thread = NULL;