diff options
author | Con Kolivas <kernel@kolivas.org> | 2005-11-09 00:38:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-11-09 10:56:32 -0500 |
commit | b910472dd3b7c1d51af9a594a759f642520c33e1 (patch) | |
tree | cf2f0496b671bf8728f16b009c84e8811afbcae3 /kernel | |
parent | b54134be53be720da423692665ec215eb14a678b (diff) |
[PATCH] sched: implement nice support across physical cpus on SMP
This patch implements 'nice' support across physical cpus on SMP.
It introduces an extra runqueue variable prio_bias which is the sum of the
(inverted) static priorities of all the tasks on the runqueue.
This is then used to bias busy rebalancing between runqueues to obtain good
distribution of tasks of different nice values. By biasing the balancing only
during busy rebalancing we can avoid having any significant loss of throughput
by not affecting the carefully tuned idle balancing already in place. If all
tasks are running at the same nice level this code should also have minimal
effect. The code is optimised out in the !CONFIG_SMP case.
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 101 |
1 file changed, 83 insertions, 18 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 3ce26954be12..c6827f94e156 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -206,6 +206,7 @@ struct runqueue { | |||
206 | */ | 206 | */ |
207 | unsigned long nr_running; | 207 | unsigned long nr_running; |
208 | #ifdef CONFIG_SMP | 208 | #ifdef CONFIG_SMP |
209 | unsigned long prio_bias; | ||
209 | unsigned long cpu_load[3]; | 210 | unsigned long cpu_load[3]; |
210 | #endif | 211 | #endif |
211 | unsigned long long nr_switches; | 212 | unsigned long long nr_switches; |
@@ -659,13 +660,45 @@ static int effective_prio(task_t *p) | |||
659 | return prio; | 660 | return prio; |
660 | } | 661 | } |
661 | 662 | ||
663 | #ifdef CONFIG_SMP | ||
664 | static inline void inc_prio_bias(runqueue_t *rq, int static_prio) | ||
665 | { | ||
666 | rq->prio_bias += MAX_PRIO - static_prio; | ||
667 | } | ||
668 | |||
669 | static inline void dec_prio_bias(runqueue_t *rq, int static_prio) | ||
670 | { | ||
671 | rq->prio_bias -= MAX_PRIO - static_prio; | ||
672 | } | ||
673 | #else | ||
674 | static inline void inc_prio_bias(runqueue_t *rq, int static_prio) | ||
675 | { | ||
676 | } | ||
677 | |||
678 | static inline void dec_prio_bias(runqueue_t *rq, int static_prio) | ||
679 | { | ||
680 | } | ||
681 | #endif | ||
682 | |||
683 | static inline void inc_nr_running(task_t *p, runqueue_t *rq) | ||
684 | { | ||
685 | rq->nr_running++; | ||
686 | inc_prio_bias(rq, p->static_prio); | ||
687 | } | ||
688 | |||
689 | static inline void dec_nr_running(task_t *p, runqueue_t *rq) | ||
690 | { | ||
691 | rq->nr_running--; | ||
692 | dec_prio_bias(rq, p->static_prio); | ||
693 | } | ||
694 | |||
662 | /* | 695 | /* |
663 | * __activate_task - move a task to the runqueue. | 696 | * __activate_task - move a task to the runqueue. |
664 | */ | 697 | */ |
665 | static inline void __activate_task(task_t *p, runqueue_t *rq) | 698 | static inline void __activate_task(task_t *p, runqueue_t *rq) |
666 | { | 699 | { |
667 | enqueue_task(p, rq->active); | 700 | enqueue_task(p, rq->active); |
668 | rq->nr_running++; | 701 | inc_nr_running(p, rq); |
669 | } | 702 | } |
670 | 703 | ||
671 | /* | 704 | /* |
@@ -674,7 +707,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq) | |||
674 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) | 707 | static inline void __activate_idle_task(task_t *p, runqueue_t *rq) |
675 | { | 708 | { |
676 | enqueue_task_head(p, rq->active); | 709 | enqueue_task_head(p, rq->active); |
677 | rq->nr_running++; | 710 | inc_nr_running(p, rq); |
678 | } | 711 | } |
679 | 712 | ||
680 | static int recalc_task_prio(task_t *p, unsigned long long now) | 713 | static int recalc_task_prio(task_t *p, unsigned long long now) |
@@ -793,7 +826,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local) | |||
793 | */ | 826 | */ |
794 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) | 827 | static void deactivate_task(struct task_struct *p, runqueue_t *rq) |
795 | { | 828 | { |
796 | rq->nr_running--; | 829 | dec_nr_running(p, rq); |
797 | dequeue_task(p, p->array); | 830 | dequeue_task(p, p->array); |
798 | p->array = NULL; | 831 | p->array = NULL; |
799 | } | 832 | } |
@@ -930,27 +963,54 @@ void kick_process(task_t *p) | |||
930 | * We want to under-estimate the load of migration sources, to | 963 | * We want to under-estimate the load of migration sources, to |
931 | * balance conservatively. | 964 | * balance conservatively. |
932 | */ | 965 | */ |
933 | static inline unsigned long source_load(int cpu, int type) | 966 | static inline unsigned long __source_load(int cpu, int type, enum idle_type idle) |
934 | { | 967 | { |
935 | runqueue_t *rq = cpu_rq(cpu); | 968 | runqueue_t *rq = cpu_rq(cpu); |
936 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 969 | unsigned long cpu_load = rq->cpu_load[type-1], |
970 | load_now = rq->nr_running * SCHED_LOAD_SCALE; | ||
971 | |||
972 | if (idle == NOT_IDLE) { | ||
973 | /* | ||
974 | * If we are balancing busy runqueues the load is biased by | ||
975 | * priority to create 'nice' support across cpus. | ||
976 | */ | ||
977 | cpu_load *= rq->prio_bias; | ||
978 | load_now *= rq->prio_bias; | ||
979 | } | ||
980 | |||
937 | if (type == 0) | 981 | if (type == 0) |
938 | return load_now; | 982 | return load_now; |
939 | 983 | ||
940 | return min(rq->cpu_load[type-1], load_now); | 984 | return min(cpu_load, load_now); |
985 | } | ||
986 | |||
987 | static inline unsigned long source_load(int cpu, int type) | ||
988 | { | ||
989 | return __source_load(cpu, type, NOT_IDLE); | ||
941 | } | 990 | } |
942 | 991 | ||
943 | /* | 992 | /* |
944 | * Return a high guess at the load of a migration-target cpu | 993 | * Return a high guess at the load of a migration-target cpu |
945 | */ | 994 | */ |
946 | static inline unsigned long target_load(int cpu, int type) | 995 | static inline unsigned long __target_load(int cpu, int type, enum idle_type idle) |
947 | { | 996 | { |
948 | runqueue_t *rq = cpu_rq(cpu); | 997 | runqueue_t *rq = cpu_rq(cpu); |
949 | unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE; | 998 | unsigned long cpu_load = rq->cpu_load[type-1], |
999 | load_now = rq->nr_running * SCHED_LOAD_SCALE; | ||
1000 | |||
950 | if (type == 0) | 1001 | if (type == 0) |
951 | return load_now; | 1002 | return load_now; |
952 | 1003 | ||
953 | return max(rq->cpu_load[type-1], load_now); | 1004 | if (idle == NOT_IDLE) { |
1005 | cpu_load *= rq->prio_bias; | ||
1006 | load_now *= rq->prio_bias; | ||
1007 | } | ||
1008 | return max(cpu_load, load_now); | ||
1009 | } | ||
1010 | |||
1011 | static inline unsigned long target_load(int cpu, int type) | ||
1012 | { | ||
1013 | return __target_load(cpu, type, NOT_IDLE); | ||
954 | } | 1014 | } |
955 | 1015 | ||
956 | /* | 1016 | /* |
@@ -1411,7 +1471,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags) | |||
1411 | list_add_tail(&p->run_list, ¤t->run_list); | 1471 | list_add_tail(&p->run_list, ¤t->run_list); |
1412 | p->array = current->array; | 1472 | p->array = current->array; |
1413 | p->array->nr_active++; | 1473 | p->array->nr_active++; |
1414 | rq->nr_running++; | 1474 | inc_nr_running(p, rq); |
1415 | } | 1475 | } |
1416 | set_need_resched(); | 1476 | set_need_resched(); |
1417 | } else | 1477 | } else |
@@ -1756,9 +1816,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p, | |||
1756 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) | 1816 | runqueue_t *this_rq, prio_array_t *this_array, int this_cpu) |
1757 | { | 1817 | { |
1758 | dequeue_task(p, src_array); | 1818 | dequeue_task(p, src_array); |
1759 | src_rq->nr_running--; | 1819 | dec_nr_running(p, src_rq); |
1760 | set_task_cpu(p, this_cpu); | 1820 | set_task_cpu(p, this_cpu); |
1761 | this_rq->nr_running++; | 1821 | inc_nr_running(p, this_rq); |
1762 | enqueue_task(p, this_array); | 1822 | enqueue_task(p, this_array); |
1763 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) | 1823 | p->timestamp = (p->timestamp - src_rq->timestamp_last_tick) |
1764 | + this_rq->timestamp_last_tick; | 1824 | + this_rq->timestamp_last_tick; |
@@ -1937,9 +1997,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
1937 | 1997 | ||
1938 | /* Bias balancing toward cpus of our domain */ | 1998 | /* Bias balancing toward cpus of our domain */ |
1939 | if (local_group) | 1999 | if (local_group) |
1940 | load = target_load(i, load_idx); | 2000 | load = __target_load(i, load_idx, idle); |
1941 | else | 2001 | else |
1942 | load = source_load(i, load_idx); | 2002 | load = __source_load(i, load_idx, idle); |
1943 | 2003 | ||
1944 | avg_load += load; | 2004 | avg_load += load; |
1945 | } | 2005 | } |
@@ -2044,14 +2104,15 @@ out_balanced: | |||
2044 | /* | 2104 | /* |
2045 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 2105 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
2046 | */ | 2106 | */ |
2047 | static runqueue_t *find_busiest_queue(struct sched_group *group) | 2107 | static runqueue_t *find_busiest_queue(struct sched_group *group, |
2108 | enum idle_type idle) | ||
2048 | { | 2109 | { |
2049 | unsigned long load, max_load = 0; | 2110 | unsigned long load, max_load = 0; |
2050 | runqueue_t *busiest = NULL; | 2111 | runqueue_t *busiest = NULL; |
2051 | int i; | 2112 | int i; |
2052 | 2113 | ||
2053 | for_each_cpu_mask(i, group->cpumask) { | 2114 | for_each_cpu_mask(i, group->cpumask) { |
2054 | load = source_load(i, 0); | 2115 | load = __source_load(i, 0, idle); |
2055 | 2116 | ||
2056 | if (load > max_load) { | 2117 | if (load > max_load) { |
2057 | max_load = load; | 2118 | max_load = load; |
@@ -2095,7 +2156,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq, | |||
2095 | goto out_balanced; | 2156 | goto out_balanced; |
2096 | } | 2157 | } |
2097 | 2158 | ||
2098 | busiest = find_busiest_queue(group); | 2159 | busiest = find_busiest_queue(group, idle); |
2099 | if (!busiest) { | 2160 | if (!busiest) { |
2100 | schedstat_inc(sd, lb_nobusyq[idle]); | 2161 | schedstat_inc(sd, lb_nobusyq[idle]); |
2101 | goto out_balanced; | 2162 | goto out_balanced; |
@@ -2218,7 +2279,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq, | |||
2218 | goto out_balanced; | 2279 | goto out_balanced; |
2219 | } | 2280 | } |
2220 | 2281 | ||
2221 | busiest = find_busiest_queue(group); | 2282 | busiest = find_busiest_queue(group, NEWLY_IDLE); |
2222 | if (!busiest) { | 2283 | if (!busiest) { |
2223 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); | 2284 | schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]); |
2224 | goto out_balanced; | 2285 | goto out_balanced; |
@@ -3447,7 +3508,9 @@ void set_user_nice(task_t *p, long nice) | |||
3447 | * not SCHED_NORMAL: | 3508 | * not SCHED_NORMAL: |
3448 | */ | 3509 | */ |
3449 | if (rt_task(p)) { | 3510 | if (rt_task(p)) { |
3511 | dec_prio_bias(rq, p->static_prio); | ||
3450 | p->static_prio = NICE_TO_PRIO(nice); | 3512 | p->static_prio = NICE_TO_PRIO(nice); |
3513 | inc_prio_bias(rq, p->static_prio); | ||
3451 | goto out_unlock; | 3514 | goto out_unlock; |
3452 | } | 3515 | } |
3453 | array = p->array; | 3516 | array = p->array; |
@@ -3457,7 +3520,9 @@ void set_user_nice(task_t *p, long nice) | |||
3457 | old_prio = p->prio; | 3520 | old_prio = p->prio; |
3458 | new_prio = NICE_TO_PRIO(nice); | 3521 | new_prio = NICE_TO_PRIO(nice); |
3459 | delta = new_prio - old_prio; | 3522 | delta = new_prio - old_prio; |
3523 | dec_prio_bias(rq, p->static_prio); | ||
3460 | p->static_prio = NICE_TO_PRIO(nice); | 3524 | p->static_prio = NICE_TO_PRIO(nice); |
3525 | inc_prio_bias(rq, p->static_prio); | ||
3461 | p->prio += delta; | 3526 | p->prio += delta; |
3462 | 3527 | ||
3463 | if (array) { | 3528 | if (array) { |