path: root/kernel
author	Con Kolivas <kernel@kolivas.org>	2005-11-09 00:38:55 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-09 10:56:32 -0500
commit	b910472dd3b7c1d51af9a594a759f642520c33e1 (patch)
tree	cf2f0496b671bf8728f16b009c84e8811afbcae3 /kernel
parent	b54134be53be720da423692665ec215eb14a678b (diff)
[PATCH] sched: implement nice support across physical cpus on SMP
This patch implements 'nice' support across physical cpus on SMP.

It introduces an extra runqueue variable prio_bias which is the sum of the
(inverted) static priorities of all the tasks on the runqueue.

This is then used to bias busy rebalancing between runqueues to obtain good
distribution of tasks of different nice values. By biasing the balancing only
during busy rebalancing we can avoid having any significant loss of throughput
by not affecting the carefully tuned idle balancing already in place. If all
tasks are running at the same nice level this code should also have minimal
effect. The code is optimised out in the !CONFIG_SMP case.

Signed-off-by: Con Kolivas <kernel@kolivas.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
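In short, each runqueue accumulates prio_bias as the sum of (MAX_PRIO - static_prio) over its queued tasks, so low-nice (high-priority) tasks weigh more. The following is a minimal user-space sketch of that bookkeeping, assuming the 2.6-era constants MAX_PRIO = 140 and NICE_TO_PRIO(nice) = 120 + nice; the toy_* names and struct toy_rq are illustrative stand-ins, not kernel code.

#include <stdio.h>

#define MAX_PRIO		140		/* 2.6 scheduler: 100 RT levels + 40 nice levels */
#define NICE_TO_PRIO(nice)	(120 + (nice))	/* static priority for a given nice value */

/* Illustrative stand-in for the runqueue fields touched by the patch. */
struct toy_rq {
	unsigned long nr_running;
	unsigned long prio_bias;	/* sum of (MAX_PRIO - static_prio) of queued tasks */
};

static void toy_inc_nr_running(struct toy_rq *rq, int static_prio)
{
	rq->nr_running++;
	rq->prio_bias += MAX_PRIO - static_prio;
}

static void toy_dec_nr_running(struct toy_rq *rq, int static_prio)
{
	rq->nr_running--;
	rq->prio_bias -= MAX_PRIO - static_prio;
}

int main(void)
{
	struct toy_rq rq = { 0, 0 };

	toy_inc_nr_running(&rq, NICE_TO_PRIO(0));	/* nice   0 -> bias += 20 */
	toy_inc_nr_running(&rq, NICE_TO_PRIO(19));	/* nice  19 -> bias +=  1 */
	toy_inc_nr_running(&rq, NICE_TO_PRIO(-20));	/* nice -20 -> bias += 40 */
	printf("nr_running=%lu prio_bias=%lu\n", rq.nr_running, rq.prio_bias);
	/* prints: nr_running=3 prio_bias=61 */

	toy_dec_nr_running(&rq, NICE_TO_PRIO(19));	/* task leaves -> bias -= 1 */
	return 0;
}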
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	101
1 files changed, 83 insertions, 18 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 3ce26954be12..c6827f94e156 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -206,6 +206,7 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
+	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -659,13 +660,45 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
+#ifdef CONFIG_SMP
+static inline void inc_prio_bias(runqueue_t *rq, int static_prio)
+{
+	rq->prio_bias += MAX_PRIO - static_prio;
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int static_prio)
+{
+	rq->prio_bias -= MAX_PRIO - static_prio;
+}
+#else
+static inline void inc_prio_bias(runqueue_t *rq, int static_prio)
+{
+}
+
+static inline void dec_prio_bias(runqueue_t *rq, int static_prio)
+{
+}
+#endif
+
+static inline void inc_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running++;
+	inc_prio_bias(rq, p->static_prio);
+}
+
+static inline void dec_nr_running(task_t *p, runqueue_t *rq)
+{
+	rq->nr_running--;
+	dec_prio_bias(rq, p->static_prio);
+}
+
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 /*
@@ -674,7 +707,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	rq->nr_running++;
+	inc_nr_running(p, rq);
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -793,7 +826,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	rq->nr_running--;
+	dec_nr_running(p, rq);
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -930,27 +963,54 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static inline unsigned long source_load(int cpu, int type)
+static inline unsigned long __source_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long cpu_load = rq->cpu_load[type-1],
+		load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
+	if (idle == NOT_IDLE) {
+		/*
+		 * If we are balancing busy runqueues the load is biased by
+		 * priority to create 'nice' support across cpus.
+		 */
+		cpu_load *= rq->prio_bias;
+		load_now *= rq->prio_bias;
+	}
+
 	if (type == 0)
 		return load_now;
 
-	return min(rq->cpu_load[type-1], load_now);
+	return min(cpu_load, load_now);
+}
+
+static inline unsigned long source_load(int cpu, int type)
+{
+	return __source_load(cpu, type, NOT_IDLE);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long target_load(int cpu, int type)
+static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
+	unsigned long cpu_load = rq->cpu_load[type-1],
+		load_now = rq->nr_running * SCHED_LOAD_SCALE;
+
 	if (type == 0)
 		return load_now;
 
-	return max(rq->cpu_load[type-1], load_now);
+	if (idle == NOT_IDLE) {
+		cpu_load *= rq->prio_bias;
+		load_now *= rq->prio_bias;
+	}
+	return max(cpu_load, load_now);
+}
+
+static inline unsigned long target_load(int cpu, int type)
+{
+	return __target_load(cpu, type, NOT_IDLE);
 }
 
 /*
@@ -1411,7 +1471,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 			list_add_tail(&p->run_list, &current->run_list);
 			p->array = current->array;
 			p->array->nr_active++;
-			rq->nr_running++;
+			inc_nr_running(p, rq);
 		}
 		set_need_resched();
 	} else
@@ -1756,9 +1816,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	src_rq->nr_running--;
+	dec_nr_running(p, src_rq);
 	set_task_cpu(p, this_cpu);
-	this_rq->nr_running++;
+	inc_nr_running(p, this_rq);
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 		+ this_rq->timestamp_last_tick;
@@ -1937,9 +1997,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group)
-			load = target_load(i, load_idx);
+			load = __target_load(i, load_idx, idle);
 		else
-			load = source_load(i, load_idx);
+			load = __source_load(i, load_idx, idle);
 
 		avg_load += load;
 	}
@@ -2044,14 +2104,15 @@ out_balanced:
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
-static runqueue_t *find_busiest_queue(struct sched_group *group)
+static runqueue_t *find_busiest_queue(struct sched_group *group,
+				      enum idle_type idle)
 {
 	unsigned long load, max_load = 0;
 	runqueue_t *busiest = NULL;
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = source_load(i, 0);
+		load = __source_load(i, 0, idle);
 
 		if (load > max_load) {
 			max_load = load;
@@ -2095,7 +2156,7 @@ static int load_balance(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, idle);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[idle]);
 		goto out_balanced;
@@ -2218,7 +2279,7 @@ static int load_balance_newidle(int this_cpu, runqueue_t *this_rq,
 		goto out_balanced;
 	}
 
-	busiest = find_busiest_queue(group);
+	busiest = find_busiest_queue(group, NEWLY_IDLE);
 	if (!busiest) {
 		schedstat_inc(sd, lb_nobusyq[NEWLY_IDLE]);
 		goto out_balanced;
@@ -3447,7 +3508,9 @@ void set_user_nice(task_t *p, long nice)
 	 * not SCHED_NORMAL:
 	 */
 	if (rt_task(p)) {
+		dec_prio_bias(rq, p->static_prio);
 		p->static_prio = NICE_TO_PRIO(nice);
+		inc_prio_bias(rq, p->static_prio);
 		goto out_unlock;
 	}
 	array = p->array;
@@ -3457,7 +3520,9 @@ void set_user_nice(task_t *p, long nice)
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
 	delta = new_prio - old_prio;
+	dec_prio_bias(rq, p->static_prio);
 	p->static_prio = NICE_TO_PRIO(nice);
+	inc_prio_bias(rq, p->static_prio);
 	p->prio += delta;
 
 	if (array) {
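For a rough sense of how the bias steers busy rebalancing, the sketch below mimics what the __source_load()/__target_load() changes above do in the NOT_IDLE case: the raw load (nr_running * SCHED_LOAD_SCALE) is multiplied by the runqueue's prio_bias before runqueues are compared. The biased_load() helper and the task mixes are illustrative assumptions, not scheduler output.

#include <stdio.h>

#define SCHED_LOAD_SCALE	128UL	/* load units per runnable task, as in 2.6 */

/*
 * Illustration of the NOT_IDLE branch above: scale the raw load by the
 * runqueue's prio_bias so runqueues full of nicer tasks look lighter.
 */
static unsigned long biased_load(unsigned long nr_running, unsigned long prio_bias)
{
	unsigned long load_now = nr_running * SCHED_LOAD_SCALE;

	return load_now * prio_bias;
}

int main(void)
{
	/* Two runqueues, same task count, different nice mixes (hypothetical). */
	unsigned long cpu0 = biased_load(2, 2 * 20);	/* two nice-0 tasks:  bias 40 */
	unsigned long cpu1 = biased_load(2, 2 * 1);	/* two nice-19 tasks: bias  2 */

	/* cpu0 reports a 20x larger load, so busy balancing pulls from it first. */
	printf("cpu0=%lu cpu1=%lu\n", cpu0, cpu1);
	return 0;
}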