author     Nick Piggin <npiggin@suse.de>           2006-02-10 04:51:02 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>   2006-02-10 11:13:11 -0500
commit     a2000572ad511f5f43091ed7bd2cc3b913104a1e
tree       56279392dece8e11c33d3754854551aa6773095b
parent     4b0955a6edb9b058ca1314ca210a92ee166c4d9a
[PATCH] sched: remove smpnice
I don't think the code is quite ready, which is why I asked for Peter's
additions to also be merged before I acked it (although it turned out that
it still isn't quite ready with his additions either).

Basically I have had similar observations to Suresh in that it does not
play nicely with the rest of the balancing infrastructure (and raised
similar concerns in my review).

The samples (group of 4) I got for "maximum recorded imbalance" on a
2x2 SMP+HT Xeon are as follows:

           | Following boot | hackbench 20        | hackbench 40
-----------+----------------+---------------------+---------------------
2.6.16-rc2 | 30,37,100,112  | 5600,5530,6020,6090 | 6390,7090,8760,8470
+nosmpnice |  3, 2,  4,  2  |   28, 150, 294, 132 |  348, 348, 294, 347

Hackbench raw performance is down around 15% with smpnice (but that in
itself isn't a huge deal because it is just a benchmark).  However, the
samples show that the imbalance passed into move_tasks is increased by
about a factor of 10-30.  I think this would also go some way to
explaining latency blips turning up in the balancing code (though I
haven't actually measured that).

We'll probably have to revert this in the SUSE kernel.

Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: "Martin J. Bligh" <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
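For a sense of the scale of the bias being removed, the sketch below is a
standalone user-space illustration (not kernel code) of how the deleted
__source_load()/__target_load() path scales a runqueue's load by
prio_bias / nr_running.  The constants mirror the 2.6 scheduler as I
understand it (MAX_PRIO = 140, static_prio = 120 + nice, SCHED_LOAD_SCALE
= 128) and the helper names are illustrative only; the idle-rebalance
special case in the original code is ignored.  For a queue of nice-0 tasks
the factor works out to roughly 20x the unbiased load, the same order of
magnitude as the 10-30x imbalance figures quoted above.

#include <stdio.h>

#define MAX_PRIO         140            /* MAX_RT_PRIO (100) + 40 nice levels */
#define NICE_TO_PRIO(n)  (120 + (n))    /* static_prio for a given nice value */
#define SCHED_LOAD_SCALE 128UL

/*
 * Biased load as computed by the removed __source_load()/__target_load():
 * the raw load (nr_running * SCHED_LOAD_SCALE) scaled by
 * prio_bias / nr_running, where each queued task adds
 * MAX_PRIO - static_prio to rq->prio_bias.
 */
static unsigned long biased_load(unsigned long nr_running, int nice_val)
{
        unsigned long prio_bias = nr_running * (MAX_PRIO - NICE_TO_PRIO(nice_val));
        unsigned long load_now = nr_running * SCHED_LOAD_SCALE;

        return load_now * prio_bias / nr_running;
}

int main(void)
{
        unsigned long nr_running = 4;
        unsigned long raw = nr_running * SCHED_LOAD_SCALE;

        /*
         * Each nice-0 task contributes 140 - 120 = 20 to prio_bias, so the
         * biased figure is about 20x the raw load.
         */
        printf("raw load: %lu, biased load (all nice 0): %lu\n",
               raw, biased_load(nr_running, 0));
        return 0;
}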
-rw-r--r--   kernel/sched.c   129
1 files changed, 18 insertions, 111 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804e40dd..87d93be336a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -215,7 +215,6 @@ struct runqueue {
          */
         unsigned long nr_running;
 #ifdef CONFIG_SMP
-        unsigned long prio_bias;
         unsigned long cpu_load[3];
 #endif
         unsigned long long nr_switches;
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p)
         return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-        rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-        rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-        rq->nr_running++;
-        if (rt_task(p)) {
-                if (p != rq->migration_thread)
-                        /*
-                         * The migration thread does the actual balancing. Do
-                         * not bias by its priority as the ultra high priority
-                         * will skew balancing adversely.
-                         */
-                        inc_prio_bias(rq, p->prio);
-        } else
-                inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-        rq->nr_running--;
-        if (rt_task(p)) {
-                if (p != rq->migration_thread)
-                        dec_prio_bias(rq, p->prio);
-        } else
-                dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-        rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-        rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
         enqueue_task(p, rq->active);
-        inc_nr_running(p, rq);
+        rq->nr_running++;
 }
 
 /*
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
         enqueue_task_head(p, rq->active);
-        inc_nr_running(p, rq);
+        rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-        dec_nr_running(p, rq);
+        rq->nr_running--;
         dequeue_task(p, p->array);
         p->array = NULL;
 }
@@ -1007,61 +951,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
         runqueue_t *rq = cpu_rq(cpu);
-        unsigned long running = rq->nr_running;
-        unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-                load_now = running * SCHED_LOAD_SCALE;
-
+        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
         if (type == 0)
-                source_load = load_now;
-        else
-                source_load = min(cpu_load, load_now);
-
-        if (running > 1 || (idle == NOT_IDLE && running))
-                /*
-                 * If we are busy rebalancing the load is biased by
-                 * priority to create 'nice' support across cpus. When
-                 * idle rebalancing we should only bias the source_load if
-                 * there is more than one task running on that queue to
-                 * prevent idle rebalance from trying to pull tasks from a
-                 * queue with only one running task.
-                 */
-                source_load = source_load * rq->prio_bias / running;
+                return load_now;
 
-        return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-        return __source_load(cpu, type, NOT_IDLE);
+        return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
         runqueue_t *rq = cpu_rq(cpu);
-        unsigned long running = rq->nr_running;
-        unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-                load_now = running * SCHED_LOAD_SCALE;
-
+        unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
         if (type == 0)
-                target_load = load_now;
-        else
-                target_load = max(cpu_load, load_now);
+                return load_now;
 
-        if (running > 1 || (idle == NOT_IDLE && running))
-                target_load = target_load * rq->prio_bias / running;
-
-        return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-        return __target_load(cpu, type, NOT_IDLE);
+        return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1530,7 +1440,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
                         list_add_tail(&p->run_list, &current->run_list);
                         p->array = current->array;
                         p->array->nr_active++;
-                        inc_nr_running(p, rq);
+                        rq->nr_running++;
                 }
                 set_need_resched();
         } else
@@ -1875,9 +1785,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
                runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
         dequeue_task(p, src_array);
-        dec_nr_running(p, src_rq);
+        src_rq->nr_running--;
         set_task_cpu(p, this_cpu);
-        inc_nr_running(p, this_rq);
+        this_rq->nr_running++;
         enqueue_task(p, this_array);
         p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
                 + this_rq->timestamp_last_tick;
@@ -2056,9 +1966,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
                 /* Bias balancing toward cpus of our domain */
                 if (local_group)
-                        load = __target_load(i, load_idx, idle);
+                        load = target_load(i, load_idx);
                 else
-                        load = __source_load(i, load_idx, idle);
+                        load = source_load(i, load_idx);
 
                 avg_load += load;
         }
@@ -2171,7 +2081,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
         int i;
 
         for_each_cpu_mask(i, group->cpumask) {
-                load = __source_load(i, 0, idle);
+                load = source_load(i, 0);
 
                 if (load > max_load) {
                         max_load = load;
@@ -3571,10 +3481,8 @@ void set_user_nice(task_t *p, long nice)
                 goto out_unlock;
         }
         array = p->array;
-        if (array) {
+        if (array)
                 dequeue_task(p, array);
-                dec_prio_bias(rq, p->static_prio);
-        }
 
         old_prio = p->prio;
         new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3492,6 @@ void set_user_nice(task_t *p, long nice)
 
         if (array) {
                 enqueue_task(p, array);
-                inc_prio_bias(rq, p->static_prio);
                 /*
                  * If the task increased its priority or is running and
                  * lowered its priority, then reschedule its CPU: