author     Nick Piggin <npiggin@suse.de>          2006-02-10 04:51:02 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>  2006-02-10 11:13:11 -0500
commit     a2000572ad511f5f43091ed7bd2cc3b913104a1e
tree       56279392dece8e11c33d3754854551aa6773095b /kernel/sched.c
parent     4b0955a6edb9b058ca1314ca210a92ee166c4d9a
[PATCH] sched: remove smpnice
I don't think the code is quite ready, which is why I asked for Peter's
additions to also be merged before I acked it (although it turned out that
it still isn't quite ready with his additions either).
Basically I have had observations similar to Suresh's, in that it does not
play nicely with the rest of the balancing infrastructure (and I raised
similar concerns in my review).
The samples (group of 4) I got for "maximum recorded imbalance" on a 2x2
SMP+HT Xeon are as follows:
           | Following boot | hackbench 20        | hackbench 40
-----------+----------------+---------------------+---------------------
2.6.16-rc2 | 30,37,100,112  | 5600,5530,6020,6090 | 6390,7090,8760,8470
+nosmpnice |  3, 2, 4, 2    |   28, 150, 294, 132 |  348, 348, 294, 347
Hackbench raw performance is down around 15% with smpnice (but that in
itself isn't a huge deal because it is just a benchmark). However, the
samples show that the imbalance passed into move_tasks is increased by
about a factor of 10-30. I think this would also go some way to explaining
latency blips turning up in the balancing code (though I haven't actually
measured that).
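To put a number on the bias itself: each queued nice-0 task adds
MAX_PRIO - NICE_TO_PRIO(0) = 140 - 120 = 20 to rq->prio_bias, so for a
runqueue of ordinary nice-0 tasks the old __source_load()/__target_load()
scaling reports roughly 20x the raw load.  A minimal userspace sketch of
that arithmetic (illustration only, not kernel code; the constants follow
the 2.6.16-era scheduler headers, and the exact SCHED_LOAD_SCALE value
cancels out of the ratio):

  /*
   * Illustration only -- not part of the patch.  Assumes 2.6.16-era
   * constants: MAX_RT_PRIO = 100, MAX_PRIO = 140, nice 0 -> static_prio
   * 120, so each nice-0 task contributes 20 to prio_bias.
   */
  #include <stdio.h>

  #define MAX_PRIO           140
  #define NICE_TO_PRIO(nice) (100 + (nice) + 20)
  #define SCHED_LOAD_SCALE   128UL  /* value immaterial; it cancels below */

  int main(void)
  {
          unsigned long running   = 4;  /* nice-0 tasks on the queue */
          unsigned long prio_bias = running * (MAX_PRIO - NICE_TO_PRIO(0));
          unsigned long load_now  = running * SCHED_LOAD_SCALE;

          /* the scaling the removed __source_load()/__target_load() applied */
          unsigned long biased = load_now * prio_bias / running;

          printf("raw load %lu, biased load %lu (x%lu)\n",
                 load_now, biased, biased / load_now);
          return 0;
  }

This prints "raw load 512, biased load 10240 (x20)", the same order of
magnitude as the imbalance blow-up in the table above.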
We'll probably have to revert this in the SUSE kernel.
Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: "Martin J. Bligh" <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 129
1 file changed, 18 insertions(+), 111 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804e40dd..87d93be336a1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -215,7 +215,6 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +951,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
+		return load_now;
 
-	return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		target_load = load_now;
-	else
-		target_load = max(cpu_load, load_now);
+		return load_now;
 
-	if (running > 1 || (idle == NOT_IDLE && running))
-		target_load = target_load * rq->prio_bias / running;
-
-	return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-	return __target_load(cpu, type, NOT_IDLE);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1530,7 +1440,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 				list_add_tail(&p->run_list, &current->run_list);
 				p->array = current->array;
 				p->array->nr_active++;
-				inc_nr_running(p, rq);
+				rq->nr_running++;
 			}
 			set_need_resched();
 		} else
@@ -1875,9 +1785,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	dec_nr_running(p, src_rq);
+	src_rq->nr_running--;
 	set_task_cpu(p, this_cpu);
-	inc_nr_running(p, this_rq);
+	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -2056,9 +1966,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = __target_load(i, load_idx, idle);
+				load = target_load(i, load_idx);
 			else
-				load = __source_load(i, load_idx, idle);
+				load = source_load(i, load_idx);
 
 			avg_load += load;
 		}
@@ -2171,7 +2081,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = __source_load(i, 0, idle);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -3571,10 +3481,8 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array) {
+	if (array)
 		dequeue_task(p, array);
-		dec_prio_bias(rq, p->static_prio);
-	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3492,6 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
-		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU: