| author | Con Kolivas <kernel@kolivas.org> | 2007-03-05 03:30:29 -0500 |
| --- | --- | --- |
| committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-03-05 10:57:51 -0500 |
| commit | 69f7c0a1be84b10a81b6edcce2dbee0cdec26eba (patch) | |
| tree | a6d4988fda72595ea71ba7e2b4ac11f91fde0159 | |
| parent | 759b9775c25f5e69aaea8a75c3914019e2dc5539 (diff) | |
[PATCH] sched: remove SMT nice
Remove the SMT-nice feature, which idles sibling cpus on SMT cpus to
facilitate nice working properly where cpu power is shared. The idling of
cpus in the presence of runnable tasks is considered too fragile and too easy
to break with outside code, and the complexity of managing this scheme would
become unworkable if an architecture came along with many logical cores
sharing cpu power.
Remove the associated per_cpu_gain variable in sched_domains, which was used
only by this code.
Also: with dynticks enabled, this code breaks without yet further tweaks, so
dynticks brought on its rapid demise. Either we tweak this code or we kill it
off entirely; Ingo's preference was to kill it off. Either way this needs to
happen for 2.6.21, since dynticks has gone in.
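
For reference, a minimal standalone sketch of the heuristic being removed, mirroring the arithmetic of the deleted smt_slice()/dependent_sleeper() code in the diff below (this is not the kernel code; the timeslice numbers are invented, and per_cpu_gain = 25 is the pre-patch SMT-domain default from include/linux/topology.h):

/*
 * Illustrative sketch only -- not the kernel implementation.
 * per_cpu_gain was "CPU % gained by adding domain cpus" (25 for the SMT
 * domain); smt_slice() modelled the part of a sibling's timeslice lost
 * to sharing the physical cpu.
 */
#include <stdio.h>

static unsigned long smt_slice(unsigned long time_slice, unsigned int per_cpu_gain)
{
	return time_slice * (100 - per_cpu_gain) / 100;
}

int main(void)
{
	unsigned long sibling_slice = 100;	/* higher-priority sibling task's timeslice (ms, invented) */
	unsigned long my_slice = 60;		/* lower-priority task's timeslice (ms, invented) */
	unsigned int per_cpu_gain = 25;		/* SMT-domain default before this patch */

	/*
	 * dependent_sleeper() left the lower-priority task off-cpu (sibling
	 * cpu idled) while the higher-priority sibling's smt_slice() still
	 * exceeded this task's timeslice: 100 * (100 - 25) / 100 = 75 > 60,
	 * so in this example the task would have been delayed.
	 */
	if (smt_slice(sibling_slice, per_cpu_gain) > my_slice)
		printf("idle the sibling cpu, delay the lower-priority task\n");
	else
		printf("let the lower-priority task run\n");
	return 0;
}
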
Signed-off-by: Con Kolivas <kernel@kolivas.org>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/asm-i386/topology.h | 1
-rw-r--r-- | include/asm-ia64/topology.h | 2
-rw-r--r-- | include/asm-mips/mach-ip27/topology.h | 1
-rw-r--r-- | include/asm-powerpc/topology.h | 1
-rw-r--r-- | include/asm-x86_64/topology.h | 1
-rw-r--r-- | include/linux/sched.h | 1
-rw-r--r-- | include/linux/topology.h | 4
-rw-r--r-- | kernel/sched.c | 155
8 files changed, 1 insertion(+), 165 deletions(-)
diff --git a/include/asm-i386/topology.h b/include/asm-i386/topology.h
index ac58580ad664..7fc512d90ea8 100644
--- a/include/asm-i386/topology.h
+++ b/include/asm-i386/topology.h
@@ -85,7 +85,6 @@ static inline int node_to_first_cpu(int node)
 	.idle_idx = 1, \
 	.newidle_idx = 2, \
 	.wake_idx = 1, \
-	.per_cpu_gain = 100, \
 	.flags = SD_LOAD_BALANCE \
 		| SD_BALANCE_EXEC \
 		| SD_BALANCE_FORK \
diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h
index 22ed6749557e..233f1caae048 100644
--- a/include/asm-ia64/topology.h
+++ b/include/asm-ia64/topology.h
@@ -65,7 +65,6 @@ void build_cpu_to_node_map(void);
 	.max_interval = 4, \
 	.busy_factor = 64, \
 	.imbalance_pct = 125, \
-	.per_cpu_gain = 100, \
 	.cache_nice_tries = 2, \
 	.busy_idx = 2, \
 	.idle_idx = 1, \
@@ -97,7 +96,6 @@ void build_cpu_to_node_map(void);
 	.newidle_idx = 0, /* unused */ \
 	.wake_idx = 1, \
 	.forkexec_idx = 1, \
-	.per_cpu_gain = 100, \
 	.flags = SD_LOAD_BALANCE \
 		| SD_BALANCE_EXEC \
 		| SD_BALANCE_FORK \
diff --git a/include/asm-mips/mach-ip27/topology.h b/include/asm-mips/mach-ip27/topology.h
index 44790fdc5d00..61d9be3f3175 100644
--- a/include/asm-mips/mach-ip27/topology.h
+++ b/include/asm-mips/mach-ip27/topology.h
@@ -28,7 +28,6 @@ extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
 	.busy_factor = 32, \
 	.imbalance_pct = 125, \
 	.cache_nice_tries = 1, \
-	.per_cpu_gain = 100, \
 	.flags = SD_LOAD_BALANCE \
 		| SD_BALANCE_EXEC \
 		| SD_WAKE_BALANCE, \
diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h
index 6610495f5f16..0ad21a849b5f 100644
--- a/include/asm-powerpc/topology.h
+++ b/include/asm-powerpc/topology.h
@@ -57,7 +57,6 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 	.busy_factor = 32, \
 	.imbalance_pct = 125, \
 	.cache_nice_tries = 1, \
-	.per_cpu_gain = 100, \
 	.busy_idx = 3, \
 	.idle_idx = 1, \
 	.newidle_idx = 2, \
diff --git a/include/asm-x86_64/topology.h b/include/asm-x86_64/topology.h
index 2facec5914d2..4fd6fb23953e 100644
--- a/include/asm-x86_64/topology.h
+++ b/include/asm-x86_64/topology.h
@@ -43,7 +43,6 @@ extern int __node_distance(int, int);
 	.newidle_idx = 0, \
 	.wake_idx = 1, \
 	.forkexec_idx = 1, \
-	.per_cpu_gain = 100, \
 	.flags = SD_LOAD_BALANCE \
 		| SD_BALANCE_FORK \
 		| SD_BALANCE_EXEC \
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6f7c9a4d80e5..49fe2997a016 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -684,7 +684,6 @@ struct sched_domain {
 	unsigned int imbalance_pct;	/* No balance until over watermark */
 	unsigned long long cache_hot_time; /* Task considered cache hot (ns) */
 	unsigned int cache_nice_tries;	/* Leave cache hot tasks for # tries */
-	unsigned int per_cpu_gain;	/* CPU % gained by adding domain cpus */
 	unsigned int busy_idx;
 	unsigned int idle_idx;
 	unsigned int newidle_idx;
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 6c5a6e6e813b..a9d1f049cc15 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -96,7 +96,6 @@
 	.busy_factor = 64, \
 	.imbalance_pct = 110, \
 	.cache_nice_tries = 0, \
-	.per_cpu_gain = 25, \
 	.busy_idx = 0, \
 	.idle_idx = 0, \
 	.newidle_idx = 1, \
@@ -128,7 +127,6 @@
 	.busy_factor = 64, \
 	.imbalance_pct = 125, \
 	.cache_nice_tries = 1, \
-	.per_cpu_gain = 100, \
 	.busy_idx = 2, \
 	.idle_idx = 1, \
 	.newidle_idx = 2, \
@@ -159,7 +157,6 @@
 	.busy_factor = 64, \
 	.imbalance_pct = 125, \
 	.cache_nice_tries = 1, \
-	.per_cpu_gain = 100, \
 	.busy_idx = 2, \
 	.idle_idx = 1, \
 	.newidle_idx = 2, \
@@ -193,7 +190,6 @@
 	.newidle_idx = 0, /* unused */ \
 	.wake_idx = 0, /* unused */ \
 	.forkexec_idx = 0, /* unused */ \
-	.per_cpu_gain = 100, \
 	.flags = SD_LOAD_BALANCE \
 		| SD_SERIALIZE, \
 	.last_balance = jiffies, \
diff --git a/kernel/sched.c b/kernel/sched.c
index 5f102e6c7a4c..a4ca632c477c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3006,23 +3006,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
 }
 #endif
 
-static inline void wake_priority_sleeper(struct rq *rq)
-{
-#ifdef CONFIG_SCHED_SMT
-	if (!rq->nr_running)
-		return;
-
-	spin_lock(&rq->lock);
-	/*
-	 * If an SMT sibling task has been put to sleep for priority
-	 * reasons reschedule the idle task to see if it can now run.
-	 */
-	if (rq->nr_running)
-		resched_task(rq->idle);
-	spin_unlock(&rq->lock);
-#endif
-}
-
 DEFINE_PER_CPU(struct kernel_stat, kstat);
 
 EXPORT_PER_CPU_SYMBOL(kstat);
@@ -3239,10 +3222,7 @@ void scheduler_tick(void)
 
 	update_cpu_clock(p, rq, now);
 
-	if (p == rq->idle)
-		/* Task on the idle queue */
-		wake_priority_sleeper(rq);
-	else
+	if (p != rq->idle)
 		task_running_tick(rq, p);
 #ifdef CONFIG_SMP
 	update_load(rq);
@@ -3251,136 +3231,6 @@ void scheduler_tick(void)
 #endif
 }
 
-#ifdef CONFIG_SCHED_SMT
-static inline void wakeup_busy_runqueue(struct rq *rq)
-{
-	/* If an SMT runqueue is sleeping due to priority reasons wake it up */
-	if (rq->curr == rq->idle && rq->nr_running)
-		resched_task(rq->idle);
-}
-
-/*
- * Called with interrupt disabled and this_rq's runqueue locked.
- */
-static void wake_sleeping_dependent(int this_cpu)
-{
-	struct sched_domain *tmp, *sd = NULL;
-	int i;
-
-	for_each_domain(this_cpu, tmp) {
-		if (tmp->flags & SD_SHARE_CPUPOWER) {
-			sd = tmp;
-			break;
-		}
-	}
-
-	if (!sd)
-		return;
-
-	for_each_cpu_mask(i, sd->span) {
-		struct rq *smt_rq = cpu_rq(i);
-
-		if (i == this_cpu)
-			continue;
-		if (unlikely(!spin_trylock(&smt_rq->lock)))
-			continue;
-
-		wakeup_busy_runqueue(smt_rq);
-		spin_unlock(&smt_rq->lock);
-	}
-}
-
-/*
- * number of 'lost' timeslices this task wont be able to fully
- * utilize, if another task runs on a sibling. This models the
- * slowdown effect of other tasks running on siblings:
- */
-static inline unsigned long
-smt_slice(struct task_struct *p, struct sched_domain *sd)
-{
-	return p->time_slice * (100 - sd->per_cpu_gain) / 100;
-}
-
-/*
- * To minimise lock contention and not have to drop this_rq's runlock we only
- * trylock the sibling runqueues and bypass those runqueues if we fail to
- * acquire their lock. As we only trylock the normal locking order does not
- * need to be obeyed.
- */
-static int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
-{
-	struct sched_domain *tmp, *sd = NULL;
-	int ret = 0, i;
-
-	/* kernel/rt threads do not participate in dependent sleeping */
-	if (!p->mm || rt_task(p))
-		return 0;
-
-	for_each_domain(this_cpu, tmp) {
-		if (tmp->flags & SD_SHARE_CPUPOWER) {
-			sd = tmp;
-			break;
-		}
-	}
-
-	if (!sd)
-		return 0;
-
-	for_each_cpu_mask(i, sd->span) {
-		struct task_struct *smt_curr;
-		struct rq *smt_rq;
-
-		if (i == this_cpu)
-			continue;
-
-		smt_rq = cpu_rq(i);
-		if (unlikely(!spin_trylock(&smt_rq->lock)))
-			continue;
-
-		smt_curr = smt_rq->curr;
-
-		if (!smt_curr->mm)
-			goto unlock;
-
-		/*
-		 * If a user task with lower static priority than the
-		 * running task on the SMT sibling is trying to schedule,
-		 * delay it till there is proportionately less timeslice
-		 * left of the sibling task to prevent a lower priority
-		 * task from using an unfair proportion of the
-		 * physical cpu's resources. -ck
-		 */
-		if (rt_task(smt_curr)) {
-			/*
-			 * With real time tasks we run non-rt tasks only
-			 * per_cpu_gain% of the time.
-			 */
-			if ((jiffies % DEF_TIMESLICE) >
-				(sd->per_cpu_gain * DEF_TIMESLICE / 100))
-					ret = 1;
-		} else {
-			if (smt_curr->static_prio < p->static_prio &&
-				!TASK_PREEMPTS_CURR(p, smt_rq) &&
-				smt_slice(smt_curr, sd) > task_timeslice(p))
-					ret = 1;
-		}
-unlock:
-		spin_unlock(&smt_rq->lock);
-	}
-	return ret;
-}
-#else
-static inline void wake_sleeping_dependent(int this_cpu)
-{
-}
-static inline int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
-{
-	return 0;
-}
-#endif
-
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
 
 void fastcall add_preempt_count(int val)
@@ -3507,7 +3357,6 @@ need_resched_nonpreemptible:
 		if (!rq->nr_running) {
 			next = rq->idle;
 			rq->expired_timestamp = 0;
-			wake_sleeping_dependent(cpu);
 			goto switch_tasks;
 		}
 	}
@@ -3547,8 +3396,6 @@ need_resched_nonpreemptible:
 		}
 	}
 	next->sleep_type = SLEEP_NORMAL;
-	if (rq->nr_running == 1 && dependent_sleeper(cpu, rq, next))
-		next = rq->idle;
 switch_tasks:
 	if (next == rq->idle)
 		schedstat_inc(rq, sched_goidle);