| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:23:41 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-02-28 13:23:41 -0500 |
| commit | 2531216f236cb2a1f39ffa12a4a9339541e52191 | |
| tree | 74ca94a99a00ebca463ca67458b8099728cece15 | |
| parent | 6556a6743549defc32e5f90ee2cb1ecd833a44c3 | |
| parent | 0970d2992dfd7d5ec2c787417cf464f01eeaf42a | |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
sched: Fix race between ttwu() and task_rq_lock()
sched: Fix SMT scheduler regression in find_busiest_queue()
  sched: Fix sched_mc_power_savings for !SMT
kernel/sched.c: Suppress unused var warning
 include/linux/sched.h |  5
 kernel/sched.c        | 86
 2 files changed, 62 insertions(+), 29 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 78efe7c485ac..1f5fa53b46b1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -878,7 +878,10 @@ static inline int sd_balance_for_mc_power(void)
 	if (sched_smt_power_savings)
 		return SD_POWERSAVINGS_BALANCE;
 
-	return SD_PREFER_SIBLING;
+	if (!sched_mc_power_savings)
+		return SD_PREFER_SIBLING;
+
+	return 0;
 }
 
 static inline int sd_balance_for_package_power(void)
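
The hunk above stops the MC domain from asking for sibling-packing once sched_mc_power_savings is enabled. As a quick illustration, here is a small stand-alone harness (user-space scaffolding with made-up flag values, not kernel code) that tabulates what the patched sd_balance_for_mc_power() returns for every combination of the two tunables:

```c
#include <stdio.h>

/* Illustrative stand-ins for the kernel's scheduler-domain flags. */
#define SD_POWERSAVINGS_BALANCE 0x0100
#define SD_PREFER_SIBLING       0x0040

static int sched_smt_power_savings;
static int sched_mc_power_savings;

/* Mirrors the patched helper: SMT power savings wins, otherwise only
 * prefer sibling-packing when MC power savings is off. */
static int sd_balance_for_mc_power(void)
{
	if (sched_smt_power_savings)
		return SD_POWERSAVINGS_BALANCE;

	if (!sched_mc_power_savings)
		return SD_PREFER_SIBLING;

	return 0;
}

int main(void)
{
	for (int smt = 0; smt <= 1; smt++)
		for (int mc = 0; mc <= 1; mc++) {
			sched_smt_power_savings = smt;
			sched_mc_power_savings = mc;
			printf("smt=%d mc=%d -> 0x%04x\n", smt, mc,
			       (unsigned)sd_balance_for_mc_power());
		}
	return 0;
}
```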
diff --git a/kernel/sched.c b/kernel/sched.c
index 9d163f83e5c3..caf54e1eef6e 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -946,16 +946,33 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
 
 /*
+ * Check whether the task is waking, we use this to synchronize against
+ * ttwu() so that task_cpu() reports a stable number.
+ *
+ * We need to make an exception for PF_STARTING tasks because the fork
+ * path might require task_rq_lock() to work, eg. it can call
+ * set_cpus_allowed_ptr() from the cpuset clone_ns code.
+ */
+static inline int task_is_waking(struct task_struct *p)
+{
+	return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+}
+
+/*
  * __task_rq_lock - lock the runqueue a given task resides on.
  * Must be called interrupts disabled.
  */
 static inline struct rq *__task_rq_lock(struct task_struct *p)
 	__acquires(rq->lock)
 {
+	struct rq *rq;
+
 	for (;;) {
-		struct rq *rq = task_rq(p);
+		while (task_is_waking(p))
+			cpu_relax();
+		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_is_waking(p)))
 			return rq;
 		raw_spin_unlock(&rq->lock);
 	}
@@ -972,10 +989,12 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags)
 	struct rq *rq;
 
 	for (;;) {
+		while (task_is_waking(p))
+			cpu_relax();
 		local_irq_save(*flags);
 		rq = task_rq(p);
 		raw_spin_lock(&rq->lock);
-		if (likely(rq == task_rq(p)))
+		if (likely(rq == task_rq(p) && !task_is_waking(p)))
 			return rq;
 		raw_spin_unlock_irqrestore(&rq->lock, *flags);
 	}
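
Taken together, task_is_waking() and the two retry loops above implement a "wait until the task stops moving, lock the queue we think it is on, then revalidate" pattern. A rough user-space analogue of the same pattern (hypothetical types and names, with C11 atomics and a mutex standing in for the rq spinlock):

```c
/* Not kernel code: a sketch of the __task_rq_lock() retry pattern.
 * The task's queue pointer can change concurrently, so lock the queue
 * we believe it is on, then recheck both the pointer and the "waking"
 * flag before trusting the lock. */
#include <pthread.h>
#include <stdatomic.h>
#include <sched.h>

struct queue { pthread_mutex_t lock; };

struct task {
	_Atomic(struct queue *) queue;   /* analogue of task_rq(p)   */
	atomic_bool waking;              /* analogue of TASK_WAKING  */
};

struct queue *task_queue_lock(struct task *t)
{
	struct queue *q;

	for (;;) {
		/* Don't even try while a waker is moving the task. */
		while (atomic_load(&t->waking))
			sched_yield();       /* stand-in for cpu_relax() */

		q = atomic_load(&t->queue);
		pthread_mutex_lock(&q->lock);

		/* Revalidate: the task may have moved, or a wakeup may
		 * have started, between the load and taking the lock. */
		if (q == atomic_load(&t->queue) && !atomic_load(&t->waking))
			return q;            /* locked the right queue */

		pthread_mutex_unlock(&q->lock);
	}
}
```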
@@ -2413,14 +2432,27 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	__task_rq_unlock(rq);
 
 	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu)
+	if (cpu != orig_cpu) {
+		/*
+		 * Since we migrate the task without holding any rq->lock,
+		 * we need to be careful with task_rq_lock(), since that
+		 * might end up locking an invalid rq.
+		 */
 		set_task_cpu(p, cpu);
+	}
 
-	rq = __task_rq_lock(p);
+	rq = cpu_rq(cpu);
+	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 
+	/*
+	 * We migrated the task without holding either rq->lock, however
+	 * since the task is not on the task list itself, nobody else
+	 * will try and migrate the task, hence the rq should match the
+	 * cpu we just moved it to.
+	 */
+	WARN_ON(task_cpu(p) != cpu);
 	WARN_ON(p->state != TASK_WAKING);
-	cpu = task_cpu(p);
 
 #ifdef CONFIG_SCHEDSTATS
 	schedstat_inc(rq, ttwu_count);
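
Design note on the ttwu() hunk above: the waker can take cpu_rq(cpu)->lock directly with raw_spin_lock() because the task sits on no runqueue and is still TASK_WAKING, so every other path (PF_STARTING tasks excepted) now waits in task_is_waking() inside task_rq_lock() instead of racing for a possibly stale runqueue; the two WARN_ONs document exactly that invariant.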
@@ -2668,7 +2700,13 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	set_task_cpu(p, cpu);
 #endif
 
-	rq = task_rq_lock(p, &flags);
+	/*
+	 * Since the task is not on the rq and we still have TASK_WAKING set
+	 * nobody else will migrate this task.
+	 */
+	rq = cpu_rq(cpu);
+	raw_spin_lock_irqsave(&rq->lock, flags);
+
 	BUG_ON(p->state != TASK_WAKING);
 	p->state = TASK_RUNNING;
 	update_rq_clock(rq);
@@ -4130,12 +4168,23 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
 			continue;
 
 		rq = cpu_rq(i);
-		wl = weighted_cpuload(i) * SCHED_LOAD_SCALE;
-		wl /= power;
+		wl = weighted_cpuload(i);
 
+		/*
+		 * When comparing with imbalance, use weighted_cpuload()
+		 * which is not scaled with the cpu power.
+		 */
 		if (capacity && rq->nr_running == 1 && wl > imbalance)
 			continue;
 
+		/*
+		 * For the load comparisons with the other cpu's, consider
+		 * the weighted_cpuload() scaled with the cpu power, so that
+		 * the load can be moved away from the cpu that is potentially
+		 * running at a lower capacity.
+		 */
+		wl = (wl * SCHED_LOAD_SCALE) / power;
+
 		if (wl > max_load) {
 			max_load = wl;
 			busiest = rq;
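
An illustrative calculation of why the hunk above compares the unscaled load against the imbalance: an SMT sibling reports a cpu_power below SCHED_LOAD_SCALE, so the power-scaled load of a single nice-0 task can exceed the imbalance even though the raw load does not, and the old check would then skip that queue as a migration source. The numbers below are made up for the example, not taken from the kernel:

```c
#include <stdio.h>

#define SCHED_LOAD_SCALE 1024UL

int main(void)
{
	unsigned long wl = 1024;        /* weighted load of one nice-0 task   */
	unsigned long power = 589;      /* hypothetical SMT-sibling cpu_power */
	unsigned long imbalance = 1024; /* hypothetical imbalance to correct  */

	unsigned long scaled = wl * SCHED_LOAD_SCALE / power;   /* ~1780 */

	/* The fixed code uses the raw value for this check and only scales
	 * afterwards, for the max_load comparison across CPUs. */
	printf("raw    wl > imbalance: %d\n", wl > imbalance);      /* 0 */
	printf("scaled wl > imbalance: %d\n", scaled > imbalance);  /* 1 */
	return 0;
}
```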
@@ -7156,27 +7205,8 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 	struct rq *rq;
 	int ret = 0;
 
-	/*
-	 * Since we rely on wake-ups to migrate sleeping tasks, don't change
-	 * the ->cpus_allowed mask from under waking tasks, which would be
-	 * possible when we change rq->lock in ttwu(), so synchronize against
-	 * TASK_WAKING to avoid that.
-	 *
-	 * Make an exception for freshly cloned tasks, since cpuset namespaces
-	 * might move the task about, we have to validate the target in
-	 * wake_up_new_task() anyway since the cpu might have gone away.
-	 */
-again:
-	while (p->state == TASK_WAKING && !(p->flags & PF_STARTING))
-		cpu_relax();
-
 	rq = task_rq_lock(p, &flags);
 
-	if (p->state == TASK_WAKING && !(p->flags & PF_STARTING)) {
-		task_rq_unlock(rq, &flags);
-		goto again;
-	}
-
 	if (!cpumask_intersects(new_mask, cpu_active_mask)) {
 		ret = -EINVAL;
 		goto out;
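
With task_rq_lock() now performing the TASK_WAKING synchronization itself via task_is_waking() (keeping the PF_STARTING exception), the open-coded spin-and-recheck that set_cpus_allowed_ptr() used to carry is redundant, which is why the hunk above simply deletes it.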
