author     Linus Torvalds <torvalds@linux-foundation.org>   2013-11-14 02:55:11 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-11-14 02:55:11 -0500
commit     fe8a45df368038566c62bf311accf4319b210123 (patch)
tree       c39ab7df07ccf71eaeed742d3a575c269f992cbc /kernel
parent     5e30025a319910695f5010dc0fb53a23299da14d (diff)
parent     85b088e934b9943322bfe37077289ae60f1b3414 (diff)
Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:
"Four bugfixes and one performance fix"
* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
sched/fair: Avoid integer overflow
sched: Optimize task_sched_runtime()
sched/numa: Cure update_numa_stats() vs. hotplug
sched/numa: Fix NULL pointer dereference in task_numa_migrate()
sched: Fix endless sync_sched/rcu() loop inside _cpu_down()
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpu.c        |  5
-rw-r--r--   kernel/sched/core.c | 14
-rw-r--r--   kernel/sched/fair.c | 31
3 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 973d034acf84..deff2e693766 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -306,7 +306,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 				__func__, cpu);
 		goto out_release;
 	}
-	smpboot_park_threads(cpu);
 
 	/*
 	 * By now we've cleared cpu_active_mask, wait for all preempt-disabled
@@ -315,12 +314,16 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	 *
 	 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
 	 * not imply sync_sched(), so explicitly call both.
+	 *
+	 * Do sync before park smpboot threads to take care the rcu boost case.
 	 */
 #ifdef CONFIG_PREEMPT
 	synchronize_sched();
 #endif
 	synchronize_rcu();
 
+	smpboot_park_threads(cpu);
+
 	/*
 	 * So now all preempt/rcu users must observe !cpu_active().
 	 */
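The cpu.c change is purely an ordering fix: the grace-period waits (synchronize_sched()/synchronize_rcu()) are moved ahead of smpboot_park_threads(), because, per the new comment, completing those waits in the RCU boost case may depend on per-CPU kthreads that parking would stop. Below is a minimal userspace analogy of that shutdown-ordering rule, using pthreads and invented names rather than any kernel API: wait for the users of a helper thread before stopping the helper.

/*
 * Userspace analogy (hypothetical, not kernel code): a "helper" thread
 * completes requests for "reader" threads.  The safe teardown order is
 * to wait for the readers first and only then stop the helper; stopping
 * the helper first could leave a reader waiting forever.
 */
#include <pthread.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stdio.h>

static sem_t request, reply;
static bool helper_stop;

static void *helper(void *arg)
{
        (void)arg;
        for (;;) {
                sem_wait(&request);
                if (helper_stop)
                        break;
                sem_post(&reply);       /* complete one reader's request */
        }
        return NULL;
}

static void *reader(void *arg)
{
        (void)arg;
        sem_post(&request);             /* this reader needs the helper */
        sem_wait(&reply);
        return NULL;
}

int main(void)
{
        pthread_t h, r;

        sem_init(&request, 0, 0);
        sem_init(&reply, 0, 0);
        pthread_create(&h, NULL, helper, NULL);
        pthread_create(&r, NULL, reader, NULL);

        pthread_join(r, NULL);          /* 1) wait for the helper's users */
        helper_stop = true;             /* 2) only then stop the helper   */
        sem_post(&request);
        pthread_join(h, NULL);
        puts("clean shutdown");
        return 0;
}

Joining the reader before stopping the helper mirrors doing the sync before parking: whatever is being waited on must still be able to make progress.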
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1deccd78be98..c1808606ee5f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2253,6 +2253,20 @@ unsigned long long task_sched_runtime(struct task_struct *p)
 	struct rq *rq;
 	u64 ns = 0;
 
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+	/*
+	 * 64-bit doesn't need locks to atomically read a 64bit value.
+	 * So we have a optimization chance when the task's delta_exec is 0.
+	 * Reading ->on_cpu is racy, but this is ok.
+	 *
+	 * If we race with it leaving cpu, we'll take a lock. So we're correct.
+	 * If we race with it entering cpu, unaccounted time is 0. This is
+	 * indistinguishable from the read occurring a few cycles earlier.
+	 */
+	if (!p->on_cpu)
+		return p->se.sum_exec_runtime;
+#endif
+
 	rq = task_rq_lock(p, &flags);
 	ns = p->se.sum_exec_runtime + do_task_delta_exec(p, rq);
 	task_rq_unlock(rq, p, &flags);
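The new fast path in task_sched_runtime() leans on the two observations spelled out in its comment: on 64-bit SMP kernels an aligned 64-bit load is atomic, and a task that is not on a CPU has no unaccounted delta, so its cached sum is already exact; only when the task might be running do we pay for the rq lock. A small standalone sketch of the same lockless-read-with-locked-fallback pattern, using C11 atomics and invented names (this is not the scheduler's code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct task_stats {
        pthread_mutex_t lock;                   /* guards the slow path */
        _Atomic uint64_t sum_exec_runtime;      /* total accounted runtime, ns */
        atomic_bool on_cpu;                     /* racy hint: currently running? */
};

/* Return the runtime, taking the lock only when a delta may be pending. */
static uint64_t read_runtime(struct task_stats *t)
{
        uint64_t ns;

        /*
         * Racy check, same shape as the kernel fast path: if the task is
         * not running there is nothing unaccounted, and the 64-bit load
         * needs no lock.  Racing with it starting to run just makes this
         * read look a few cycles older, which is fine.
         */
        if (!atomic_load(&t->on_cpu))
                return atomic_load_explicit(&t->sum_exec_runtime,
                                            memory_order_relaxed);

        pthread_mutex_lock(&t->lock);   /* slow path; the kernel folds in the
                                           pending delta under this lock */
        ns = atomic_load_explicit(&t->sum_exec_runtime, memory_order_relaxed);
        pthread_mutex_unlock(&t->lock);
        return ns;
}

int main(void)
{
        struct task_stats t = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .sum_exec_runtime = 123456789,
                .on_cpu = false,
        };

        printf("runtime: %llu ns\n", (unsigned long long)read_runtime(&t));
        return 0;
}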
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df77c605c7a6..e8b652ebe027 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1000,7 +1000,7 @@ struct numa_stats {
  */
 static void update_numa_stats(struct numa_stats *ns, int nid)
 {
-	int cpu;
+	int cpu, cpus = 0;
 
 	memset(ns, 0, sizeof(*ns));
 	for_each_cpu(cpu, cpumask_of_node(nid)) {
@@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
 		ns->nr_running += rq->nr_running;
 		ns->load += weighted_cpuload(cpu);
 		ns->power += power_of(cpu);
+
+		cpus++;
 	}
 
+	/*
+	 * If we raced with hotplug and there are no CPUs left in our mask
+	 * the @ns structure is NULL'ed and task_numa_compare() will
+	 * not find this node attractive.
+	 *
+	 * We'll either bail at !has_capacity, or we'll detect a huge imbalance
+	 * and bail there.
+	 */
+	if (!cpus)
+		return;
+
 	ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
 	ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
 	ns->has_capacity = (ns->nr_running < ns->capacity);
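The new counter exists because a CPU-hotplug race can leave cpumask_of_node(nid) empty: the loop then never runs, ns->power stays zero, and the scaling below the guard would divide by zero. Returning early keeps the zeroed stats, which the later checks treat as an unattractive node. A short self-contained sketch of the same count-then-guard pattern, with made-up fields and values instead of the kernel's:

#include <stdio.h>

struct stats {
        unsigned long nr_running, load, power;
};

/* Aggregate per-CPU samples; bail out before scaling if none were seen. */
static void update_stats(struct stats *s, const unsigned long *loads,
                         const unsigned long *powers, int ncpus)
{
        int cpu, cpus = 0;

        *s = (struct stats){ 0 };
        for (cpu = 0; cpu < ncpus; cpu++) {
                s->load += loads[cpu];
                s->power += powers[cpu];
                s->nr_running++;                /* stand-in for per-rq counts */
                cpus++;
        }

        if (!cpus)                              /* empty set: avoid dividing by zero */
                return;

        s->load = (s->load * 1024) / s->power;  /* 1024 plays SCHED_POWER_SCALE */
}

int main(void)
{
        struct stats s;

        update_stats(&s, NULL, NULL, 0);        /* the raced-with-hotplug case */
        printf("load=%lu power=%lu\n", s.load, s.power);
        return 0;
}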
@@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p)
 	 */
 	rcu_read_lock();
 	sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
-	env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+	if (sd)
+		env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
 	rcu_read_unlock();
 
+	/*
+	 * Cpusets can break the scheduler domain tree into smaller
+	 * balance domains, some of which do not cross NUMA boundaries.
+	 * Tasks that are "trapped" in such domains cannot be migrated
+	 * elsewhere, so there is no point in (re)trying.
+	 */
+	if (unlikely(!sd)) {
+		p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+		return -EINVAL;
+	}
+
 	taskweight = task_weight(p, env.src_nid);
 	groupweight = group_weight(p, env.src_nid);
 	update_numa_stats(&env.src_stats, env.src_nid);
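This hunk guards a lookup that was previously assumed to succeed: with cpusets splitting the domain tree, per_cpu(sd_numa, cpu) can be NULL, so sd is only dereferenced when present and the task otherwise settles on its current node. A tiny sketch of that lookup, conditional use, and fallback shape, with hypothetical names only:

#include <stdio.h>

struct domain {
        int imbalance_pct;
};

/* Hypothetical lookup that may legitimately find nothing. */
static struct domain *lookup_numa_domain(int cpu)
{
        (void)cpu;
        return NULL;                    /* e.g. cpusets split the domain tree */
}

static int try_migrate(int cpu)
{
        struct domain *d = lookup_numa_domain(cpu);
        int imbalance_pct = 100;        /* sane default when no domain exists */

        if (d)                          /* dereference only when present */
                imbalance_pct = 100 + (d->imbalance_pct - 100) / 2;

        if (!d) {
                /* nowhere wider to migrate to: give up rather than crash */
                return -1;
        }

        printf("migrating with imbalance_pct=%d\n", imbalance_pct);
        return 0;
}

int main(void)
{
        printf("try_migrate() returned %d\n", try_migrate(0));
        return 0;
}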
@@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
 	long contrib;
 
 	/* The fraction of a cpu used by this cfs_rq */
-	contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+	contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
 			  sa->runnable_avg_period + 1);
 	contrib -= cfs_rq->tg_runnable_contrib;
 
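The last fair.c hunk is the integer-overflow fix: runnable_avg_sum is a 32-bit field, so without the cast the left shift by NICE_0_SHIFT is performed in 32-bit arithmetic and the high bits are discarded before div_u64() ever sees the value; casting to u64 first widens the operand so the shift cannot truncate. A tiny standalone demonstration with a made-up input value:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint32_t sum = 5000000;         /* made-up value for illustration */
        unsigned int shift = 10;        /* stands in for NICE_0_SHIFT */

        uint32_t narrow = sum << shift;                 /* 32-bit shift: wraps mod 2^32 */
        uint64_t wide = (uint64_t)sum << shift;         /* widen first: no truncation */

        printf("32-bit shift: %" PRIu32 "\n", narrow);  /* 825032704 (wrapped) */
        printf("64-bit shift: %" PRIu64 "\n", wide);    /* 5120000000 (exact)  */
        return 0;
}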