author     Linus Torvalds <torvalds@linux-foundation.org>   2009-09-18 00:00:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2009-09-18 00:00:02 -0400
commit     dcbf77b9e86e1726f5fbd01bb98820dac06d456e (patch)
tree       2f0b728ce70c03e1d0e3461e8a3c3d1fbe68fb90 /kernel/sched.c
parent     ca043a66ae48c74fa628ec92178f7a54f5b9a106 (diff)
parent     29cd8bae396583a2ee9a3340db8c5102acf9f6fd (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (37 commits)
sched: Fix SD_POWERSAVING_BALANCE|SD_PREFER_LOCAL vs SD_WAKE_AFFINE
sched: Stop buddies from hogging the system
sched: Add new wakeup preemption mode: WAKEUP_RUNNING
sched: Fix TASK_WAKING & loadaverage breakage
sched: Disable wakeup balancing
sched: Rename flags to wake_flags
sched: Clean up the load_idx selection in select_task_rq_fair
sched: Optimize cgroup vs wakeup a bit
sched: x86: Name old_perf in a unique way
sched: Implement a gentler fair-sleepers feature
sched: Add SD_PREFER_LOCAL
sched: Add a few SYNC hint knobs to play with
sched: Fix sync wakeups again
sched: Add WF_FORK
sched: Rename sync arguments
sched: Rename select_task_rq() argument
sched: Feature to disable APERF/MPERF cpu_power
x86: sched: Provide arch implementations using aperf/mperf
x86: Add generic aperf/mperf code
x86: Move APERF/MPERF into a X86_FEATURE
...
Fix up trivial conflict in arch/x86/include/asm/processor.h due to
nearby addition of amd_get_nb_id() declaration from the EDAC merge.
Diffstat (limited to 'kernel/sched.c')
-rw-r--r--   kernel/sched.c   444
1 file changed, 143 insertions(+), 301 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index d9db3fb17573..faf4d463bbff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -119,8 +119,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)
 
-static void double_rq_lock(struct rq *rq1, struct rq *rq2);
-
 static inline int rt_policy(int policy)
 {
 	if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR))
@@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 
 #else
 
-#ifdef CONFIG_SMP
-static int root_task_group_empty(void)
-{
-	return 1;
-}
-#endif
-
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
 static inline struct task_group *task_group(struct task_struct *p)
 {
@@ -514,14 +505,6 @@ struct root_domain {
 #ifdef CONFIG_SMP
 	struct cpupri cpupri;
 #endif
-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
-	/*
-	 * Preferred wake up cpu nominated by sched_mc balance that will be
-	 * used when most cpus are idle in the system indicating overall very
-	 * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2)
-	 */
-	unsigned int sched_mc_preferred_wakeup_cpu;
-#endif
 };
 
 /*
@@ -646,9 +629,10 @@ struct rq {
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
 
-static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync)
+static inline
+void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
-	rq->curr->sched_class->check_preempt_curr(rq, p, sync);
+	rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 }
 
 static inline int cpu_of(struct rq *rq)
@@ -1509,8 +1493,65 @@ static int tg_nop(struct task_group *tg, void *data)
 #endif
 
 #ifdef CONFIG_SMP
-static unsigned long source_load(int cpu, int type);
-static unsigned long target_load(int cpu, int type);
+/* Used instead of source_load when we know the type == 0 */
+static unsigned long weighted_cpuload(const int cpu)
+{
+	return cpu_rq(cpu)->load.weight;
+}
+
+/*
+ * Return a low guess at the load of a migration-source cpu weighted
+ * according to the scheduling class and "nice" value.
+ *
+ * We want to under-estimate the load of migration sources, to
+ * balance conservatively.
+ */
+static unsigned long source_load(int cpu, int type)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long total = weighted_cpuload(cpu);
+
+	if (type == 0 || !sched_feat(LB_BIAS))
+		return total;
+
+	return min(rq->cpu_load[type-1], total);
+}
+
+/*
+ * Return a high guess at the load of a migration-target cpu weighted
+ * according to the scheduling class and "nice" value.
+ */
+static unsigned long target_load(int cpu, int type)
+{
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long total = weighted_cpuload(cpu);
+
+	if (type == 0 || !sched_feat(LB_BIAS))
+		return total;
+
+	return max(rq->cpu_load[type-1], total);
+}
+
+static struct sched_group *group_of(int cpu)
+{
+	struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
+
+	if (!sd)
+		return NULL;
+
+	return sd->groups;
+}
+
+static unsigned long power_of(int cpu)
+{
+	struct sched_group *group = group_of(cpu);
+
+	if (!group)
+		return SCHED_LOAD_SCALE;
+
+	return group->cpu_power;
+}
+
 static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd);
 
 static unsigned long cpu_avg_load_per_task(int cpu)
@@ -1695,6 +1736,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd)
 
 #ifdef CONFIG_PREEMPT
 
+static void double_rq_lock(struct rq *rq1, struct rq *rq2);
+
 /*
  * fair double_lock_balance: Safely acquires both rq->locks in a fair
  * way at the expense of forcing extra atomic operations in all
@@ -1959,13 +2002,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
 }
 
 #ifdef CONFIG_SMP
-
-/* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
-{
-	return cpu_rq(cpu)->load.weight;
-}
-
 /*
  * Is this task likely cache-hot:
  */
@@ -2239,185 +2275,6 @@ void kick_process(struct task_struct *p)
 	preempt_enable();
 }
 EXPORT_SYMBOL_GPL(kick_process);
-
-/*
- * Return a low guess at the load of a migration-source cpu weighted
- * according to the scheduling class and "nice" value.
- *
- * We want to under-estimate the load of migration sources, to
- * balance conservatively.
- */
-static unsigned long source_load(int cpu, int type)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
-
-	if (type == 0 || !sched_feat(LB_BIAS))
-		return total;
-
-	return min(rq->cpu_load[type-1], total);
-}
-
-/*
- * Return a high guess at the load of a migration-target cpu weighted
- * according to the scheduling class and "nice" value.
- */
-static unsigned long target_load(int cpu, int type)
-{
-	struct rq *rq = cpu_rq(cpu);
-	unsigned long total = weighted_cpuload(cpu);
-
-	if (type == 0 || !sched_feat(LB_BIAS))
-		return total;
-
-	return max(rq->cpu_load[type-1], total);
-}
-
-/*
- * find_idlest_group finds and returns the least busy CPU group within the
- * domain.
- */
-static struct sched_group *
-find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
-{
-	struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups;
-	unsigned long min_load = ULONG_MAX, this_load = 0;
-	int load_idx = sd->forkexec_idx;
-	int imbalance = 100 + (sd->imbalance_pct-100)/2;
-
-	do {
-		unsigned long load, avg_load;
-		int local_group;
-		int i;
-
-		/* Skip over this group if it has no CPUs allowed */
-		if (!cpumask_intersects(sched_group_cpus(group),
-					&p->cpus_allowed))
-			continue;
-
-		local_group = cpumask_test_cpu(this_cpu,
-					       sched_group_cpus(group));
-
-		/* Tally up the load of all CPUs in the group */
-		avg_load = 0;
-
-		for_each_cpu(i, sched_group_cpus(group)) {
-			/* Bias balancing toward cpus of our domain */
-			if (local_group)
-				load = source_load(i, load_idx);
-			else
-				load = target_load(i, load_idx);
-
-			avg_load += load;
-		}
-
-		/* Adjust by relative CPU power of the group */
-		avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power;
-
-		if (local_group) {
-			this_load = avg_load;
-			this = group;
-		} else if (avg_load < min_load) {
-			min_load = avg_load;
-			idlest = group;
-		}
-	} while (group = group->next, group != sd->groups);
-
-	if (!idlest || 100*this_load < imbalance*min_load)
-		return NULL;
-	return idlest;
-}
-
-/*
- * find_idlest_cpu - find the idlest cpu among the cpus in group.
- */
-static int
-find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
-{
-	unsigned long load, min_load = ULONG_MAX;
-	int idlest = -1;
-	int i;
-
-	/* Traverse only the allowed CPUs */
-	for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) {
-		load = weighted_cpuload(i);
-
-		if (load < min_load || (load == min_load && i == this_cpu)) {
-			min_load = load;
-			idlest = i;
-		}
-	}
-
-	return idlest;
-}
-
-/*
- * sched_balance_self: balance the current task (running on cpu) in domains
- * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
- * SD_BALANCE_EXEC.
- *
- * Balance, ie. select the least loaded group.
- *
- * Returns the target CPU number, or the same CPU if no balancing is needed.
- *
- * preempt must be disabled.
- */
-static int sched_balance_self(int cpu, int flag)
-{
-	struct task_struct *t = current;
-	struct sched_domain *tmp, *sd = NULL;
-
-	for_each_domain(cpu, tmp) {
-		/*
-		 * If power savings logic is enabled for a domain, stop there.
-		 */
-		if (tmp->flags & SD_POWERSAVINGS_BALANCE)
-			break;
-		if (tmp->flags & flag)
-			sd = tmp;
-	}
-
-	if (sd)
-		update_shares(sd);
-
-	while (sd) {
-		struct sched_group *group;
-		int new_cpu, weight;
-
-		if (!(sd->flags & flag)) {
-			sd = sd->child;
-			continue;
-		}
-
-		group = find_idlest_group(sd, t, cpu);
-		if (!group) {
-			sd = sd->child;
-			continue;
-		}
-
-		new_cpu = find_idlest_cpu(group, t, cpu);
-		if (new_cpu == -1 || new_cpu == cpu) {
-			/* Now try balancing at a lower domain level of cpu */
-			sd = sd->child;
-			continue;
-		}
-
-		/* Now try balancing at a lower domain level of new_cpu */
-		cpu = new_cpu;
-		weight = cpumask_weight(sched_domain_span(sd));
-		sd = NULL;
-		for_each_domain(cpu, tmp) {
-			if (weight <= cpumask_weight(sched_domain_span(tmp)))
-				break;
-			if (tmp->flags & flag)
-				sd = tmp;
-		}
-		/* while loop will break here if sd == NULL */
-	}
-
-	return cpu;
-}
-
 #endif /* CONFIG_SMP */
 
 /**
@@ -2455,37 +2312,22 @@ void task_oncpu_function_call(struct task_struct *p,
  *
  * returns failure only if the task is already active.
  */
-static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
+static int try_to_wake_up(struct task_struct *p, unsigned int state,
+			  int wake_flags)
 {
 	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
-	long old_state;
 	struct rq *rq;
 
 	if (!sched_feat(SYNC_WAKEUPS))
-		sync = 0;
-
-#ifdef CONFIG_SMP
-	if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) {
-		struct sched_domain *sd;
+		wake_flags &= ~WF_SYNC;
 
-		this_cpu = raw_smp_processor_id();
-		cpu = task_cpu(p);
-
-		for_each_domain(this_cpu, sd) {
-			if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-				update_shares(sd);
-				break;
-			}
-		}
-	}
-#endif
+	this_cpu = get_cpu();
 
 	smp_wmb();
 	rq = task_rq_lock(p, &flags);
 	update_rq_clock(rq);
-	old_state = p->state;
-	if (!(old_state & state))
+	if (!(p->state & state))
 		goto out;
 
 	if (p->se.on_rq)
@@ -2493,27 +2335,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 
 	cpu = task_cpu(p);
 	orig_cpu = cpu;
-	this_cpu = smp_processor_id();
 
 #ifdef CONFIG_SMP
 	if (unlikely(task_running(rq, p)))
 		goto out_activate;
 
-	cpu = p->sched_class->select_task_rq(p, sync);
-	if (cpu != orig_cpu) {
+	/*
+	 * In order to handle concurrent wakeups and release the rq->lock
+	 * we put the task in TASK_WAKING state.
+	 *
+	 * First fix up the nr_uninterruptible count:
+	 */
+	if (task_contributes_to_load(p))
+		rq->nr_uninterruptible--;
+	p->state = TASK_WAKING;
+	task_rq_unlock(rq, &flags);
+
+	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+	if (cpu != orig_cpu)
 		set_task_cpu(p, cpu);
-		task_rq_unlock(rq, &flags);
-		/* might preempt at this point */
-		rq = task_rq_lock(p, &flags);
-		old_state = p->state;
-		if (!(old_state & state))
-			goto out;
-		if (p->se.on_rq)
-			goto out_running;
 
-		this_cpu = smp_processor_id();
-		cpu = task_cpu(p);
-	}
+	rq = task_rq_lock(p, &flags);
+	WARN_ON(p->state != TASK_WAKING);
+	cpu = task_cpu(p);
 
 #ifdef CONFIG_SCHEDSTATS
 	schedstat_inc(rq, ttwu_count);
@@ -2533,7 +2377,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 out_activate:
 #endif /* CONFIG_SMP */
 	schedstat_inc(p, se.nr_wakeups);
-	if (sync)
+	if (wake_flags & WF_SYNC)
 		schedstat_inc(p, se.nr_wakeups_sync);
 	if (orig_cpu != cpu)
 		schedstat_inc(p, se.nr_wakeups_migrate);
@@ -2562,7 +2406,7 @@ out_activate:
 
 out_running:
 	trace_sched_wakeup(rq, p, success);
-	check_preempt_curr(rq, p, sync);
+	check_preempt_curr(rq, p, wake_flags);
 
 	p->state = TASK_RUNNING;
 #ifdef CONFIG_SMP
@@ -2571,6 +2415,7 @@ out_running:
 #endif
 out:
 	task_rq_unlock(rq, &flags);
+	put_cpu();
 
 	return success;
 }
@@ -2613,6 +2458,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_overlap = 0;
 	p->se.start_runtime = 0;
 	p->se.avg_wakeup = sysctl_sched_wakeup_granularity;
+	p->se.avg_running = 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start = 0;
@@ -2674,11 +2520,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
 
 	__sched_fork(p);
 
-#ifdef CONFIG_SMP
-	cpu = sched_balance_self(cpu, SD_BALANCE_FORK);
-#endif
-	set_task_cpu(p, cpu);
-
 	/*
 	 * Make sure we do not leak PI boosting priority to the child.
 	 */
@@ -2709,6 +2550,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	if (!rt_prio(p->prio))
 		p->sched_class = &fair_sched_class;
 
+#ifdef CONFIG_SMP
+	cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+#endif
+	set_task_cpu(p, cpu);
+
 #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
 	if (likely(sched_info_on()))
 		memset(&p->sched_info, 0, sizeof(p->sched_info));
@@ -2754,7 +2600,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 		inc_nr_running(rq);
 	}
 	trace_sched_wakeup_new(rq, p, 1);
-	check_preempt_curr(rq, p, 0);
+	check_preempt_curr(rq, p, WF_FORK);
 #ifdef CONFIG_SMP
 	if (p->sched_class->task_wake_up)
 		p->sched_class->task_wake_up(rq, p);
@@ -3263,7 +3109,7 @@ out:
 void sched_exec(void)
 {
 	int new_cpu, this_cpu = get_cpu();
-	new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC);
+	new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
 	put_cpu();
 	if (new_cpu != this_cpu)
 		sched_migrate_task(current, new_cpu);
@@ -3683,11 +3529,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
 	*imbalance = sds->min_load_per_task;
 	sds->busiest = sds->group_min;
 
-	if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) {
-		cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu =
-			group_first_cpu(sds->group_leader);
-	}
-
 	return 1;
 
 }
@@ -3711,7 +3552,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds,
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
-unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+
+unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	return SCHED_LOAD_SCALE;
+}
+
+unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+{
+	return default_scale_freq_power(sd, cpu);
+}
+
+unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
 {
 	unsigned long weight = cpumask_weight(sched_domain_span(sd));
 	unsigned long smt_gain = sd->smt_gain;
@@ -3721,6 +3573,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
 	return smt_gain;
 }
 
+unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu)
+{
+	return default_scale_smt_power(sd, cpu);
+}
+
 unsigned long scale_rt_power(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@@ -3745,10 +3602,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu)
 	unsigned long power = SCHED_LOAD_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-	/* here we could scale based on cpufreq */
+	if (sched_feat(ARCH_POWER))
+		power *= arch_scale_freq_power(sd, cpu);
+	else
+		power *= default_scale_freq_power(sd, cpu);
+
+	power >>= SCHED_LOAD_SHIFT;
 
 	if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) {
-		power *= arch_scale_smt_power(sd, cpu);
+		if (sched_feat(ARCH_POWER))
+			power *= arch_scale_smt_power(sd, cpu);
+		else
+			power *= default_scale_smt_power(sd, cpu);
+
 		power >>= SCHED_LOAD_SHIFT;
 	}
 
@@ -4161,26 +4027,6 @@ ret:
 	return NULL;
 }
 
-static struct sched_group *group_of(int cpu)
-{
-	struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd);
-
-	if (!sd)
-		return NULL;
-
-	return sd->groups;
-}
-
-static unsigned long power_of(int cpu)
-{
-	struct sched_group *group = group_of(cpu);
-
-	if (!group)
-		return SCHED_LOAD_SCALE;
-
-	return group->cpu_power;
-}
-
 /*
  * find_busiest_queue - find the busiest runqueue among the cpus in group.
  */
@@ -5465,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev)
 #endif
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *prev)
+static void put_prev_task(struct rq *rq, struct task_struct *p)
 {
-	if (prev->state == TASK_RUNNING) {
-		u64 runtime = prev->se.sum_exec_runtime;
+	u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
 
-		runtime -= prev->se.prev_sum_exec_runtime;
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+	update_avg(&p->se.avg_running, runtime);
 
+	if (p->state == TASK_RUNNING) {
 		/*
 		 * In order to avoid avg_overlap growing stale when we are
 		 * indeed overlapping and hence not getting put to sleep, grow
@@ -5482,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
 		 * correlates to the amount of cache footprint a task can
 		 * build up.
 		 */
-		update_avg(&prev->se.avg_overlap, runtime);
+		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+		update_avg(&p->se.avg_overlap, runtime);
+	} else {
+		update_avg(&p->se.avg_running, 0);
 	}
-	prev->sched_class->put_prev_task(rq, prev);
+	p->sched_class->put_prev_task(rq, p);
 }
 
 /*
@@ -5716,10 +5564,10 @@ asmlinkage void __sched preempt_schedule_irq(void)
 
 #endif /* CONFIG_PREEMPT */
 
-int default_wake_function(wait_queue_t *curr, unsigned mode, int sync,
+int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
 			  void *key)
 {
-	return try_to_wake_up(curr->private, mode, sync);
+	return try_to_wake_up(curr->private, mode, wake_flags);
 }
 EXPORT_SYMBOL(default_wake_function);
 
@@ -5733,14 +5581,14 @@ EXPORT_SYMBOL(default_wake_function);
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-			int nr_exclusive, int sync, void *key)
+			int nr_exclusive, int wake_flags, void *key)
 {
 	wait_queue_t *curr, *next;
 
 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
 		unsigned flags = curr->flags;
 
-		if (curr->func(curr, mode, sync, key) &&
+		if (curr->func(curr, mode, wake_flags, key) &&
 				(flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
 			break;
 	}
@@ -5801,16 +5649,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
 {
 	unsigned long flags;
-	int sync = 1;
+	int wake_flags = WF_SYNC;
 
 	if (unlikely(!q))
 		return;
 
 	if (unlikely(!nr_exclusive))
-		sync = 0;
+		wake_flags = 0;
 
 	spin_lock_irqsave(&q->lock, flags);
-	__wake_up_common(q, mode, nr_exclusive, sync, key);
+	__wake_up_common(q, mode, nr_exclusive, wake_flags, key);
 	spin_unlock_irqrestore(&q->lock, flags);
 }
 EXPORT_SYMBOL_GPL(__wake_up_sync_key);
@@ -8000,9 +7848,7 @@ static int sd_degenerate(struct sched_domain *sd)
 	}
 
 	/* Following flags don't use groups */
-	if (sd->flags & (SD_WAKE_IDLE |
-			 SD_WAKE_AFFINE |
-			 SD_WAKE_BALANCE))
+	if (sd->flags & (SD_WAKE_AFFINE))
 		return 0;
 
 	return 1;
@@ -8019,10 +7865,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent)
 	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
 		return 0;
 
-	/* Does parent contain flags not in child? */
-	/* WAKE_BALANCE is a subset of WAKE_AFFINE */
-	if (cflags & SD_WAKE_AFFINE)
-		pflags &= ~SD_WAKE_BALANCE;
 	/* Flags needing groups don't count if only 1 group in parent */
 	if (parent->groups == parent->groups->next) {
 		pflags &= ~(SD_LOAD_BALANCE |
@@ -8708,10 +8550,10 @@ static void set_domain_attribute(struct sched_domain *sd,
 	request = attr->relax_domain_level;
 	if (request < sd->level) {
 		/* turn off idle balance on this domain */
-		sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE);
+		sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
 	} else {
 		/* turn on idle balance on this domain */
-		sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE);
+		sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
 	}
 }
 