Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 570 |
1 file changed, 215 insertions, 355 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d9db3fb17573..76c0e9691fc0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -39,7 +39,7 @@ | |||
| 39 | #include <linux/completion.h> | 39 | #include <linux/completion.h> |
| 40 | #include <linux/kernel_stat.h> | 40 | #include <linux/kernel_stat.h> |
| 41 | #include <linux/debug_locks.h> | 41 | #include <linux/debug_locks.h> |
| 42 | #include <linux/perf_counter.h> | 42 | #include <linux/perf_event.h> |
| 43 | #include <linux/security.h> | 43 | #include <linux/security.h> |
| 44 | #include <linux/notifier.h> | 44 | #include <linux/notifier.h> |
| 45 | #include <linux/profile.h> | 45 | #include <linux/profile.h> |
| @@ -119,8 +119,6 @@ | |||
| 119 | */ | 119 | */ |
| 120 | #define RUNTIME_INF ((u64)~0ULL) | 120 | #define RUNTIME_INF ((u64)~0ULL) |
| 121 | 121 | ||
| 122 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
| 123 | |||
| 124 | static inline int rt_policy(int policy) | 122 | static inline int rt_policy(int policy) |
| 125 | { | 123 | { |
| 126 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 124 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) |
| @@ -378,13 +376,6 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) | |||
| 378 | 376 | ||
| 379 | #else | 377 | #else |
| 380 | 378 | ||
| 381 | #ifdef CONFIG_SMP | ||
| 382 | static int root_task_group_empty(void) | ||
| 383 | { | ||
| 384 | return 1; | ||
| 385 | } | ||
| 386 | #endif | ||
| 387 | |||
| 388 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } | 379 | static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } |
| 389 | static inline struct task_group *task_group(struct task_struct *p) | 380 | static inline struct task_group *task_group(struct task_struct *p) |
| 390 | { | 381 | { |
| @@ -514,14 +505,6 @@ struct root_domain { | |||
| 514 | #ifdef CONFIG_SMP | 505 | #ifdef CONFIG_SMP |
| 515 | struct cpupri cpupri; | 506 | struct cpupri cpupri; |
| 516 | #endif | 507 | #endif |
| 517 | #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) | ||
| 518 | /* | ||
| 519 | * Preferred wake up cpu nominated by sched_mc balance that will be | ||
| 520 | * used when most cpus are idle in the system indicating overall very | ||
| 521 | * low system utilisation. Triggered at POWERSAVINGS_BALANCE_WAKEUP(2) | ||
| 522 | */ | ||
| 523 | unsigned int sched_mc_preferred_wakeup_cpu; | ||
| 524 | #endif | ||
| 525 | }; | 508 | }; |
| 526 | 509 | ||
| 527 | /* | 510 | /* |
| @@ -646,9 +629,10 @@ struct rq { | |||
| 646 | 629 | ||
| 647 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 630 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
| 648 | 631 | ||
| 649 | static inline void check_preempt_curr(struct rq *rq, struct task_struct *p, int sync) | 632 | static inline |
| 633 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
| 650 | { | 634 | { |
| 651 | rq->curr->sched_class->check_preempt_curr(rq, p, sync); | 635 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); |
| 652 | } | 636 | } |
| 653 | 637 | ||
| 654 | static inline int cpu_of(struct rq *rq) | 638 | static inline int cpu_of(struct rq *rq) |
| @@ -697,15 +681,9 @@ inline void update_rq_clock(struct rq *rq) | |||
| 697 | * This interface allows printk to be called with the runqueue lock | 681 | * This interface allows printk to be called with the runqueue lock |
| 698 | * held and know whether or not it is OK to wake up the klogd. | 682 | * held and know whether or not it is OK to wake up the klogd. |
| 699 | */ | 683 | */ |
| 700 | int runqueue_is_locked(void) | 684 | int runqueue_is_locked(int cpu) |
| 701 | { | 685 | { |
| 702 | int cpu = get_cpu(); | 686 | return spin_is_locked(&cpu_rq(cpu)->lock); |
| 703 | struct rq *rq = cpu_rq(cpu); | ||
| 704 | int ret; | ||
| 705 | |||
| 706 | ret = spin_is_locked(&rq->lock); | ||
| 707 | put_cpu(); | ||
| 708 | return ret; | ||
| 709 | } | 687 | } |
| 710 | 688 | ||
| 711 | /* | 689 | /* |
| @@ -802,7 +780,7 @@ static int sched_feat_open(struct inode *inode, struct file *filp) | |||
| 802 | return single_open(filp, sched_feat_show, NULL); | 780 | return single_open(filp, sched_feat_show, NULL); |
| 803 | } | 781 | } |
| 804 | 782 | ||
| 805 | static struct file_operations sched_feat_fops = { | 783 | static const struct file_operations sched_feat_fops = { |
| 806 | .open = sched_feat_open, | 784 | .open = sched_feat_open, |
| 807 | .write = sched_feat_write, | 785 | .write = sched_feat_write, |
| 808 | .read = seq_read, | 786 | .read = seq_read, |
| @@ -1509,8 +1487,65 @@ static int tg_nop(struct task_group *tg, void *data) | |||
| 1509 | #endif | 1487 | #endif |
| 1510 | 1488 | ||
| 1511 | #ifdef CONFIG_SMP | 1489 | #ifdef CONFIG_SMP |
| 1512 | static unsigned long source_load(int cpu, int type); | 1490 | /* Used instead of source_load when we know the type == 0 */ |
| 1513 | static unsigned long target_load(int cpu, int type); | 1491 | static unsigned long weighted_cpuload(const int cpu) |
| 1492 | { | ||
| 1493 | return cpu_rq(cpu)->load.weight; | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | /* | ||
| 1497 | * Return a low guess at the load of a migration-source cpu weighted | ||
| 1498 | * according to the scheduling class and "nice" value. | ||
| 1499 | * | ||
| 1500 | * We want to under-estimate the load of migration sources, to | ||
| 1501 | * balance conservatively. | ||
| 1502 | */ | ||
| 1503 | static unsigned long source_load(int cpu, int type) | ||
| 1504 | { | ||
| 1505 | struct rq *rq = cpu_rq(cpu); | ||
| 1506 | unsigned long total = weighted_cpuload(cpu); | ||
| 1507 | |||
| 1508 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 1509 | return total; | ||
| 1510 | |||
| 1511 | return min(rq->cpu_load[type-1], total); | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | /* | ||
| 1515 | * Return a high guess at the load of a migration-target cpu weighted | ||
| 1516 | * according to the scheduling class and "nice" value. | ||
| 1517 | */ | ||
| 1518 | static unsigned long target_load(int cpu, int type) | ||
| 1519 | { | ||
| 1520 | struct rq *rq = cpu_rq(cpu); | ||
| 1521 | unsigned long total = weighted_cpuload(cpu); | ||
| 1522 | |||
| 1523 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 1524 | return total; | ||
| 1525 | |||
| 1526 | return max(rq->cpu_load[type-1], total); | ||
| 1527 | } | ||
| 1528 | |||
| 1529 | static struct sched_group *group_of(int cpu) | ||
| 1530 | { | ||
| 1531 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
| 1532 | |||
| 1533 | if (!sd) | ||
| 1534 | return NULL; | ||
| 1535 | |||
| 1536 | return sd->groups; | ||
| 1537 | } | ||
| 1538 | |||
| 1539 | static unsigned long power_of(int cpu) | ||
| 1540 | { | ||
| 1541 | struct sched_group *group = group_of(cpu); | ||
| 1542 | |||
| 1543 | if (!group) | ||
| 1544 | return SCHED_LOAD_SCALE; | ||
| 1545 | |||
| 1546 | return group->cpu_power; | ||
| 1547 | } | ||
| 1548 | |||
| 1514 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); | 1549 | static int task_hot(struct task_struct *p, u64 now, struct sched_domain *sd); |
| 1515 | 1550 | ||
| 1516 | static unsigned long cpu_avg_load_per_task(int cpu) | 1551 | static unsigned long cpu_avg_load_per_task(int cpu) |
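Editorial note: the block moved above keeps the scheduler's deliberately asymmetric load estimates. source_load() takes the minimum of the tracked cpu_load[] history and the instantaneous weighted load (a low guess, so migration sources look lighter), while target_load() takes the maximum (a high guess, so targets look busier); both fall back to the raw rq->load.weight when type == 0 or LB_BIAS is off. A user-space sketch of that biasing, not part of the patch; the cpu_load[] numbers and the harness are invented, and the LB_BIAS feature check is omitted:

#include <stdio.h>

/* Illustrative stand-ins for rq->cpu_load[] history and rq->load.weight. */
static unsigned long cpu_load[3] = { 2048, 1900, 1750 };
static unsigned long weighted_cpuload(void) { return 2300; } /* instantaneous load */

static unsigned long min_ul(unsigned long a, unsigned long b) { return a < b ? a : b; }
static unsigned long max_ul(unsigned long a, unsigned long b) { return a > b ? a : b; }

/* Low guess: under-estimate a migration source so we balance conservatively. */
static unsigned long source_load(int type)
{
	unsigned long total = weighted_cpuload();

	if (type == 0)
		return total;
	return min_ul(cpu_load[type - 1], total);
}

/* High guess: over-estimate a migration target for the same reason. */
static unsigned long target_load(int type)
{
	unsigned long total = weighted_cpuload();

	if (type == 0)
		return total;
	return max_ul(cpu_load[type - 1], total);
}

int main(void)
{
	for (int type = 0; type < 3; type++)
		printf("type=%d source=%lu target=%lu\n",
		       type, source_load(type), target_load(type));
	return 0;
}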
| @@ -1695,6 +1730,8 @@ static inline void update_shares_locked(struct rq *rq, struct sched_domain *sd) | |||
| 1695 | 1730 | ||
| 1696 | #ifdef CONFIG_PREEMPT | 1731 | #ifdef CONFIG_PREEMPT |
| 1697 | 1732 | ||
| 1733 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | ||
| 1734 | |||
| 1698 | /* | 1735 | /* |
| 1699 | * fair double_lock_balance: Safely acquires both rq->locks in a fair | 1736 | * fair double_lock_balance: Safely acquires both rq->locks in a fair |
| 1700 | * way at the expense of forcing extra atomic operations in all | 1737 | * way at the expense of forcing extra atomic operations in all |
| @@ -1959,13 +1996,6 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
| 1959 | } | 1996 | } |
| 1960 | 1997 | ||
| 1961 | #ifdef CONFIG_SMP | 1998 | #ifdef CONFIG_SMP |
| 1962 | |||
| 1963 | /* Used instead of source_load when we know the type == 0 */ | ||
| 1964 | static unsigned long weighted_cpuload(const int cpu) | ||
| 1965 | { | ||
| 1966 | return cpu_rq(cpu)->load.weight; | ||
| 1967 | } | ||
| 1968 | |||
| 1969 | /* | 1999 | /* |
| 1970 | * Is this task likely cache-hot: | 2000 | * Is this task likely cache-hot: |
| 1971 | */ | 2001 | */ |
| @@ -2023,7 +2053,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2023 | if (task_hot(p, old_rq->clock, NULL)) | 2053 | if (task_hot(p, old_rq->clock, NULL)) |
| 2024 | schedstat_inc(p, se.nr_forced2_migrations); | 2054 | schedstat_inc(p, se.nr_forced2_migrations); |
| 2025 | #endif | 2055 | #endif |
| 2026 | perf_swcounter_event(PERF_COUNT_SW_CPU_MIGRATIONS, | 2056 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, |
| 2027 | 1, 1, NULL, 0); | 2057 | 1, 1, NULL, 0); |
| 2028 | } | 2058 | } |
| 2029 | p->se.vruntime -= old_cfsrq->min_vruntime - | 2059 | p->se.vruntime -= old_cfsrq->min_vruntime - |
| @@ -2239,185 +2269,6 @@ void kick_process(struct task_struct *p) | |||
| 2239 | preempt_enable(); | 2269 | preempt_enable(); |
| 2240 | } | 2270 | } |
| 2241 | EXPORT_SYMBOL_GPL(kick_process); | 2271 | EXPORT_SYMBOL_GPL(kick_process); |
| 2242 | |||
| 2243 | /* | ||
| 2244 | * Return a low guess at the load of a migration-source cpu weighted | ||
| 2245 | * according to the scheduling class and "nice" value. | ||
| 2246 | * | ||
| 2247 | * We want to under-estimate the load of migration sources, to | ||
| 2248 | * balance conservatively. | ||
| 2249 | */ | ||
| 2250 | static unsigned long source_load(int cpu, int type) | ||
| 2251 | { | ||
| 2252 | struct rq *rq = cpu_rq(cpu); | ||
| 2253 | unsigned long total = weighted_cpuload(cpu); | ||
| 2254 | |||
| 2255 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 2256 | return total; | ||
| 2257 | |||
| 2258 | return min(rq->cpu_load[type-1], total); | ||
| 2259 | } | ||
| 2260 | |||
| 2261 | /* | ||
| 2262 | * Return a high guess at the load of a migration-target cpu weighted | ||
| 2263 | * according to the scheduling class and "nice" value. | ||
| 2264 | */ | ||
| 2265 | static unsigned long target_load(int cpu, int type) | ||
| 2266 | { | ||
| 2267 | struct rq *rq = cpu_rq(cpu); | ||
| 2268 | unsigned long total = weighted_cpuload(cpu); | ||
| 2269 | |||
| 2270 | if (type == 0 || !sched_feat(LB_BIAS)) | ||
| 2271 | return total; | ||
| 2272 | |||
| 2273 | return max(rq->cpu_load[type-1], total); | ||
| 2274 | } | ||
| 2275 | |||
| 2276 | /* | ||
| 2277 | * find_idlest_group finds and returns the least busy CPU group within the | ||
| 2278 | * domain. | ||
| 2279 | */ | ||
| 2280 | static struct sched_group * | ||
| 2281 | find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) | ||
| 2282 | { | ||
| 2283 | struct sched_group *idlest = NULL, *this = NULL, *group = sd->groups; | ||
| 2284 | unsigned long min_load = ULONG_MAX, this_load = 0; | ||
| 2285 | int load_idx = sd->forkexec_idx; | ||
| 2286 | int imbalance = 100 + (sd->imbalance_pct-100)/2; | ||
| 2287 | |||
| 2288 | do { | ||
| 2289 | unsigned long load, avg_load; | ||
| 2290 | int local_group; | ||
| 2291 | int i; | ||
| 2292 | |||
| 2293 | /* Skip over this group if it has no CPUs allowed */ | ||
| 2294 | if (!cpumask_intersects(sched_group_cpus(group), | ||
| 2295 | &p->cpus_allowed)) | ||
| 2296 | continue; | ||
| 2297 | |||
| 2298 | local_group = cpumask_test_cpu(this_cpu, | ||
| 2299 | sched_group_cpus(group)); | ||
| 2300 | |||
| 2301 | /* Tally up the load of all CPUs in the group */ | ||
| 2302 | avg_load = 0; | ||
| 2303 | |||
| 2304 | for_each_cpu(i, sched_group_cpus(group)) { | ||
| 2305 | /* Bias balancing toward cpus of our domain */ | ||
| 2306 | if (local_group) | ||
| 2307 | load = source_load(i, load_idx); | ||
| 2308 | else | ||
| 2309 | load = target_load(i, load_idx); | ||
| 2310 | |||
| 2311 | avg_load += load; | ||
| 2312 | } | ||
| 2313 | |||
| 2314 | /* Adjust by relative CPU power of the group */ | ||
| 2315 | avg_load = (avg_load * SCHED_LOAD_SCALE) / group->cpu_power; | ||
| 2316 | |||
| 2317 | if (local_group) { | ||
| 2318 | this_load = avg_load; | ||
| 2319 | this = group; | ||
| 2320 | } else if (avg_load < min_load) { | ||
| 2321 | min_load = avg_load; | ||
| 2322 | idlest = group; | ||
| 2323 | } | ||
| 2324 | } while (group = group->next, group != sd->groups); | ||
| 2325 | |||
| 2326 | if (!idlest || 100*this_load < imbalance*min_load) | ||
| 2327 | return NULL; | ||
| 2328 | return idlest; | ||
| 2329 | } | ||
| 2330 | |||
| 2331 | /* | ||
| 2332 | * find_idlest_cpu - find the idlest cpu among the cpus in group. | ||
| 2333 | */ | ||
| 2334 | static int | ||
| 2335 | find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu) | ||
| 2336 | { | ||
| 2337 | unsigned long load, min_load = ULONG_MAX; | ||
| 2338 | int idlest = -1; | ||
| 2339 | int i; | ||
| 2340 | |||
| 2341 | /* Traverse only the allowed CPUs */ | ||
| 2342 | for_each_cpu_and(i, sched_group_cpus(group), &p->cpus_allowed) { | ||
| 2343 | load = weighted_cpuload(i); | ||
| 2344 | |||
| 2345 | if (load < min_load || (load == min_load && i == this_cpu)) { | ||
| 2346 | min_load = load; | ||
| 2347 | idlest = i; | ||
| 2348 | } | ||
| 2349 | } | ||
| 2350 | |||
| 2351 | return idlest; | ||
| 2352 | } | ||
| 2353 | |||
| 2354 | /* | ||
| 2355 | * sched_balance_self: balance the current task (running on cpu) in domains | ||
| 2356 | * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and | ||
| 2357 | * SD_BALANCE_EXEC. | ||
| 2358 | * | ||
| 2359 | * Balance, ie. select the least loaded group. | ||
| 2360 | * | ||
| 2361 | * Returns the target CPU number, or the same CPU if no balancing is needed. | ||
| 2362 | * | ||
| 2363 | * preempt must be disabled. | ||
| 2364 | */ | ||
| 2365 | static int sched_balance_self(int cpu, int flag) | ||
| 2366 | { | ||
| 2367 | struct task_struct *t = current; | ||
| 2368 | struct sched_domain *tmp, *sd = NULL; | ||
| 2369 | |||
| 2370 | for_each_domain(cpu, tmp) { | ||
| 2371 | /* | ||
| 2372 | * If power savings logic is enabled for a domain, stop there. | ||
| 2373 | */ | ||
| 2374 | if (tmp->flags & SD_POWERSAVINGS_BALANCE) | ||
| 2375 | break; | ||
| 2376 | if (tmp->flags & flag) | ||
| 2377 | sd = tmp; | ||
| 2378 | } | ||
| 2379 | |||
| 2380 | if (sd) | ||
| 2381 | update_shares(sd); | ||
| 2382 | |||
| 2383 | while (sd) { | ||
| 2384 | struct sched_group *group; | ||
| 2385 | int new_cpu, weight; | ||
| 2386 | |||
| 2387 | if (!(sd->flags & flag)) { | ||
| 2388 | sd = sd->child; | ||
| 2389 | continue; | ||
| 2390 | } | ||
| 2391 | |||
| 2392 | group = find_idlest_group(sd, t, cpu); | ||
| 2393 | if (!group) { | ||
| 2394 | sd = sd->child; | ||
| 2395 | continue; | ||
| 2396 | } | ||
| 2397 | |||
| 2398 | new_cpu = find_idlest_cpu(group, t, cpu); | ||
| 2399 | if (new_cpu == -1 || new_cpu == cpu) { | ||
| 2400 | /* Now try balancing at a lower domain level of cpu */ | ||
| 2401 | sd = sd->child; | ||
| 2402 | continue; | ||
| 2403 | } | ||
| 2404 | |||
| 2405 | /* Now try balancing at a lower domain level of new_cpu */ | ||
| 2406 | cpu = new_cpu; | ||
| 2407 | weight = cpumask_weight(sched_domain_span(sd)); | ||
| 2408 | sd = NULL; | ||
| 2409 | for_each_domain(cpu, tmp) { | ||
| 2410 | if (weight <= cpumask_weight(sched_domain_span(tmp))) | ||
| 2411 | break; | ||
| 2412 | if (tmp->flags & flag) | ||
| 2413 | sd = tmp; | ||
| 2414 | } | ||
| 2415 | /* while loop will break here if sd == NULL */ | ||
| 2416 | } | ||
| 2417 | |||
| 2418 | return cpu; | ||
| 2419 | } | ||
| 2420 | |||
| 2421 | #endif /* CONFIG_SMP */ | 2272 | #endif /* CONFIG_SMP */ |
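Editorial note: the deleted sched_balance_self()/find_idlest_group()/find_idlest_cpu() path picked a CPU at fork/exec time by summing each group's load, normalising by the group's cpu_power, and only moving the task when the best remote group beat the local one by the domain's imbalance_pct margin; elsewhere in this patch the same decision is delegated to the class hook select_task_rq(p, SD_BALANCE_FORK/SD_BALANCE_EXEC, 0). A rough user-space sketch of the group-selection arithmetic, with invented group data (the real code additionally biases per-CPU loads with source_load()/target_load()):

#include <stdio.h>
#include <limits.h>

#define SCHED_LOAD_SCALE 1024UL

struct group {
	const char *name;
	unsigned long load;      /* summed CPU load in the group */
	unsigned long cpu_power; /* relative capacity, SCHED_LOAD_SCALE per full CPU */
	int local;               /* contains the forking CPU? */
};

int main(void)
{
	/* Invented numbers purely for illustration. */
	struct group groups[] = {
		{ "local",  3000, 2 * SCHED_LOAD_SCALE, 1 },
		{ "remote", 1200, 2 * SCHED_LOAD_SCALE, 0 },
	};
	unsigned long this_load = 0, min_load = ULONG_MAX;
	const struct group *idlest = NULL;
	int imbalance_pct = 125;                       /* sd->imbalance_pct */
	int imbalance = 100 + (imbalance_pct - 100) / 2;

	for (unsigned i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		/* Adjust by relative CPU power of the group. */
		unsigned long avg = groups[i].load * SCHED_LOAD_SCALE / groups[i].cpu_power;

		if (groups[i].local) {
			this_load = avg;
		} else if (avg < min_load) {
			min_load = avg;
			idlest = &groups[i];
		}
	}

	if (!idlest || 100 * this_load < imbalance * min_load)
		printf("stay local (this_load=%lu)\n", this_load);
	else
		printf("move to %s (avg=%lu vs local %lu)\n",
		       idlest->name, min_load, this_load);
	return 0;
}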
| 2422 | 2273 | ||
| 2423 | /** | 2274 | /** |
| @@ -2455,37 +2306,22 @@ void task_oncpu_function_call(struct task_struct *p, | |||
| 2455 | * | 2306 | * |
| 2456 | * returns failure only if the task is already active. | 2307 | * returns failure only if the task is already active. |
| 2457 | */ | 2308 | */ |
| 2458 | static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | 2309 | static int try_to_wake_up(struct task_struct *p, unsigned int state, |
| 2310 | int wake_flags) | ||
| 2459 | { | 2311 | { |
| 2460 | int cpu, orig_cpu, this_cpu, success = 0; | 2312 | int cpu, orig_cpu, this_cpu, success = 0; |
| 2461 | unsigned long flags; | 2313 | unsigned long flags; |
| 2462 | long old_state; | ||
| 2463 | struct rq *rq; | 2314 | struct rq *rq; |
| 2464 | 2315 | ||
| 2465 | if (!sched_feat(SYNC_WAKEUPS)) | 2316 | if (!sched_feat(SYNC_WAKEUPS)) |
| 2466 | sync = 0; | 2317 | wake_flags &= ~WF_SYNC; |
| 2467 | 2318 | ||
| 2468 | #ifdef CONFIG_SMP | 2319 | this_cpu = get_cpu(); |
| 2469 | if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { | ||
| 2470 | struct sched_domain *sd; | ||
| 2471 | |||
| 2472 | this_cpu = raw_smp_processor_id(); | ||
| 2473 | cpu = task_cpu(p); | ||
| 2474 | |||
| 2475 | for_each_domain(this_cpu, sd) { | ||
| 2476 | if (cpumask_test_cpu(cpu, sched_domain_span(sd))) { | ||
| 2477 | update_shares(sd); | ||
| 2478 | break; | ||
| 2479 | } | ||
| 2480 | } | ||
| 2481 | } | ||
| 2482 | #endif | ||
| 2483 | 2320 | ||
| 2484 | smp_wmb(); | 2321 | smp_wmb(); |
| 2485 | rq = task_rq_lock(p, &flags); | 2322 | rq = task_rq_lock(p, &flags); |
| 2486 | update_rq_clock(rq); | 2323 | update_rq_clock(rq); |
| 2487 | old_state = p->state; | 2324 | if (!(p->state & state)) |
| 2488 | if (!(old_state & state)) | ||
| 2489 | goto out; | 2325 | goto out; |
| 2490 | 2326 | ||
| 2491 | if (p->se.on_rq) | 2327 | if (p->se.on_rq) |
| @@ -2493,27 +2329,29 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 2493 | 2329 | ||
| 2494 | cpu = task_cpu(p); | 2330 | cpu = task_cpu(p); |
| 2495 | orig_cpu = cpu; | 2331 | orig_cpu = cpu; |
| 2496 | this_cpu = smp_processor_id(); | ||
| 2497 | 2332 | ||
| 2498 | #ifdef CONFIG_SMP | 2333 | #ifdef CONFIG_SMP |
| 2499 | if (unlikely(task_running(rq, p))) | 2334 | if (unlikely(task_running(rq, p))) |
| 2500 | goto out_activate; | 2335 | goto out_activate; |
| 2501 | 2336 | ||
| 2502 | cpu = p->sched_class->select_task_rq(p, sync); | 2337 | /* |
| 2503 | if (cpu != orig_cpu) { | 2338 | * In order to handle concurrent wakeups and release the rq->lock |
| 2339 | * we put the task in TASK_WAKING state. | ||
| 2340 | * | ||
| 2341 | * First fix up the nr_uninterruptible count: | ||
| 2342 | */ | ||
| 2343 | if (task_contributes_to_load(p)) | ||
| 2344 | rq->nr_uninterruptible--; | ||
| 2345 | p->state = TASK_WAKING; | ||
| 2346 | task_rq_unlock(rq, &flags); | ||
| 2347 | |||
| 2348 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | ||
| 2349 | if (cpu != orig_cpu) | ||
| 2504 | set_task_cpu(p, cpu); | 2350 | set_task_cpu(p, cpu); |
| 2505 | task_rq_unlock(rq, &flags); | ||
| 2506 | /* might preempt at this point */ | ||
| 2507 | rq = task_rq_lock(p, &flags); | ||
| 2508 | old_state = p->state; | ||
| 2509 | if (!(old_state & state)) | ||
| 2510 | goto out; | ||
| 2511 | if (p->se.on_rq) | ||
| 2512 | goto out_running; | ||
| 2513 | 2351 | ||
| 2514 | this_cpu = smp_processor_id(); | 2352 | rq = task_rq_lock(p, &flags); |
| 2515 | cpu = task_cpu(p); | 2353 | WARN_ON(p->state != TASK_WAKING); |
| 2516 | } | 2354 | cpu = task_cpu(p); |
| 2517 | 2355 | ||
| 2518 | #ifdef CONFIG_SCHEDSTATS | 2356 | #ifdef CONFIG_SCHEDSTATS |
| 2519 | schedstat_inc(rq, ttwu_count); | 2357 | schedstat_inc(rq, ttwu_count); |
| @@ -2533,7 +2371,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) | |||
| 2533 | out_activate: | 2371 | out_activate: |
| 2534 | #endif /* CONFIG_SMP */ | 2372 | #endif /* CONFIG_SMP */ |
| 2535 | schedstat_inc(p, se.nr_wakeups); | 2373 | schedstat_inc(p, se.nr_wakeups); |
| 2536 | if (sync) | 2374 | if (wake_flags & WF_SYNC) |
| 2537 | schedstat_inc(p, se.nr_wakeups_sync); | 2375 | schedstat_inc(p, se.nr_wakeups_sync); |
| 2538 | if (orig_cpu != cpu) | 2376 | if (orig_cpu != cpu) |
| 2539 | schedstat_inc(p, se.nr_wakeups_migrate); | 2377 | schedstat_inc(p, se.nr_wakeups_migrate); |
| @@ -2562,7 +2400,7 @@ out_activate: | |||
| 2562 | 2400 | ||
| 2563 | out_running: | 2401 | out_running: |
| 2564 | trace_sched_wakeup(rq, p, success); | 2402 | trace_sched_wakeup(rq, p, success); |
| 2565 | check_preempt_curr(rq, p, sync); | 2403 | check_preempt_curr(rq, p, wake_flags); |
| 2566 | 2404 | ||
| 2567 | p->state = TASK_RUNNING; | 2405 | p->state = TASK_RUNNING; |
| 2568 | #ifdef CONFIG_SMP | 2406 | #ifdef CONFIG_SMP |
| @@ -2571,6 +2409,7 @@ out_running: | |||
| 2571 | #endif | 2409 | #endif |
| 2572 | out: | 2410 | out: |
| 2573 | task_rq_unlock(rq, &flags); | 2411 | task_rq_unlock(rq, &flags); |
| 2412 | put_cpu(); | ||
| 2574 | 2413 | ||
| 2575 | return success; | 2414 | return success; |
| 2576 | } | 2415 | } |
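Editorial note: try_to_wake_up() now takes a wake_flags bitmask instead of the old int sync. The SYNC_WAKEUPS feature clears the WF_SYNC bit rather than zeroing an argument, the flags are forwarded to select_task_rq() and check_preempt_curr(), wake_up_new_task() passes WF_FORK, and the rq lock can be dropped around select_task_rq() while the task sits in TASK_WAKING. A small sketch of the flag handling only; the bit values below are assumptions for illustration, not copied from the header:

#include <stdio.h>

/* Assumed values; the real definitions live in <linux/sched.h>. */
#define WF_SYNC 0x01 /* waker intends to sleep right after the wakeup */
#define WF_FORK 0x02 /* child wakeup after fork */

static int sync_wakeups_enabled = 0; /* stand-in for sched_feat(SYNC_WAKEUPS) */

static void check_preempt(int wake_flags)
{
	/* The preemption decision can now see both bits at once. */
	printf("check_preempt: sync=%d fork=%d\n",
	       !!(wake_flags & WF_SYNC), !!(wake_flags & WF_FORK));
}

static void try_to_wake_up_sketch(int wake_flags)
{
	if (!sync_wakeups_enabled)
		wake_flags &= ~WF_SYNC;	/* was: sync = 0 */

	check_preempt(wake_flags);
}

int main(void)
{
	try_to_wake_up_sketch(WF_SYNC);	/* ordinary sync wakeup */
	try_to_wake_up_sketch(WF_FORK);	/* as wake_up_new_task() now requests */
	return 0;
}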
| @@ -2613,6 +2452,7 @@ static void __sched_fork(struct task_struct *p) | |||
| 2613 | p->se.avg_overlap = 0; | 2452 | p->se.avg_overlap = 0; |
| 2614 | p->se.start_runtime = 0; | 2453 | p->se.start_runtime = 0; |
| 2615 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; | 2454 | p->se.avg_wakeup = sysctl_sched_wakeup_granularity; |
| 2455 | p->se.avg_running = 0; | ||
| 2616 | 2456 | ||
| 2617 | #ifdef CONFIG_SCHEDSTATS | 2457 | #ifdef CONFIG_SCHEDSTATS |
| 2618 | p->se.wait_start = 0; | 2458 | p->se.wait_start = 0; |
| @@ -2674,28 +2514,18 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2674 | 2514 | ||
| 2675 | __sched_fork(p); | 2515 | __sched_fork(p); |
| 2676 | 2516 | ||
| 2677 | #ifdef CONFIG_SMP | ||
| 2678 | cpu = sched_balance_self(cpu, SD_BALANCE_FORK); | ||
| 2679 | #endif | ||
| 2680 | set_task_cpu(p, cpu); | ||
| 2681 | |||
| 2682 | /* | ||
| 2683 | * Make sure we do not leak PI boosting priority to the child. | ||
| 2684 | */ | ||
| 2685 | p->prio = current->normal_prio; | ||
| 2686 | |||
| 2687 | /* | 2517 | /* |
| 2688 | * Revert to default priority/policy on fork if requested. | 2518 | * Revert to default priority/policy on fork if requested. |
| 2689 | */ | 2519 | */ |
| 2690 | if (unlikely(p->sched_reset_on_fork)) { | 2520 | if (unlikely(p->sched_reset_on_fork)) { |
| 2691 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) | 2521 | if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) { |
| 2692 | p->policy = SCHED_NORMAL; | 2522 | p->policy = SCHED_NORMAL; |
| 2693 | 2523 | p->normal_prio = p->static_prio; | |
| 2694 | if (p->normal_prio < DEFAULT_PRIO) | 2524 | } |
| 2695 | p->prio = DEFAULT_PRIO; | ||
| 2696 | 2525 | ||
| 2697 | if (PRIO_TO_NICE(p->static_prio) < 0) { | 2526 | if (PRIO_TO_NICE(p->static_prio) < 0) { |
| 2698 | p->static_prio = NICE_TO_PRIO(0); | 2527 | p->static_prio = NICE_TO_PRIO(0); |
| 2528 | p->normal_prio = p->static_prio; | ||
| 2699 | set_load_weight(p); | 2529 | set_load_weight(p); |
| 2700 | } | 2530 | } |
| 2701 | 2531 | ||
| @@ -2706,9 +2536,19 @@ void sched_fork(struct task_struct *p, int clone_flags) | |||
| 2706 | p->sched_reset_on_fork = 0; | 2536 | p->sched_reset_on_fork = 0; |
| 2707 | } | 2537 | } |
| 2708 | 2538 | ||
| 2539 | /* | ||
| 2540 | * Make sure we do not leak PI boosting priority to the child. | ||
| 2541 | */ | ||
| 2542 | p->prio = current->normal_prio; | ||
| 2543 | |||
| 2709 | if (!rt_prio(p->prio)) | 2544 | if (!rt_prio(p->prio)) |
| 2710 | p->sched_class = &fair_sched_class; | 2545 | p->sched_class = &fair_sched_class; |
| 2711 | 2546 | ||
| 2547 | #ifdef CONFIG_SMP | ||
| 2548 | cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0); | ||
| 2549 | #endif | ||
| 2550 | set_task_cpu(p, cpu); | ||
| 2551 | |||
| 2712 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) | 2552 | #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) |
| 2713 | if (likely(sched_info_on())) | 2553 | if (likely(sched_info_on())) |
| 2714 | memset(&p->sched_info, 0, sizeof(p->sched_info)); | 2554 | memset(&p->sched_info, 0, sizeof(p->sched_info)); |
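Editorial note: the reordered sched_fork() normalises priorities before the class and CPU are chosen. A reset-on-fork SCHED_FIFO/SCHED_RR child drops back to SCHED_NORMAL with normal_prio recomputed from static_prio, a negative nice value is reset to nice 0, and only then is the parent's non-boosted normal_prio copied and select_task_rq(..., SD_BALANCE_FORK, 0) consulted. A sketch of the priority bookkeeping; the nice-to-priority mapping used here is the conventional one and is an assumption, not quoted from the patch:

#include <stdio.h>

/* Conventional nice <-> priority mapping, assumed for illustration. */
#define MAX_RT_PRIO      100
#define NICE_TO_PRIO(n)  (MAX_RT_PRIO + (n) + 20)
#define PRIO_TO_NICE(p)  ((p) - MAX_RT_PRIO - 20)

struct task {
	int fifo_or_rr;      /* 1 for SCHED_FIFO/SCHED_RR in this sketch */
	int prio;            /* effective priority, possibly PI-boosted */
	int static_prio;     /* follows the nice value */
	int normal_prio;     /* base priority for the current policy */
	int reset_on_fork;
};

static void fork_priorities(struct task *child, const struct task *parent)
{
	if (child->reset_on_fork) {
		if (child->fifo_or_rr) {
			child->fifo_or_rr = 0;               /* back to SCHED_NORMAL */
			child->normal_prio = child->static_prio;
		}
		if (PRIO_TO_NICE(child->static_prio) < 0) {  /* strip negative nice */
			child->static_prio = NICE_TO_PRIO(0);
			child->normal_prio = child->static_prio;
		}
		child->reset_on_fork = 0;
	}
	/* Never leak a PI-boosted priority: copy the parent's normal_prio. */
	child->prio = parent->normal_prio;
}

int main(void)
{
	/* Parent is temporarily boosted (prio 95) but its base is nice 0 (120). */
	struct task parent = { 0, 95, NICE_TO_PRIO(0), NICE_TO_PRIO(0), 0 };
	struct task child = parent;

	fork_priorities(&child, &parent);
	printf("parent prio %d (boosted), child prio %d\n", parent.prio, child.prio);
	return 0;
}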
| @@ -2741,8 +2581,6 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2741 | BUG_ON(p->state != TASK_RUNNING); | 2581 | BUG_ON(p->state != TASK_RUNNING); |
| 2742 | update_rq_clock(rq); | 2582 | update_rq_clock(rq); |
| 2743 | 2583 | ||
| 2744 | p->prio = effective_prio(p); | ||
| 2745 | |||
| 2746 | if (!p->sched_class->task_new || !current->se.on_rq) { | 2584 | if (!p->sched_class->task_new || !current->se.on_rq) { |
| 2747 | activate_task(rq, p, 0); | 2585 | activate_task(rq, p, 0); |
| 2748 | } else { | 2586 | } else { |
| @@ -2754,7 +2592,7 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags) | |||
| 2754 | inc_nr_running(rq); | 2592 | inc_nr_running(rq); |
| 2755 | } | 2593 | } |
| 2756 | trace_sched_wakeup_new(rq, p, 1); | 2594 | trace_sched_wakeup_new(rq, p, 1); |
| 2757 | check_preempt_curr(rq, p, 0); | 2595 | check_preempt_curr(rq, p, WF_FORK); |
| 2758 | #ifdef CONFIG_SMP | 2596 | #ifdef CONFIG_SMP |
| 2759 | if (p->sched_class->task_wake_up) | 2597 | if (p->sched_class->task_wake_up) |
| 2760 | p->sched_class->task_wake_up(rq, p); | 2598 | p->sched_class->task_wake_up(rq, p); |
| @@ -2878,7 +2716,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) | |||
| 2878 | */ | 2716 | */ |
| 2879 | prev_state = prev->state; | 2717 | prev_state = prev->state; |
| 2880 | finish_arch_switch(prev); | 2718 | finish_arch_switch(prev); |
| 2881 | perf_counter_task_sched_in(current, cpu_of(rq)); | 2719 | perf_event_task_sched_in(current, cpu_of(rq)); |
| 2882 | finish_lock_switch(rq, prev); | 2720 | finish_lock_switch(rq, prev); |
| 2883 | 2721 | ||
| 2884 | fire_sched_in_preempt_notifiers(current); | 2722 | fire_sched_in_preempt_notifiers(current); |
| @@ -3064,6 +2902,19 @@ unsigned long nr_iowait(void) | |||
| 3064 | return sum; | 2902 | return sum; |
| 3065 | } | 2903 | } |
| 3066 | 2904 | ||
| 2905 | unsigned long nr_iowait_cpu(void) | ||
| 2906 | { | ||
| 2907 | struct rq *this = this_rq(); | ||
| 2908 | return atomic_read(&this->nr_iowait); | ||
| 2909 | } | ||
| 2910 | |||
| 2911 | unsigned long this_cpu_load(void) | ||
| 2912 | { | ||
| 2913 | struct rq *this = this_rq(); | ||
| 2914 | return this->cpu_load[0]; | ||
| 2915 | } | ||
| 2916 | |||
| 2917 | |||
| 3067 | /* Variables and functions for calc_load */ | 2918 | /* Variables and functions for calc_load */ |
| 3068 | static atomic_long_t calc_load_tasks; | 2919 | static atomic_long_t calc_load_tasks; |
| 3069 | static unsigned long calc_load_update; | 2920 | static unsigned long calc_load_update; |
| @@ -3263,7 +3114,7 @@ out: | |||
| 3263 | void sched_exec(void) | 3114 | void sched_exec(void) |
| 3264 | { | 3115 | { |
| 3265 | int new_cpu, this_cpu = get_cpu(); | 3116 | int new_cpu, this_cpu = get_cpu(); |
| 3266 | new_cpu = sched_balance_self(this_cpu, SD_BALANCE_EXEC); | 3117 | new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0); |
| 3267 | put_cpu(); | 3118 | put_cpu(); |
| 3268 | if (new_cpu != this_cpu) | 3119 | if (new_cpu != this_cpu) |
| 3269 | sched_migrate_task(current, new_cpu); | 3120 | sched_migrate_task(current, new_cpu); |
| @@ -3683,11 +3534,6 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
| 3683 | *imbalance = sds->min_load_per_task; | 3534 | *imbalance = sds->min_load_per_task; |
| 3684 | sds->busiest = sds->group_min; | 3535 | sds->busiest = sds->group_min; |
| 3685 | 3536 | ||
| 3686 | if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { | ||
| 3687 | cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = | ||
| 3688 | group_first_cpu(sds->group_leader); | ||
| 3689 | } | ||
| 3690 | |||
| 3691 | return 1; | 3537 | return 1; |
| 3692 | 3538 | ||
| 3693 | } | 3539 | } |
| @@ -3711,7 +3557,18 @@ static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, | |||
| 3711 | } | 3557 | } |
| 3712 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ | 3558 | #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ |
| 3713 | 3559 | ||
| 3714 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | 3560 | |
| 3561 | unsigned long default_scale_freq_power(struct sched_domain *sd, int cpu) | ||
| 3562 | { | ||
| 3563 | return SCHED_LOAD_SCALE; | ||
| 3564 | } | ||
| 3565 | |||
| 3566 | unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
| 3567 | { | ||
| 3568 | return default_scale_freq_power(sd, cpu); | ||
| 3569 | } | ||
| 3570 | |||
| 3571 | unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu) | ||
| 3715 | { | 3572 | { |
| 3716 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); | 3573 | unsigned long weight = cpumask_weight(sched_domain_span(sd)); |
| 3717 | unsigned long smt_gain = sd->smt_gain; | 3574 | unsigned long smt_gain = sd->smt_gain; |
| @@ -3721,6 +3578,11 @@ unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | |||
| 3721 | return smt_gain; | 3578 | return smt_gain; |
| 3722 | } | 3579 | } |
| 3723 | 3580 | ||
| 3581 | unsigned long __weak arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
| 3582 | { | ||
| 3583 | return default_scale_smt_power(sd, cpu); | ||
| 3584 | } | ||
| 3585 | |||
| 3724 | unsigned long scale_rt_power(int cpu) | 3586 | unsigned long scale_rt_power(int cpu) |
| 3725 | { | 3587 | { |
| 3726 | struct rq *rq = cpu_rq(cpu); | 3588 | struct rq *rq = cpu_rq(cpu); |
| @@ -3745,10 +3607,19 @@ static void update_cpu_power(struct sched_domain *sd, int cpu) | |||
| 3745 | unsigned long power = SCHED_LOAD_SCALE; | 3607 | unsigned long power = SCHED_LOAD_SCALE; |
| 3746 | struct sched_group *sdg = sd->groups; | 3608 | struct sched_group *sdg = sd->groups; |
| 3747 | 3609 | ||
| 3748 | /* here we could scale based on cpufreq */ | 3610 | if (sched_feat(ARCH_POWER)) |
| 3611 | power *= arch_scale_freq_power(sd, cpu); | ||
| 3612 | else | ||
| 3613 | power *= default_scale_freq_power(sd, cpu); | ||
| 3614 | |||
| 3615 | power >>= SCHED_LOAD_SHIFT; | ||
| 3749 | 3616 | ||
| 3750 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { | 3617 | if ((sd->flags & SD_SHARE_CPUPOWER) && weight > 1) { |
| 3751 | power *= arch_scale_smt_power(sd, cpu); | 3618 | if (sched_feat(ARCH_POWER)) |
| 3619 | power *= arch_scale_smt_power(sd, cpu); | ||
| 3620 | else | ||
| 3621 | power *= default_scale_smt_power(sd, cpu); | ||
| 3622 | |||
| 3752 | power >>= SCHED_LOAD_SHIFT; | 3623 | power >>= SCHED_LOAD_SHIFT; |
| 3753 | } | 3624 | } |
| 3754 | 3625 | ||
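Editorial note: update_cpu_power() now composes two scale factors in fixed point. It starts from SCHED_LOAD_SCALE, multiplies by the (arch-overridable, __weak) frequency factor and shifts right by SCHED_LOAD_SHIFT, then repeats with the SMT factor for SD_SHARE_CPUPOWER domains, with the ARCH_POWER feature selecting the arch hook over the default. A tiny worked example of that composition; the smt_gain default of 1178 is an assumption standing in for sd->smt_gain, and shifting after each multiply keeps the result in the same 1024-based fixed point:

#include <stdio.h>

#define SCHED_LOAD_SHIFT 10
#define SCHED_LOAD_SCALE (1UL << SCHED_LOAD_SHIFT)   /* 1024 */

/* Stand-ins for the __weak arch hooks; both return a factor scaled by 1024. */
static unsigned long scale_freq(void) { return SCHED_LOAD_SCALE; } /* no freq scaling */
static unsigned long scale_smt(unsigned int weight)
{
	unsigned long smt_gain = 1178;        /* assumed default, ~1.15 * 1024 */
	return smt_gain / weight;             /* split the gain across SMT siblings */
}

int main(void)
{
	unsigned int smt_weight = 2;          /* two hardware threads per core */
	unsigned long power = SCHED_LOAD_SCALE;

	power *= scale_freq();
	power >>= SCHED_LOAD_SHIFT;           /* 1024 * 1024 >> 10 = 1024 */

	power *= scale_smt(smt_weight);
	power >>= SCHED_LOAD_SHIFT;           /* 1024 * 589 >> 10 = 589 */

	printf("cpu_power = %lu (SCHED_LOAD_SCALE = %lu)\n", power, SCHED_LOAD_SCALE);
	return 0;
}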
| @@ -4161,26 +4032,6 @@ ret: | |||
| 4161 | return NULL; | 4032 | return NULL; |
| 4162 | } | 4033 | } |
| 4163 | 4034 | ||
| 4164 | static struct sched_group *group_of(int cpu) | ||
| 4165 | { | ||
| 4166 | struct sched_domain *sd = rcu_dereference(cpu_rq(cpu)->sd); | ||
| 4167 | |||
| 4168 | if (!sd) | ||
| 4169 | return NULL; | ||
| 4170 | |||
| 4171 | return sd->groups; | ||
| 4172 | } | ||
| 4173 | |||
| 4174 | static unsigned long power_of(int cpu) | ||
| 4175 | { | ||
| 4176 | struct sched_group *group = group_of(cpu); | ||
| 4177 | |||
| 4178 | if (!group) | ||
| 4179 | return SCHED_LOAD_SCALE; | ||
| 4180 | |||
| 4181 | return group->cpu_power; | ||
| 4182 | } | ||
| 4183 | |||
| 4184 | /* | 4035 | /* |
| 4185 | * find_busiest_queue - find the busiest runqueue among the cpus in group. | 4036 | * find_busiest_queue - find the busiest runqueue among the cpus in group. |
| 4186 | */ | 4037 | */ |
| @@ -5239,17 +5090,16 @@ void account_idle_time(cputime_t cputime) | |||
| 5239 | */ | 5090 | */ |
| 5240 | void account_process_tick(struct task_struct *p, int user_tick) | 5091 | void account_process_tick(struct task_struct *p, int user_tick) |
| 5241 | { | 5092 | { |
| 5242 | cputime_t one_jiffy = jiffies_to_cputime(1); | 5093 | cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); |
| 5243 | cputime_t one_jiffy_scaled = cputime_to_scaled(one_jiffy); | ||
| 5244 | struct rq *rq = this_rq(); | 5094 | struct rq *rq = this_rq(); |
| 5245 | 5095 | ||
| 5246 | if (user_tick) | 5096 | if (user_tick) |
| 5247 | account_user_time(p, one_jiffy, one_jiffy_scaled); | 5097 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
| 5248 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 5098 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| 5249 | account_system_time(p, HARDIRQ_OFFSET, one_jiffy, | 5099 | account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy, |
| 5250 | one_jiffy_scaled); | 5100 | one_jiffy_scaled); |
| 5251 | else | 5101 | else |
| 5252 | account_idle_time(one_jiffy); | 5102 | account_idle_time(cputime_one_jiffy); |
| 5253 | } | 5103 | } |
| 5254 | 5104 | ||
| 5255 | /* | 5105 | /* |
| @@ -5353,7 +5203,7 @@ void scheduler_tick(void) | |||
| 5353 | curr->sched_class->task_tick(rq, curr, 0); | 5203 | curr->sched_class->task_tick(rq, curr, 0); |
| 5354 | spin_unlock(&rq->lock); | 5204 | spin_unlock(&rq->lock); |
| 5355 | 5205 | ||
| 5356 | perf_counter_task_tick(curr, cpu); | 5206 | perf_event_task_tick(curr, cpu); |
| 5357 | 5207 | ||
| 5358 | #ifdef CONFIG_SMP | 5208 | #ifdef CONFIG_SMP |
| 5359 | rq->idle_at_tick = idle_cpu(cpu); | 5209 | rq->idle_at_tick = idle_cpu(cpu); |
| @@ -5465,14 +5315,13 @@ static inline void schedule_debug(struct task_struct *prev) | |||
| 5465 | #endif | 5315 | #endif |
| 5466 | } | 5316 | } |
| 5467 | 5317 | ||
| 5468 | static void put_prev_task(struct rq *rq, struct task_struct *prev) | 5318 | static void put_prev_task(struct rq *rq, struct task_struct *p) |
| 5469 | { | 5319 | { |
| 5470 | if (prev->state == TASK_RUNNING) { | 5320 | u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime; |
| 5471 | u64 runtime = prev->se.sum_exec_runtime; | ||
| 5472 | 5321 | ||
| 5473 | runtime -= prev->se.prev_sum_exec_runtime; | 5322 | update_avg(&p->se.avg_running, runtime); |
| 5474 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); | ||
| 5475 | 5323 | ||
| 5324 | if (p->state == TASK_RUNNING) { | ||
| 5476 | /* | 5325 | /* |
| 5477 | * In order to avoid avg_overlap growing stale when we are | 5326 | * In order to avoid avg_overlap growing stale when we are |
| 5478 | * indeed overlapping and hence not getting put to sleep, grow | 5327 | * indeed overlapping and hence not getting put to sleep, grow |
| @@ -5482,9 +5331,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
| 5482 | * correlates to the amount of cache footprint a task can | 5331 | * correlates to the amount of cache footprint a task can |
| 5483 | * build up. | 5332 | * build up. |
| 5484 | */ | 5333 | */ |
| 5485 | update_avg(&prev->se.avg_overlap, runtime); | 5334 | runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); |
| 5335 | update_avg(&p->se.avg_overlap, runtime); | ||
| 5336 | } else { | ||
| 5337 | update_avg(&p->se.avg_running, 0); | ||
| 5486 | } | 5338 | } |
| 5487 | prev->sched_class->put_prev_task(rq, prev); | 5339 | p->sched_class->put_prev_task(rq, p); |
| 5488 | } | 5340 | } |
| 5489 | 5341 | ||
| 5490 | /* | 5342 | /* |
| @@ -5567,7 +5419,7 @@ need_resched_nonpreemptible: | |||
| 5567 | 5419 | ||
| 5568 | if (likely(prev != next)) { | 5420 | if (likely(prev != next)) { |
| 5569 | sched_info_switch(prev, next); | 5421 | sched_info_switch(prev, next); |
| 5570 | perf_counter_task_sched_out(prev, next, cpu); | 5422 | perf_event_task_sched_out(prev, next, cpu); |
| 5571 | 5423 | ||
| 5572 | rq->nr_switches++; | 5424 | rq->nr_switches++; |
| 5573 | rq->curr = next; | 5425 | rq->curr = next; |
| @@ -5716,10 +5568,10 @@ asmlinkage void __sched preempt_schedule_irq(void) | |||
| 5716 | 5568 | ||
| 5717 | #endif /* CONFIG_PREEMPT */ | 5569 | #endif /* CONFIG_PREEMPT */ |
| 5718 | 5570 | ||
| 5719 | int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, | 5571 | int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags, |
| 5720 | void *key) | 5572 | void *key) |
| 5721 | { | 5573 | { |
| 5722 | return try_to_wake_up(curr->private, mode, sync); | 5574 | return try_to_wake_up(curr->private, mode, wake_flags); |
| 5723 | } | 5575 | } |
| 5724 | EXPORT_SYMBOL(default_wake_function); | 5576 | EXPORT_SYMBOL(default_wake_function); |
| 5725 | 5577 | ||
| @@ -5733,14 +5585,14 @@ EXPORT_SYMBOL(default_wake_function); | |||
| 5733 | * zero in this (rare) case, and we handle it by continuing to scan the queue. | 5585 | * zero in this (rare) case, and we handle it by continuing to scan the queue. |
| 5734 | */ | 5586 | */ |
| 5735 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, | 5587 | static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, |
| 5736 | int nr_exclusive, int sync, void *key) | 5588 | int nr_exclusive, int wake_flags, void *key) |
| 5737 | { | 5589 | { |
| 5738 | wait_queue_t *curr, *next; | 5590 | wait_queue_t *curr, *next; |
| 5739 | 5591 | ||
| 5740 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { | 5592 | list_for_each_entry_safe(curr, next, &q->task_list, task_list) { |
| 5741 | unsigned flags = curr->flags; | 5593 | unsigned flags = curr->flags; |
| 5742 | 5594 | ||
| 5743 | if (curr->func(curr, mode, sync, key) && | 5595 | if (curr->func(curr, mode, wake_flags, key) && |
| 5744 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) | 5596 | (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) |
| 5745 | break; | 5597 | break; |
| 5746 | } | 5598 | } |
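Editorial note: __wake_up_common() keeps its exclusive-wakeup accounting while the sync flag is folded into wake_flags. Each waiter's callback (ultimately default_wake_function() and try_to_wake_up()) now receives the flags, and the scan stops once nr_exclusive waiters marked WQ_FLAG_EXCLUSIVE report a successful wakeup. A condensed user-space model of that loop, not part of the patch; the WF_SYNC value is the same assumption used in the wakeup sketch above:

#include <stdio.h>

#define WQ_FLAG_EXCLUSIVE 0x01
#define WF_SYNC           0x01   /* assumed value */

struct waiter {
	const char *name;
	unsigned flags;
	int (*func)(struct waiter *w, int wake_flags);  /* returns 1 if it woke a task */
};

static int wake_one(struct waiter *w, int wake_flags)
{
	printf("woke %s (sync=%d)\n", w->name, !!(wake_flags & WF_SYNC));
	return 1;
}

static void wake_up_common(struct waiter *q, int n, int nr_exclusive, int wake_flags)
{
	for (int i = 0; i < n; i++) {
		unsigned flags = q[i].flags;

		/* Stop after enough exclusive waiters have actually been woken. */
		if (q[i].func(&q[i], wake_flags) &&
		    (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
			break;
	}
}

int main(void)
{
	struct waiter q[] = {
		{ "A", 0,                 wake_one },
		{ "B", WQ_FLAG_EXCLUSIVE, wake_one },
		{ "C", WQ_FLAG_EXCLUSIVE, wake_one },   /* not reached: nr_exclusive = 1 */
	};
	wake_up_common(q, 3, 1, WF_SYNC);
	return 0;
}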
| @@ -5801,16 +5653,16 @@ void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, | |||
| 5801 | int nr_exclusive, void *key) | 5653 | int nr_exclusive, void *key) |
| 5802 | { | 5654 | { |
| 5803 | unsigned long flags; | 5655 | unsigned long flags; |
| 5804 | int sync = 1; | 5656 | int wake_flags = WF_SYNC; |
| 5805 | 5657 | ||
| 5806 | if (unlikely(!q)) | 5658 | if (unlikely(!q)) |
| 5807 | return; | 5659 | return; |
| 5808 | 5660 | ||
| 5809 | if (unlikely(!nr_exclusive)) | 5661 | if (unlikely(!nr_exclusive)) |
| 5810 | sync = 0; | 5662 | wake_flags = 0; |
| 5811 | 5663 | ||
| 5812 | spin_lock_irqsave(&q->lock, flags); | 5664 | spin_lock_irqsave(&q->lock, flags); |
| 5813 | __wake_up_common(q, mode, nr_exclusive, sync, key); | 5665 | __wake_up_common(q, mode, nr_exclusive, wake_flags, key); |
| 5814 | spin_unlock_irqrestore(&q->lock, flags); | 5666 | spin_unlock_irqrestore(&q->lock, flags); |
| 5815 | } | 5667 | } |
| 5816 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); | 5668 | EXPORT_SYMBOL_GPL(__wake_up_sync_key); |
| @@ -6977,23 +6829,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, | |||
| 6977 | if (retval) | 6829 | if (retval) |
| 6978 | goto out_unlock; | 6830 | goto out_unlock; |
| 6979 | 6831 | ||
| 6980 | /* | 6832 | time_slice = p->sched_class->get_rr_interval(p); |
| 6981 | * Time slice is 0 for SCHED_FIFO tasks and for SCHED_OTHER | ||
| 6982 | * tasks that are on an otherwise idle runqueue: | ||
| 6983 | */ | ||
| 6984 | time_slice = 0; | ||
| 6985 | if (p->policy == SCHED_RR) { | ||
| 6986 | time_slice = DEF_TIMESLICE; | ||
| 6987 | } else if (p->policy != SCHED_FIFO) { | ||
| 6988 | struct sched_entity *se = &p->se; | ||
| 6989 | unsigned long flags; | ||
| 6990 | struct rq *rq; | ||
| 6991 | 6833 | ||
| 6992 | rq = task_rq_lock(p, &flags); | ||
| 6993 | if (rq->cfs.load.weight) | ||
| 6994 | time_slice = NS_TO_JIFFIES(sched_slice(&rq->cfs, se)); | ||
| 6995 | task_rq_unlock(rq, &flags); | ||
| 6996 | } | ||
| 6997 | read_unlock(&tasklist_lock); | 6834 | read_unlock(&tasklist_lock); |
| 6998 | jiffies_to_timespec(time_slice, &t); | 6835 | jiffies_to_timespec(time_slice, &t); |
| 6999 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; | 6836 | retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0; |
| @@ -7844,7 +7681,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 7844 | /* | 7681 | /* |
| 7845 | * Register at high priority so that task migration (migrate_all_tasks) | 7682 | * Register at high priority so that task migration (migrate_all_tasks) |
| 7846 | * happens before everything else. This has to be lower priority than | 7683 | * happens before everything else. This has to be lower priority than |
| 7847 | * the notifier in the perf_counter subsystem, though. | 7684 | * the notifier in the perf_event subsystem, though. |
| 7848 | */ | 7685 | */ |
| 7849 | static struct notifier_block __cpuinitdata migration_notifier = { | 7686 | static struct notifier_block __cpuinitdata migration_notifier = { |
| 7850 | .notifier_call = migration_call, | 7687 | .notifier_call = migration_call, |
| @@ -8000,9 +7837,7 @@ static int sd_degenerate(struct sched_domain *sd) | |||
| 8000 | } | 7837 | } |
| 8001 | 7838 | ||
| 8002 | /* Following flags don't use groups */ | 7839 | /* Following flags don't use groups */ |
| 8003 | if (sd->flags & (SD_WAKE_IDLE | | 7840 | if (sd->flags & (SD_WAKE_AFFINE)) |
| 8004 | SD_WAKE_AFFINE | | ||
| 8005 | SD_WAKE_BALANCE)) | ||
| 8006 | return 0; | 7841 | return 0; |
| 8007 | 7842 | ||
| 8008 | return 1; | 7843 | return 1; |
| @@ -8019,10 +7854,6 @@ sd_parent_degenerate(struct sched_domain *sd, struct sched_domain *parent) | |||
| 8019 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) | 7854 | if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent))) |
| 8020 | return 0; | 7855 | return 0; |
| 8021 | 7856 | ||
| 8022 | /* Does parent contain flags not in child? */ | ||
| 8023 | /* WAKE_BALANCE is a subset of WAKE_AFFINE */ | ||
| 8024 | if (cflags & SD_WAKE_AFFINE) | ||
| 8025 | pflags &= ~SD_WAKE_BALANCE; | ||
| 8026 | /* Flags needing groups don't count if only 1 group in parent */ | 7857 | /* Flags needing groups don't count if only 1 group in parent */ |
| 8027 | if (parent->groups == parent->groups->next) { | 7858 | if (parent->groups == parent->groups->next) { |
| 8028 | pflags &= ~(SD_LOAD_BALANCE | | 7859 | pflags &= ~(SD_LOAD_BALANCE | |
| @@ -8708,10 +8539,10 @@ static void set_domain_attribute(struct sched_domain *sd, | |||
| 8708 | request = attr->relax_domain_level; | 8539 | request = attr->relax_domain_level; |
| 8709 | if (request < sd->level) { | 8540 | if (request < sd->level) { |
| 8710 | /* turn off idle balance on this domain */ | 8541 | /* turn off idle balance on this domain */ |
| 8711 | sd->flags &= ~(SD_WAKE_IDLE|SD_BALANCE_NEWIDLE); | 8542 | sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
| 8712 | } else { | 8543 | } else { |
| 8713 | /* turn on idle balance on this domain */ | 8544 | /* turn on idle balance on this domain */ |
| 8714 | sd->flags |= (SD_WAKE_IDLE_FAR|SD_BALANCE_NEWIDLE); | 8545 | sd->flags |= (SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE); |
| 8715 | } | 8546 | } |
| 8716 | } | 8547 | } |
| 8717 | 8548 | ||
| @@ -9329,6 +9160,7 @@ void __init sched_init_smp(void) | |||
| 9329 | cpumask_var_t non_isolated_cpus; | 9160 | cpumask_var_t non_isolated_cpus; |
| 9330 | 9161 | ||
| 9331 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); | 9162 | alloc_cpumask_var(&non_isolated_cpus, GFP_KERNEL); |
| 9163 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); | ||
| 9332 | 9164 | ||
| 9333 | #if defined(CONFIG_NUMA) | 9165 | #if defined(CONFIG_NUMA) |
| 9334 | sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), | 9166 | sched_group_nodes_bycpu = kzalloc(nr_cpu_ids * sizeof(void **), |
| @@ -9360,7 +9192,6 @@ void __init sched_init_smp(void) | |||
| 9360 | sched_init_granularity(); | 9192 | sched_init_granularity(); |
| 9361 | free_cpumask_var(non_isolated_cpus); | 9193 | free_cpumask_var(non_isolated_cpus); |
| 9362 | 9194 | ||
| 9363 | alloc_cpumask_var(&fallback_doms, GFP_KERNEL); | ||
| 9364 | init_sched_rt_class(); | 9195 | init_sched_rt_class(); |
| 9365 | } | 9196 | } |
| 9366 | #else | 9197 | #else |
| @@ -9707,7 +9538,7 @@ void __init sched_init(void) | |||
| 9707 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); | 9538 | alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); |
| 9708 | #endif /* SMP */ | 9539 | #endif /* SMP */ |
| 9709 | 9540 | ||
| 9710 | perf_counter_init(); | 9541 | perf_event_init(); |
| 9711 | 9542 | ||
| 9712 | scheduler_running = 1; | 9543 | scheduler_running = 1; |
| 9713 | } | 9544 | } |
| @@ -10479,7 +10310,7 @@ static int sched_rt_global_constraints(void) | |||
| 10479 | #endif /* CONFIG_RT_GROUP_SCHED */ | 10310 | #endif /* CONFIG_RT_GROUP_SCHED */ |
| 10480 | 10311 | ||
| 10481 | int sched_rt_handler(struct ctl_table *table, int write, | 10312 | int sched_rt_handler(struct ctl_table *table, int write, |
| 10482 | struct file *filp, void __user *buffer, size_t *lenp, | 10313 | void __user *buffer, size_t *lenp, |
| 10483 | loff_t *ppos) | 10314 | loff_t *ppos) |
| 10484 | { | 10315 | { |
| 10485 | int ret; | 10316 | int ret; |
| @@ -10490,7 +10321,7 @@ int sched_rt_handler(struct ctl_table *table, int write, | |||
| 10490 | old_period = sysctl_sched_rt_period; | 10321 | old_period = sysctl_sched_rt_period; |
| 10491 | old_runtime = sysctl_sched_rt_runtime; | 10322 | old_runtime = sysctl_sched_rt_runtime; |
| 10492 | 10323 | ||
| 10493 | ret = proc_dointvec(table, write, filp, buffer, lenp, ppos); | 10324 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
| 10494 | 10325 | ||
| 10495 | if (!ret && write) { | 10326 | if (!ret && write) { |
| 10496 | ret = sched_rt_global_constraints(); | 10327 | ret = sched_rt_global_constraints(); |
| @@ -10544,8 +10375,7 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
| 10544 | } | 10375 | } |
| 10545 | 10376 | ||
| 10546 | static int | 10377 | static int |
| 10547 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 10378 | cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) |
| 10548 | struct task_struct *tsk) | ||
| 10549 | { | 10379 | { |
| 10550 | #ifdef CONFIG_RT_GROUP_SCHED | 10380 | #ifdef CONFIG_RT_GROUP_SCHED |
| 10551 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) | 10381 | if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) |
| @@ -10555,15 +10385,45 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | |||
| 10555 | if (tsk->sched_class != &fair_sched_class) | 10385 | if (tsk->sched_class != &fair_sched_class) |
| 10556 | return -EINVAL; | 10386 | return -EINVAL; |
| 10557 | #endif | 10387 | #endif |
| 10388 | return 0; | ||
| 10389 | } | ||
| 10558 | 10390 | ||
| 10391 | static int | ||
| 10392 | cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | ||
| 10393 | struct task_struct *tsk, bool threadgroup) | ||
| 10394 | { | ||
| 10395 | int retval = cpu_cgroup_can_attach_task(cgrp, tsk); | ||
| 10396 | if (retval) | ||
| 10397 | return retval; | ||
| 10398 | if (threadgroup) { | ||
| 10399 | struct task_struct *c; | ||
| 10400 | rcu_read_lock(); | ||
| 10401 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
| 10402 | retval = cpu_cgroup_can_attach_task(cgrp, c); | ||
| 10403 | if (retval) { | ||
| 10404 | rcu_read_unlock(); | ||
| 10405 | return retval; | ||
| 10406 | } | ||
| 10407 | } | ||
| 10408 | rcu_read_unlock(); | ||
| 10409 | } | ||
| 10559 | return 0; | 10410 | return 0; |
| 10560 | } | 10411 | } |
| 10561 | 10412 | ||
| 10562 | static void | 10413 | static void |
| 10563 | cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, | 10414 | cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, |
| 10564 | struct cgroup *old_cont, struct task_struct *tsk) | 10415 | struct cgroup *old_cont, struct task_struct *tsk, |
| 10416 | bool threadgroup) | ||
| 10565 | { | 10417 | { |
| 10566 | sched_move_task(tsk); | 10418 | sched_move_task(tsk); |
| 10419 | if (threadgroup) { | ||
| 10420 | struct task_struct *c; | ||
| 10421 | rcu_read_lock(); | ||
| 10422 | list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) { | ||
| 10423 | sched_move_task(c); | ||
| 10424 | } | ||
| 10425 | rcu_read_unlock(); | ||
| 10426 | } | ||
| 10567 | } | 10427 | } |
| 10568 | 10428 | ||
| 10569 | #ifdef CONFIG_FAIR_GROUP_SCHED | 10429 | #ifdef CONFIG_FAIR_GROUP_SCHED |
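Editorial note: the new threadgroup variants follow a validate-then-commit pattern. cpu_cgroup_can_attach() re-runs the per-task check over every thread in the group under rcu_read_lock() and rejects the whole attach on the first failure, and cpu_cgroup_attach() then moves the leader plus, when threadgroup is set, each sibling. A plain-C sketch of that all-or-nothing shape; the task array here is purely illustrative, whereas the kernel walks tsk->thread_group under RCU:

#include <stdio.h>

struct task { const char *name; int is_rt; };

/* Per-task policy check, mirroring cpu_cgroup_can_attach_task(). */
static int can_attach_task(const struct task *t)
{
	return t->is_rt ? -1 /* -EINVAL */ : 0;
}

static void move_task(const struct task *t)
{
	printf("moved %s\n", t->name);
}

static int attach_group(struct task *group, int n, int threadgroup)
{
	/* Validate every thread first: any failure aborts the whole attach. */
	for (int i = 0; i < (threadgroup ? n : 1); i++)
		if (can_attach_task(&group[i]))
			return -1;

	/* Only then commit, moving the leader and (optionally) all siblings. */
	for (int i = 0; i < (threadgroup ? n : 1); i++)
		move_task(&group[i]);
	return 0;
}

int main(void)
{
	struct task g[] = { { "leader", 0 }, { "worker1", 0 }, { "worker2", 0 } };
	return attach_group(g, 3, 1) ? 1 : 0;
}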
