| field | value | date |
|---|---|---|
| author | Takashi Iwai <tiwai@suse.de> | 2011-08-08 08:30:29 -0400 |
| committer | Takashi Iwai <tiwai@suse.de> | 2011-08-08 08:30:29 -0400 |
| commit | 0a2d31b62dba9b5b92a38c67c9cc42630513662a (patch) | |
| tree | f755d74ec85248de645e10c45ed1a2ed467530f6 /kernel/sched.c | |
| parent | 8039290a91c5dc4414093c086987a5d7738fe2fd (diff) | |
| parent | df944f66784e6d4f2f50739263a4947885d8b6ae (diff) | |
Merge branch 'fix/kconfig' into for-linus
Diffstat (limited to 'kernel/sched.c')
| -rw-r--r-- | kernel/sched.c | 492 |
1 file changed, 342 insertions(+), 150 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index cbb3a0eee58e..ccacdbdecf45 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
| @@ -75,6 +75,9 @@ | |||
| 75 | #include <asm/tlb.h> | 75 | #include <asm/tlb.h> |
| 76 | #include <asm/irq_regs.h> | 76 | #include <asm/irq_regs.h> |
| 77 | #include <asm/mutex.h> | 77 | #include <asm/mutex.h> |
| 78 | #ifdef CONFIG_PARAVIRT | ||
| 79 | #include <asm/paravirt.h> | ||
| 80 | #endif | ||
| 78 | 81 | ||
| 79 | #include "sched_cpupri.h" | 82 | #include "sched_cpupri.h" |
| 80 | #include "workqueue_sched.h" | 83 | #include "workqueue_sched.h" |
| @@ -124,7 +127,7 @@ | |||
| 124 | 127 | ||
| 125 | static inline int rt_policy(int policy) | 128 | static inline int rt_policy(int policy) |
| 126 | { | 129 | { |
| 127 | if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) | 130 | if (policy == SCHED_FIFO || policy == SCHED_RR) |
| 128 | return 1; | 131 | return 1; |
| 129 | return 0; | 132 | return 0; |
| 130 | } | 133 | } |
| @@ -292,8 +295,8 @@ static DEFINE_SPINLOCK(task_group_lock); | |||
| 292 | * (The default weight is 1024 - so there's no practical | 295 | * (The default weight is 1024 - so there's no practical |
| 293 | * limitation from this.) | 296 | * limitation from this.) |
| 294 | */ | 297 | */ |
| 295 | #define MIN_SHARES 2 | 298 | #define MIN_SHARES (1UL << 1) |
| 296 | #define MAX_SHARES (1UL << (18 + SCHED_LOAD_RESOLUTION)) | 299 | #define MAX_SHARES (1UL << 18) |
| 297 | 300 | ||
| 298 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; | 301 | static int root_task_group_load = ROOT_TASK_GROUP_LOAD; |
| 299 | #endif | 302 | #endif |
| @@ -422,6 +425,7 @@ struct rt_rq { | |||
| 422 | */ | 425 | */ |
| 423 | struct root_domain { | 426 | struct root_domain { |
| 424 | atomic_t refcount; | 427 | atomic_t refcount; |
| 428 | atomic_t rto_count; | ||
| 425 | struct rcu_head rcu; | 429 | struct rcu_head rcu; |
| 426 | cpumask_var_t span; | 430 | cpumask_var_t span; |
| 427 | cpumask_var_t online; | 431 | cpumask_var_t online; |
| @@ -431,7 +435,6 @@ struct root_domain { | |||
| 431 | * one runnable RT task. | 435 | * one runnable RT task. |
| 432 | */ | 436 | */ |
| 433 | cpumask_var_t rto_mask; | 437 | cpumask_var_t rto_mask; |
| 434 | atomic_t rto_count; | ||
| 435 | struct cpupri cpupri; | 438 | struct cpupri cpupri; |
| 436 | }; | 439 | }; |
| 437 | 440 | ||
| @@ -528,6 +531,12 @@ struct rq { | |||
| 528 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 531 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| 529 | u64 prev_irq_time; | 532 | u64 prev_irq_time; |
| 530 | #endif | 533 | #endif |
| 534 | #ifdef CONFIG_PARAVIRT | ||
| 535 | u64 prev_steal_time; | ||
| 536 | #endif | ||
| 537 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
| 538 | u64 prev_steal_time_rq; | ||
| 539 | #endif | ||
| 531 | 540 | ||
| 532 | /* calc_load related fields */ | 541 | /* calc_load related fields */ |
| 533 | unsigned long calc_load_update; | 542 | unsigned long calc_load_update; |
| @@ -581,7 +590,6 @@ static inline int cpu_of(struct rq *rq) | |||
| 581 | 590 | ||
| 582 | #define rcu_dereference_check_sched_domain(p) \ | 591 | #define rcu_dereference_check_sched_domain(p) \ |
| 583 | rcu_dereference_check((p), \ | 592 | rcu_dereference_check((p), \ |
| 584 | rcu_read_lock_held() || \ | ||
| 585 | lockdep_is_held(&sched_domains_mutex)) | 593 | lockdep_is_held(&sched_domains_mutex)) |
| 586 | 594 | ||
| 587 | /* | 595 | /* |
| @@ -605,10 +613,10 @@ static inline int cpu_of(struct rq *rq) | |||
| 605 | /* | 613 | /* |
| 606 | * Return the group to which this tasks belongs. | 614 | * Return the group to which this tasks belongs. |
| 607 | * | 615 | * |
| 608 | * We use task_subsys_state_check() and extend the RCU verification | 616 | * We use task_subsys_state_check() and extend the RCU verification with |
| 609 | * with lockdep_is_held(&p->pi_lock) because cpu_cgroup_attach() | 617 | * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each |
| 610 | * holds that lock for each task it moves into the cgroup. Therefore | 618 | * task it moves into the cgroup. Therefore by holding either of those locks, |
| 611 | * by holding that lock, we pin the task to the current cgroup. | 619 | * we pin the task to the current cgroup. |
| 612 | */ | 620 | */ |
| 613 | static inline struct task_group *task_group(struct task_struct *p) | 621 | static inline struct task_group *task_group(struct task_struct *p) |
| 614 | { | 622 | { |
| @@ -616,7 +624,8 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
| 616 | struct cgroup_subsys_state *css; | 624 | struct cgroup_subsys_state *css; |
| 617 | 625 | ||
| 618 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | 626 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, |
| 619 | lockdep_is_held(&p->pi_lock)); | 627 | lockdep_is_held(&p->pi_lock) || |
| 628 | lockdep_is_held(&task_rq(p)->lock)); | ||
| 620 | tg = container_of(css, struct task_group, css); | 629 | tg = container_of(css, struct task_group, css); |
| 621 | 630 | ||
| 622 | return autogroup_task_group(p, tg); | 631 | return autogroup_task_group(p, tg); |
| @@ -1567,38 +1576,6 @@ static unsigned long cpu_avg_load_per_task(int cpu) | |||
| 1567 | return rq->avg_load_per_task; | 1576 | return rq->avg_load_per_task; |
| 1568 | } | 1577 | } |
| 1569 | 1578 | ||
| 1570 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 1571 | |||
| 1572 | /* | ||
| 1573 | * Compute the cpu's hierarchical load factor for each task group. | ||
| 1574 | * This needs to be done in a top-down fashion because the load of a child | ||
| 1575 | * group is a fraction of its parents load. | ||
| 1576 | */ | ||
| 1577 | static int tg_load_down(struct task_group *tg, void *data) | ||
| 1578 | { | ||
| 1579 | unsigned long load; | ||
| 1580 | long cpu = (long)data; | ||
| 1581 | |||
| 1582 | if (!tg->parent) { | ||
| 1583 | load = cpu_rq(cpu)->load.weight; | ||
| 1584 | } else { | ||
| 1585 | load = tg->parent->cfs_rq[cpu]->h_load; | ||
| 1586 | load *= tg->se[cpu]->load.weight; | ||
| 1587 | load /= tg->parent->cfs_rq[cpu]->load.weight + 1; | ||
| 1588 | } | ||
| 1589 | |||
| 1590 | tg->cfs_rq[cpu]->h_load = load; | ||
| 1591 | |||
| 1592 | return 0; | ||
| 1593 | } | ||
| 1594 | |||
| 1595 | static void update_h_load(long cpu) | ||
| 1596 | { | ||
| 1597 | walk_tg_tree(tg_load_down, tg_nop, (void *)cpu); | ||
| 1598 | } | ||
| 1599 | |||
| 1600 | #endif | ||
| 1601 | |||
| 1602 | #ifdef CONFIG_PREEMPT | 1579 | #ifdef CONFIG_PREEMPT |
| 1603 | 1580 | ||
| 1604 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); | 1581 | static void double_rq_lock(struct rq *rq1, struct rq *rq2); |
| @@ -1952,10 +1929,28 @@ void account_system_vtime(struct task_struct *curr) | |||
| 1952 | } | 1929 | } |
| 1953 | EXPORT_SYMBOL_GPL(account_system_vtime); | 1930 | EXPORT_SYMBOL_GPL(account_system_vtime); |
| 1954 | 1931 | ||
| 1955 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 1932 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ |
| 1933 | |||
| 1934 | #ifdef CONFIG_PARAVIRT | ||
| 1935 | static inline u64 steal_ticks(u64 steal) | ||
| 1956 | { | 1936 | { |
| 1957 | s64 irq_delta; | 1937 | if (unlikely(steal > NSEC_PER_SEC)) |
| 1938 | return div_u64(steal, TICK_NSEC); | ||
| 1958 | 1939 | ||
| 1940 | return __iter_div_u64_rem(steal, TICK_NSEC, &steal); | ||
| 1941 | } | ||
| 1942 | #endif | ||
| 1943 | |||
| 1944 | static void update_rq_clock_task(struct rq *rq, s64 delta) | ||
| 1945 | { | ||
| 1946 | /* | ||
| 1947 | * In theory, the compile should just see 0 here, and optimize out the call | ||
| 1948 | * to sched_rt_avg_update. But I don't trust it... | ||
| 1949 | */ | ||
| 1950 | #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) | ||
| 1951 | s64 steal = 0, irq_delta = 0; | ||
| 1952 | #endif | ||
| 1953 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1959 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; | 1954 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; |
| 1960 | 1955 | ||
| 1961 | /* | 1956 | /* |
| @@ -1978,12 +1973,35 @@ static void update_rq_clock_task(struct rq *rq, s64 delta) | |||
| 1978 | 1973 | ||
| 1979 | rq->prev_irq_time += irq_delta; | 1974 | rq->prev_irq_time += irq_delta; |
| 1980 | delta -= irq_delta; | 1975 | delta -= irq_delta; |
| 1976 | #endif | ||
| 1977 | #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING | ||
| 1978 | if (static_branch((¶virt_steal_rq_enabled))) { | ||
| 1979 | u64 st; | ||
| 1980 | |||
| 1981 | steal = paravirt_steal_clock(cpu_of(rq)); | ||
| 1982 | steal -= rq->prev_steal_time_rq; | ||
| 1983 | |||
| 1984 | if (unlikely(steal > delta)) | ||
| 1985 | steal = delta; | ||
| 1986 | |||
| 1987 | st = steal_ticks(steal); | ||
| 1988 | steal = st * TICK_NSEC; | ||
| 1989 | |||
| 1990 | rq->prev_steal_time_rq += steal; | ||
| 1991 | |||
| 1992 | delta -= steal; | ||
| 1993 | } | ||
| 1994 | #endif | ||
| 1995 | |||
| 1981 | rq->clock_task += delta; | 1996 | rq->clock_task += delta; |
| 1982 | 1997 | ||
| 1983 | if (irq_delta && sched_feat(NONIRQ_POWER)) | 1998 | #if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING) |
| 1984 | sched_rt_avg_update(rq, irq_delta); | 1999 | if ((irq_delta + steal) && sched_feat(NONTASK_POWER)) |
| 2000 | sched_rt_avg_update(rq, irq_delta + steal); | ||
| 2001 | #endif | ||
| 1985 | } | 2002 | } |
| 1986 | 2003 | ||
| 2004 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
| 1987 | static int irqtime_account_hi_update(void) | 2005 | static int irqtime_account_hi_update(void) |
| 1988 | { | 2006 | { |
| 1989 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 2007 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
| @@ -2018,12 +2036,7 @@ static int irqtime_account_si_update(void) | |||
| 2018 | 2036 | ||
| 2019 | #define sched_clock_irqtime (0) | 2037 | #define sched_clock_irqtime (0) |
| 2020 | 2038 | ||
| 2021 | static void update_rq_clock_task(struct rq *rq, s64 delta) | 2039 | #endif |
| 2022 | { | ||
| 2023 | rq->clock_task += delta; | ||
| 2024 | } | ||
| 2025 | |||
| 2026 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
| 2027 | 2040 | ||
| 2028 | #include "sched_idletask.c" | 2041 | #include "sched_idletask.c" |
| 2029 | #include "sched_fair.c" | 2042 | #include "sched_fair.c" |
| @@ -2200,6 +2213,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2200 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); | 2213 | !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)); |
| 2201 | 2214 | ||
| 2202 | #ifdef CONFIG_LOCKDEP | 2215 | #ifdef CONFIG_LOCKDEP |
| 2216 | /* | ||
| 2217 | * The caller should hold either p->pi_lock or rq->lock, when changing | ||
| 2218 | * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. | ||
| 2219 | * | ||
| 2220 | * sched_move_task() holds both and thus holding either pins the cgroup, | ||
| 2221 | * see set_task_rq(). | ||
| 2222 | * | ||
| 2223 | * Furthermore, all task_rq users should acquire both locks, see | ||
| 2224 | * task_rq_lock(). | ||
| 2225 | */ | ||
| 2203 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || | 2226 | WARN_ON_ONCE(debug_locks && !(lockdep_is_held(&p->pi_lock) || |
| 2204 | lockdep_is_held(&task_rq(p)->lock))); | 2227 | lockdep_is_held(&task_rq(p)->lock))); |
| 2205 | #endif | 2228 | #endif |
| @@ -2209,7 +2232,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 2209 | 2232 | ||
| 2210 | if (task_cpu(p) != new_cpu) { | 2233 | if (task_cpu(p) != new_cpu) { |
| 2211 | p->se.nr_migrations++; | 2234 | p->se.nr_migrations++; |
| 2212 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 1, NULL, 0); | 2235 | perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); |
| 2213 | } | 2236 | } |
| 2214 | 2237 | ||
| 2215 | __set_task_cpu(p, new_cpu); | 2238 | __set_task_cpu(p, new_cpu); |
| @@ -2447,6 +2470,10 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
| 2447 | } | 2470 | } |
| 2448 | rcu_read_unlock(); | 2471 | rcu_read_unlock(); |
| 2449 | } | 2472 | } |
| 2473 | |||
| 2474 | if (wake_flags & WF_MIGRATED) | ||
| 2475 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
| 2476 | |||
| 2450 | #endif /* CONFIG_SMP */ | 2477 | #endif /* CONFIG_SMP */ |
| 2451 | 2478 | ||
| 2452 | schedstat_inc(rq, ttwu_count); | 2479 | schedstat_inc(rq, ttwu_count); |
| @@ -2455,9 +2482,6 @@ ttwu_stat(struct task_struct *p, int cpu, int wake_flags) | |||
| 2455 | if (wake_flags & WF_SYNC) | 2482 | if (wake_flags & WF_SYNC) |
| 2456 | schedstat_inc(p, se.statistics.nr_wakeups_sync); | 2483 | schedstat_inc(p, se.statistics.nr_wakeups_sync); |
| 2457 | 2484 | ||
| 2458 | if (cpu != task_cpu(p)) | ||
| 2459 | schedstat_inc(p, se.statistics.nr_wakeups_migrate); | ||
| 2460 | |||
| 2461 | #endif /* CONFIG_SCHEDSTATS */ | 2485 | #endif /* CONFIG_SCHEDSTATS */ |
| 2462 | } | 2486 | } |
| 2463 | 2487 | ||
| @@ -2485,7 +2509,7 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) | |||
| 2485 | if (p->sched_class->task_woken) | 2509 | if (p->sched_class->task_woken) |
| 2486 | p->sched_class->task_woken(rq, p); | 2510 | p->sched_class->task_woken(rq, p); |
| 2487 | 2511 | ||
| 2488 | if (unlikely(rq->idle_stamp)) { | 2512 | if (rq->idle_stamp) { |
| 2489 | u64 delta = rq->clock - rq->idle_stamp; | 2513 | u64 delta = rq->clock - rq->idle_stamp; |
| 2490 | u64 max = 2*sysctl_sched_migration_cost; | 2514 | u64 max = 2*sysctl_sched_migration_cost; |
| 2491 | 2515 | ||
| @@ -2532,13 +2556,9 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) | |||
| 2532 | } | 2556 | } |
| 2533 | 2557 | ||
| 2534 | #ifdef CONFIG_SMP | 2558 | #ifdef CONFIG_SMP |
| 2535 | static void sched_ttwu_pending(void) | 2559 | static void sched_ttwu_do_pending(struct task_struct *list) |
| 2536 | { | 2560 | { |
| 2537 | struct rq *rq = this_rq(); | 2561 | struct rq *rq = this_rq(); |
| 2538 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2539 | |||
| 2540 | if (!list) | ||
| 2541 | return; | ||
| 2542 | 2562 | ||
| 2543 | raw_spin_lock(&rq->lock); | 2563 | raw_spin_lock(&rq->lock); |
| 2544 | 2564 | ||
| @@ -2551,9 +2571,45 @@ static void sched_ttwu_pending(void) | |||
| 2551 | raw_spin_unlock(&rq->lock); | 2571 | raw_spin_unlock(&rq->lock); |
| 2552 | } | 2572 | } |
| 2553 | 2573 | ||
| 2574 | #ifdef CONFIG_HOTPLUG_CPU | ||
| 2575 | |||
| 2576 | static void sched_ttwu_pending(void) | ||
| 2577 | { | ||
| 2578 | struct rq *rq = this_rq(); | ||
| 2579 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2580 | |||
| 2581 | if (!list) | ||
| 2582 | return; | ||
| 2583 | |||
| 2584 | sched_ttwu_do_pending(list); | ||
| 2585 | } | ||
| 2586 | |||
| 2587 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
| 2588 | |||
| 2554 | void scheduler_ipi(void) | 2589 | void scheduler_ipi(void) |
| 2555 | { | 2590 | { |
| 2556 | sched_ttwu_pending(); | 2591 | struct rq *rq = this_rq(); |
| 2592 | struct task_struct *list = xchg(&rq->wake_list, NULL); | ||
| 2593 | |||
| 2594 | if (!list) | ||
| 2595 | return; | ||
| 2596 | |||
| 2597 | /* | ||
| 2598 | * Not all reschedule IPI handlers call irq_enter/irq_exit, since | ||
| 2599 | * traditionally all their work was done from the interrupt return | ||
| 2600 | * path. Now that we actually do some work, we need to make sure | ||
| 2601 | * we do call them. | ||
| 2602 | * | ||
| 2603 | * Some archs already do call them, luckily irq_enter/exit nest | ||
| 2604 | * properly. | ||
| 2605 | * | ||
| 2606 | * Arguably we should visit all archs and update all handlers, | ||
| 2607 | * however a fair share of IPIs are still resched only so this would | ||
| 2608 | * somewhat pessimize the simple resched case. | ||
| 2609 | */ | ||
| 2610 | irq_enter(); | ||
| 2611 | sched_ttwu_do_pending(list); | ||
| 2612 | irq_exit(); | ||
| 2557 | } | 2613 | } |
| 2558 | 2614 | ||
| 2559 | static void ttwu_queue_remote(struct task_struct *p, int cpu) | 2615 | static void ttwu_queue_remote(struct task_struct *p, int cpu) |
| @@ -2600,6 +2656,7 @@ static void ttwu_queue(struct task_struct *p, int cpu) | |||
| 2600 | 2656 | ||
| 2601 | #if defined(CONFIG_SMP) | 2657 | #if defined(CONFIG_SMP) |
| 2602 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { | 2658 | if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) { |
| 2659 | sched_clock_cpu(cpu); /* sync clocks x-cpu */ | ||
| 2603 | ttwu_queue_remote(p, cpu); | 2660 | ttwu_queue_remote(p, cpu); |
| 2604 | return; | 2661 | return; |
| 2605 | } | 2662 | } |
| @@ -2674,8 +2731,10 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) | |||
| 2674 | p->sched_class->task_waking(p); | 2731 | p->sched_class->task_waking(p); |
| 2675 | 2732 | ||
| 2676 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); | 2733 | cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags); |
| 2677 | if (task_cpu(p) != cpu) | 2734 | if (task_cpu(p) != cpu) { |
| 2735 | wake_flags |= WF_MIGRATED; | ||
| 2678 | set_task_cpu(p, cpu); | 2736 | set_task_cpu(p, cpu); |
| 2737 | } | ||
| 2679 | #endif /* CONFIG_SMP */ | 2738 | #endif /* CONFIG_SMP */ |
| 2680 | 2739 | ||
| 2681 | ttwu_queue(p, cpu); | 2740 | ttwu_queue(p, cpu); |
| @@ -2839,7 +2898,7 @@ void sched_fork(struct task_struct *p) | |||
| 2839 | #if defined(CONFIG_SMP) | 2898 | #if defined(CONFIG_SMP) |
| 2840 | p->on_cpu = 0; | 2899 | p->on_cpu = 0; |
| 2841 | #endif | 2900 | #endif |
| 2842 | #ifdef CONFIG_PREEMPT | 2901 | #ifdef CONFIG_PREEMPT_COUNT |
| 2843 | /* Want to start with kernel preemption disabled. */ | 2902 | /* Want to start with kernel preemption disabled. */ |
| 2844 | task_thread_info(p)->preempt_count = 1; | 2903 | task_thread_info(p)->preempt_count = 1; |
| 2845 | #endif | 2904 | #endif |
| @@ -3830,6 +3889,25 @@ void account_idle_time(cputime_t cputime) | |||
| 3830 | cpustat->idle = cputime64_add(cpustat->idle, cputime64); | 3889 | cpustat->idle = cputime64_add(cpustat->idle, cputime64); |
| 3831 | } | 3890 | } |
| 3832 | 3891 | ||
| 3892 | static __always_inline bool steal_account_process_tick(void) | ||
| 3893 | { | ||
| 3894 | #ifdef CONFIG_PARAVIRT | ||
| 3895 | if (static_branch(¶virt_steal_enabled)) { | ||
| 3896 | u64 steal, st = 0; | ||
| 3897 | |||
| 3898 | steal = paravirt_steal_clock(smp_processor_id()); | ||
| 3899 | steal -= this_rq()->prev_steal_time; | ||
| 3900 | |||
| 3901 | st = steal_ticks(steal); | ||
| 3902 | this_rq()->prev_steal_time += st * TICK_NSEC; | ||
| 3903 | |||
| 3904 | account_steal_time(st); | ||
| 3905 | return st; | ||
| 3906 | } | ||
| 3907 | #endif | ||
| 3908 | return false; | ||
| 3909 | } | ||
| 3910 | |||
| 3833 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING | 3911 | #ifndef CONFIG_VIRT_CPU_ACCOUNTING |
| 3834 | 3912 | ||
| 3835 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | 3913 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING |
| @@ -3861,6 +3939,9 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, | |||
| 3861 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); | 3939 | cputime64_t tmp = cputime_to_cputime64(cputime_one_jiffy); |
| 3862 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; | 3940 | struct cpu_usage_stat *cpustat = &kstat_this_cpu.cpustat; |
| 3863 | 3941 | ||
| 3942 | if (steal_account_process_tick()) | ||
| 3943 | return; | ||
| 3944 | |||
| 3864 | if (irqtime_account_hi_update()) { | 3945 | if (irqtime_account_hi_update()) { |
| 3865 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3946 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
| 3866 | } else if (irqtime_account_si_update()) { | 3947 | } else if (irqtime_account_si_update()) { |
| @@ -3914,6 +3995,9 @@ void account_process_tick(struct task_struct *p, int user_tick) | |||
| 3914 | return; | 3995 | return; |
| 3915 | } | 3996 | } |
| 3916 | 3997 | ||
| 3998 | if (steal_account_process_tick()) | ||
| 3999 | return; | ||
| 4000 | |||
| 3917 | if (user_tick) | 4001 | if (user_tick) |
| 3918 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); | 4002 | account_user_time(p, cputime_one_jiffy, one_jiffy_scaled); |
| 3919 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) | 4003 | else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET)) |
| @@ -4291,11 +4375,8 @@ EXPORT_SYMBOL(schedule); | |||
| 4291 | 4375 | ||
| 4292 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | 4376 | static inline bool owner_running(struct mutex *lock, struct task_struct *owner) |
| 4293 | { | 4377 | { |
| 4294 | bool ret = false; | ||
| 4295 | |||
| 4296 | rcu_read_lock(); | ||
| 4297 | if (lock->owner != owner) | 4378 | if (lock->owner != owner) |
| 4298 | goto fail; | 4379 | return false; |
| 4299 | 4380 | ||
| 4300 | /* | 4381 | /* |
| 4301 | * Ensure we emit the owner->on_cpu, dereference _after_ checking | 4382 | * Ensure we emit the owner->on_cpu, dereference _after_ checking |
| @@ -4305,11 +4386,7 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner) | |||
| 4305 | */ | 4386 | */ |
| 4306 | barrier(); | 4387 | barrier(); |
| 4307 | 4388 | ||
| 4308 | ret = owner->on_cpu; | 4389 | return owner->on_cpu; |
| 4309 | fail: | ||
| 4310 | rcu_read_unlock(); | ||
| 4311 | |||
| 4312 | return ret; | ||
| 4313 | } | 4390 | } |
| 4314 | 4391 | ||
| 4315 | /* | 4392 | /* |
| @@ -4321,21 +4398,21 @@ int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner) | |||
| 4321 | if (!sched_feat(OWNER_SPIN)) | 4398 | if (!sched_feat(OWNER_SPIN)) |
| 4322 | return 0; | 4399 | return 0; |
| 4323 | 4400 | ||
| 4401 | rcu_read_lock(); | ||
| 4324 | while (owner_running(lock, owner)) { | 4402 | while (owner_running(lock, owner)) { |
| 4325 | if (need_resched()) | 4403 | if (need_resched()) |
| 4326 | return 0; | 4404 | break; |
| 4327 | 4405 | ||
| 4328 | arch_mutex_cpu_relax(); | 4406 | arch_mutex_cpu_relax(); |
| 4329 | } | 4407 | } |
| 4408 | rcu_read_unlock(); | ||
| 4330 | 4409 | ||
| 4331 | /* | 4410 | /* |
| 4332 | * If the owner changed to another task there is likely | 4411 | * We break out the loop above on need_resched() and when the |
| 4333 | * heavy contention, stop spinning. | 4412 | * owner changed, which is a sign for heavy contention. Return |
| 4413 | * success only when lock->owner is NULL. | ||
| 4334 | */ | 4414 | */ |
| 4335 | if (lock->owner) | 4415 | return lock->owner == NULL; |
| 4336 | return 0; | ||
| 4337 | |||
| 4338 | return 1; | ||
| 4339 | } | 4416 | } |
| 4340 | #endif | 4417 | #endif |
| 4341 | 4418 | ||
| @@ -6542,7 +6619,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 6542 | break; | 6619 | break; |
| 6543 | } | 6620 | } |
| 6544 | 6621 | ||
| 6545 | if (!group->cpu_power) { | 6622 | if (!group->sgp->power) { |
| 6546 | printk(KERN_CONT "\n"); | 6623 | printk(KERN_CONT "\n"); |
| 6547 | printk(KERN_ERR "ERROR: domain->cpu_power not " | 6624 | printk(KERN_ERR "ERROR: domain->cpu_power not " |
| 6548 | "set\n"); | 6625 | "set\n"); |
| @@ -6566,9 +6643,9 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level, | |||
| 6566 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); | 6643 | cpulist_scnprintf(str, sizeof(str), sched_group_cpus(group)); |
| 6567 | 6644 | ||
| 6568 | printk(KERN_CONT " %s", str); | 6645 | printk(KERN_CONT " %s", str); |
| 6569 | if (group->cpu_power != SCHED_POWER_SCALE) { | 6646 | if (group->sgp->power != SCHED_POWER_SCALE) { |
| 6570 | printk(KERN_CONT " (cpu_power = %d)", | 6647 | printk(KERN_CONT " (cpu_power = %d)", |
| 6571 | group->cpu_power); | 6648 | group->sgp->power); |
| 6572 | } | 6649 | } |
| 6573 | 6650 | ||
| 6574 | group = group->next; | 6651 | group = group->next; |
| @@ -6759,11 +6836,39 @@ static struct root_domain *alloc_rootdomain(void) | |||
| 6759 | return rd; | 6836 | return rd; |
| 6760 | } | 6837 | } |
| 6761 | 6838 | ||
| 6839 | static void free_sched_groups(struct sched_group *sg, int free_sgp) | ||
| 6840 | { | ||
| 6841 | struct sched_group *tmp, *first; | ||
| 6842 | |||
| 6843 | if (!sg) | ||
| 6844 | return; | ||
| 6845 | |||
| 6846 | first = sg; | ||
| 6847 | do { | ||
| 6848 | tmp = sg->next; | ||
| 6849 | |||
| 6850 | if (free_sgp && atomic_dec_and_test(&sg->sgp->ref)) | ||
| 6851 | kfree(sg->sgp); | ||
| 6852 | |||
| 6853 | kfree(sg); | ||
| 6854 | sg = tmp; | ||
| 6855 | } while (sg != first); | ||
| 6856 | } | ||
| 6857 | |||
| 6762 | static void free_sched_domain(struct rcu_head *rcu) | 6858 | static void free_sched_domain(struct rcu_head *rcu) |
| 6763 | { | 6859 | { |
| 6764 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); | 6860 | struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu); |
| 6765 | if (atomic_dec_and_test(&sd->groups->ref)) | 6861 | |
| 6862 | /* | ||
| 6863 | * If its an overlapping domain it has private groups, iterate and | ||
| 6864 | * nuke them all. | ||
| 6865 | */ | ||
| 6866 | if (sd->flags & SD_OVERLAP) { | ||
| 6867 | free_sched_groups(sd->groups, 1); | ||
| 6868 | } else if (atomic_dec_and_test(&sd->groups->ref)) { | ||
| 6869 | kfree(sd->groups->sgp); | ||
| 6766 | kfree(sd->groups); | 6870 | kfree(sd->groups); |
| 6871 | } | ||
| 6767 | kfree(sd); | 6872 | kfree(sd); |
| 6768 | } | 6873 | } |
| 6769 | 6874 | ||
| @@ -6930,6 +7035,7 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0; | |||
| 6930 | struct sd_data { | 7035 | struct sd_data { |
| 6931 | struct sched_domain **__percpu sd; | 7036 | struct sched_domain **__percpu sd; |
| 6932 | struct sched_group **__percpu sg; | 7037 | struct sched_group **__percpu sg; |
| 7038 | struct sched_group_power **__percpu sgp; | ||
| 6933 | }; | 7039 | }; |
| 6934 | 7040 | ||
| 6935 | struct s_data { | 7041 | struct s_data { |
| @@ -6949,15 +7055,73 @@ struct sched_domain_topology_level; | |||
| 6949 | typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); | 7055 | typedef struct sched_domain *(*sched_domain_init_f)(struct sched_domain_topology_level *tl, int cpu); |
| 6950 | typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); | 7056 | typedef const struct cpumask *(*sched_domain_mask_f)(int cpu); |
| 6951 | 7057 | ||
| 7058 | #define SDTL_OVERLAP 0x01 | ||
| 7059 | |||
| 6952 | struct sched_domain_topology_level { | 7060 | struct sched_domain_topology_level { |
| 6953 | sched_domain_init_f init; | 7061 | sched_domain_init_f init; |
| 6954 | sched_domain_mask_f mask; | 7062 | sched_domain_mask_f mask; |
| 7063 | int flags; | ||
| 6955 | struct sd_data data; | 7064 | struct sd_data data; |
| 6956 | }; | 7065 | }; |
| 6957 | 7066 | ||
| 6958 | /* | 7067 | static int |
| 6959 | * Assumes the sched_domain tree is fully constructed | 7068 | build_overlap_sched_groups(struct sched_domain *sd, int cpu) |
| 6960 | */ | 7069 | { |
| 7070 | struct sched_group *first = NULL, *last = NULL, *groups = NULL, *sg; | ||
| 7071 | const struct cpumask *span = sched_domain_span(sd); | ||
| 7072 | struct cpumask *covered = sched_domains_tmpmask; | ||
| 7073 | struct sd_data *sdd = sd->private; | ||
| 7074 | struct sched_domain *child; | ||
| 7075 | int i; | ||
| 7076 | |||
| 7077 | cpumask_clear(covered); | ||
| 7078 | |||
| 7079 | for_each_cpu(i, span) { | ||
| 7080 | struct cpumask *sg_span; | ||
| 7081 | |||
| 7082 | if (cpumask_test_cpu(i, covered)) | ||
| 7083 | continue; | ||
| 7084 | |||
| 7085 | sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), | ||
| 7086 | GFP_KERNEL, cpu_to_node(i)); | ||
| 7087 | |||
| 7088 | if (!sg) | ||
| 7089 | goto fail; | ||
| 7090 | |||
| 7091 | sg_span = sched_group_cpus(sg); | ||
| 7092 | |||
| 7093 | child = *per_cpu_ptr(sdd->sd, i); | ||
| 7094 | if (child->child) { | ||
| 7095 | child = child->child; | ||
| 7096 | cpumask_copy(sg_span, sched_domain_span(child)); | ||
| 7097 | } else | ||
| 7098 | cpumask_set_cpu(i, sg_span); | ||
| 7099 | |||
| 7100 | cpumask_or(covered, covered, sg_span); | ||
| 7101 | |||
| 7102 | sg->sgp = *per_cpu_ptr(sdd->sgp, cpumask_first(sg_span)); | ||
| 7103 | atomic_inc(&sg->sgp->ref); | ||
| 7104 | |||
| 7105 | if (cpumask_test_cpu(cpu, sg_span)) | ||
| 7106 | groups = sg; | ||
| 7107 | |||
| 7108 | if (!first) | ||
| 7109 | first = sg; | ||
| 7110 | if (last) | ||
| 7111 | last->next = sg; | ||
| 7112 | last = sg; | ||
| 7113 | last->next = first; | ||
| 7114 | } | ||
| 7115 | sd->groups = groups; | ||
| 7116 | |||
| 7117 | return 0; | ||
| 7118 | |||
| 7119 | fail: | ||
| 7120 | free_sched_groups(first, 0); | ||
| 7121 | |||
| 7122 | return -ENOMEM; | ||
| 7123 | } | ||
| 7124 | |||
| 6961 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) | 7125 | static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) |
| 6962 | { | 7126 | { |
| 6963 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); | 7127 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu); |
| @@ -6966,24 +7130,24 @@ static int get_group(int cpu, struct sd_data *sdd, struct sched_group **sg) | |||
| 6966 | if (child) | 7130 | if (child) |
| 6967 | cpu = cpumask_first(sched_domain_span(child)); | 7131 | cpu = cpumask_first(sched_domain_span(child)); |
| 6968 | 7132 | ||
| 6969 | if (sg) | 7133 | if (sg) { |
| 6970 | *sg = *per_cpu_ptr(sdd->sg, cpu); | 7134 | *sg = *per_cpu_ptr(sdd->sg, cpu); |
| 7135 | (*sg)->sgp = *per_cpu_ptr(sdd->sgp, cpu); | ||
| 7136 | atomic_set(&(*sg)->sgp->ref, 1); /* for claim_allocations */ | ||
| 7137 | } | ||
| 6971 | 7138 | ||
| 6972 | return cpu; | 7139 | return cpu; |
| 6973 | } | 7140 | } |
| 6974 | 7141 | ||
| 6975 | /* | 7142 | /* |
| 6976 | * build_sched_groups takes the cpumask we wish to span, and a pointer | ||
| 6977 | * to a function which identifies what group(along with sched group) a CPU | ||
| 6978 | * belongs to. The return value of group_fn must be a >= 0 and < nr_cpu_ids | ||
| 6979 | * (due to the fact that we keep track of groups covered with a struct cpumask). | ||
| 6980 | * | ||
| 6981 | * build_sched_groups will build a circular linked list of the groups | 7143 | * build_sched_groups will build a circular linked list of the groups |
| 6982 | * covered by the given span, and will set each group's ->cpumask correctly, | 7144 | * covered by the given span, and will set each group's ->cpumask correctly, |
| 6983 | * and ->cpu_power to 0. | 7145 | * and ->cpu_power to 0. |
| 7146 | * | ||
| 7147 | * Assumes the sched_domain tree is fully constructed | ||
| 6984 | */ | 7148 | */ |
| 6985 | static void | 7149 | static int |
| 6986 | build_sched_groups(struct sched_domain *sd) | 7150 | build_sched_groups(struct sched_domain *sd, int cpu) |
| 6987 | { | 7151 | { |
| 6988 | struct sched_group *first = NULL, *last = NULL; | 7152 | struct sched_group *first = NULL, *last = NULL; |
| 6989 | struct sd_data *sdd = sd->private; | 7153 | struct sd_data *sdd = sd->private; |
| @@ -6991,6 +7155,12 @@ build_sched_groups(struct sched_domain *sd) | |||
| 6991 | struct cpumask *covered; | 7155 | struct cpumask *covered; |
| 6992 | int i; | 7156 | int i; |
| 6993 | 7157 | ||
| 7158 | get_group(cpu, sdd, &sd->groups); | ||
| 7159 | atomic_inc(&sd->groups->ref); | ||
| 7160 | |||
| 7161 | if (cpu != cpumask_first(sched_domain_span(sd))) | ||
| 7162 | return 0; | ||
| 7163 | |||
| 6994 | lockdep_assert_held(&sched_domains_mutex); | 7164 | lockdep_assert_held(&sched_domains_mutex); |
| 6995 | covered = sched_domains_tmpmask; | 7165 | covered = sched_domains_tmpmask; |
| 6996 | 7166 | ||
| @@ -7005,7 +7175,7 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7005 | continue; | 7175 | continue; |
| 7006 | 7176 | ||
| 7007 | cpumask_clear(sched_group_cpus(sg)); | 7177 | cpumask_clear(sched_group_cpus(sg)); |
| 7008 | sg->cpu_power = 0; | 7178 | sg->sgp->power = 0; |
| 7009 | 7179 | ||
| 7010 | for_each_cpu(j, span) { | 7180 | for_each_cpu(j, span) { |
| 7011 | if (get_group(j, sdd, NULL) != group) | 7181 | if (get_group(j, sdd, NULL) != group) |
| @@ -7022,6 +7192,8 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7022 | last = sg; | 7192 | last = sg; |
| 7023 | } | 7193 | } |
| 7024 | last->next = first; | 7194 | last->next = first; |
| 7195 | |||
| 7196 | return 0; | ||
| 7025 | } | 7197 | } |
| 7026 | 7198 | ||
| 7027 | /* | 7199 | /* |
| @@ -7036,12 +7208,17 @@ build_sched_groups(struct sched_domain *sd) | |||
| 7036 | */ | 7208 | */ |
| 7037 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) | 7209 | static void init_sched_groups_power(int cpu, struct sched_domain *sd) |
| 7038 | { | 7210 | { |
| 7039 | WARN_ON(!sd || !sd->groups); | 7211 | struct sched_group *sg = sd->groups; |
| 7040 | 7212 | ||
| 7041 | if (cpu != group_first_cpu(sd->groups)) | 7213 | WARN_ON(!sd || !sg); |
| 7042 | return; | ||
| 7043 | 7214 | ||
| 7044 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | 7215 | do { |
| 7216 | sg->group_weight = cpumask_weight(sched_group_cpus(sg)); | ||
| 7217 | sg = sg->next; | ||
| 7218 | } while (sg != sd->groups); | ||
| 7219 | |||
| 7220 | if (cpu != group_first_cpu(sg)) | ||
| 7221 | return; | ||
| 7045 | 7222 | ||
| 7046 | update_group_power(sd, cpu); | 7223 | update_group_power(sd, cpu); |
| 7047 | } | 7224 | } |
| @@ -7162,15 +7339,15 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | |||
| 7162 | static void claim_allocations(int cpu, struct sched_domain *sd) | 7339 | static void claim_allocations(int cpu, struct sched_domain *sd) |
| 7163 | { | 7340 | { |
| 7164 | struct sd_data *sdd = sd->private; | 7341 | struct sd_data *sdd = sd->private; |
| 7165 | struct sched_group *sg = sd->groups; | ||
| 7166 | 7342 | ||
| 7167 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); | 7343 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd); |
| 7168 | *per_cpu_ptr(sdd->sd, cpu) = NULL; | 7344 | *per_cpu_ptr(sdd->sd, cpu) = NULL; |
| 7169 | 7345 | ||
| 7170 | if (cpu == cpumask_first(sched_group_cpus(sg))) { | 7346 | if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref)) |
| 7171 | WARN_ON_ONCE(*per_cpu_ptr(sdd->sg, cpu) != sg); | ||
| 7172 | *per_cpu_ptr(sdd->sg, cpu) = NULL; | 7347 | *per_cpu_ptr(sdd->sg, cpu) = NULL; |
| 7173 | } | 7348 | |
| 7349 | if (atomic_read(&(*per_cpu_ptr(sdd->sgp, cpu))->ref)) | ||
| 7350 | *per_cpu_ptr(sdd->sgp, cpu) = NULL; | ||
| 7174 | } | 7351 | } |
| 7175 | 7352 | ||
| 7176 | #ifdef CONFIG_SCHED_SMT | 7353 | #ifdef CONFIG_SCHED_SMT |
| @@ -7195,7 +7372,7 @@ static struct sched_domain_topology_level default_topology[] = { | |||
| 7195 | #endif | 7372 | #endif |
| 7196 | { sd_init_CPU, cpu_cpu_mask, }, | 7373 | { sd_init_CPU, cpu_cpu_mask, }, |
| 7197 | #ifdef CONFIG_NUMA | 7374 | #ifdef CONFIG_NUMA |
| 7198 | { sd_init_NODE, cpu_node_mask, }, | 7375 | { sd_init_NODE, cpu_node_mask, SDTL_OVERLAP, }, |
| 7199 | { sd_init_ALLNODES, cpu_allnodes_mask, }, | 7376 | { sd_init_ALLNODES, cpu_allnodes_mask, }, |
| 7200 | #endif | 7377 | #endif |
| 7201 | { NULL, }, | 7378 | { NULL, }, |
| @@ -7219,9 +7396,14 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
| 7219 | if (!sdd->sg) | 7396 | if (!sdd->sg) |
| 7220 | return -ENOMEM; | 7397 | return -ENOMEM; |
| 7221 | 7398 | ||
| 7399 | sdd->sgp = alloc_percpu(struct sched_group_power *); | ||
| 7400 | if (!sdd->sgp) | ||
| 7401 | return -ENOMEM; | ||
| 7402 | |||
| 7222 | for_each_cpu(j, cpu_map) { | 7403 | for_each_cpu(j, cpu_map) { |
| 7223 | struct sched_domain *sd; | 7404 | struct sched_domain *sd; |
| 7224 | struct sched_group *sg; | 7405 | struct sched_group *sg; |
| 7406 | struct sched_group_power *sgp; | ||
| 7225 | 7407 | ||
| 7226 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), | 7408 | sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(), |
| 7227 | GFP_KERNEL, cpu_to_node(j)); | 7409 | GFP_KERNEL, cpu_to_node(j)); |
| @@ -7236,6 +7418,13 @@ static int __sdt_alloc(const struct cpumask *cpu_map) | |||
| 7236 | return -ENOMEM; | 7418 | return -ENOMEM; |
| 7237 | 7419 | ||
| 7238 | *per_cpu_ptr(sdd->sg, j) = sg; | 7420 | *per_cpu_ptr(sdd->sg, j) = sg; |
| 7421 | |||
| 7422 | sgp = kzalloc_node(sizeof(struct sched_group_power), | ||
| 7423 | GFP_KERNEL, cpu_to_node(j)); | ||
| 7424 | if (!sgp) | ||
| 7425 | return -ENOMEM; | ||
| 7426 | |||
| 7427 | *per_cpu_ptr(sdd->sgp, j) = sgp; | ||
| 7239 | } | 7428 | } |
| 7240 | } | 7429 | } |
| 7241 | 7430 | ||
| @@ -7251,11 +7440,15 @@ static void __sdt_free(const struct cpumask *cpu_map) | |||
| 7251 | struct sd_data *sdd = &tl->data; | 7440 | struct sd_data *sdd = &tl->data; |
| 7252 | 7441 | ||
| 7253 | for_each_cpu(j, cpu_map) { | 7442 | for_each_cpu(j, cpu_map) { |
| 7254 | kfree(*per_cpu_ptr(sdd->sd, j)); | 7443 | struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j); |
| 7444 | if (sd && (sd->flags & SD_OVERLAP)) | ||
| 7445 | free_sched_groups(sd->groups, 0); | ||
| 7255 | kfree(*per_cpu_ptr(sdd->sg, j)); | 7446 | kfree(*per_cpu_ptr(sdd->sg, j)); |
| 7447 | kfree(*per_cpu_ptr(sdd->sgp, j)); | ||
| 7256 | } | 7448 | } |
| 7257 | free_percpu(sdd->sd); | 7449 | free_percpu(sdd->sd); |
| 7258 | free_percpu(sdd->sg); | 7450 | free_percpu(sdd->sg); |
| 7451 | free_percpu(sdd->sgp); | ||
| 7259 | } | 7452 | } |
| 7260 | } | 7453 | } |
| 7261 | 7454 | ||
| @@ -7301,8 +7494,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, | |||
| 7301 | struct sched_domain_topology_level *tl; | 7494 | struct sched_domain_topology_level *tl; |
| 7302 | 7495 | ||
| 7303 | sd = NULL; | 7496 | sd = NULL; |
| 7304 | for (tl = sched_domain_topology; tl->init; tl++) | 7497 | for (tl = sched_domain_topology; tl->init; tl++) { |
| 7305 | sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); | 7498 | sd = build_sched_domain(tl, &d, cpu_map, attr, sd, i); |
| 7499 | if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP)) | ||
| 7500 | sd->flags |= SD_OVERLAP; | ||
| 7501 | if (cpumask_equal(cpu_map, sched_domain_span(sd))) | ||
| 7502 | break; | ||
| 7503 | } | ||
| 7306 | 7504 | ||
| 7307 | while (sd->child) | 7505 | while (sd->child) |
| 7308 | sd = sd->child; | 7506 | sd = sd->child; |
| @@ -7314,13 +7512,13 @@ static int build_sched_domains(const struct cpumask *cpu_map, | |||
| 7314 | for_each_cpu(i, cpu_map) { | 7512 | for_each_cpu(i, cpu_map) { |
| 7315 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { | 7513 | for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) { |
| 7316 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); | 7514 | sd->span_weight = cpumask_weight(sched_domain_span(sd)); |
| 7317 | get_group(i, sd->private, &sd->groups); | 7515 | if (sd->flags & SD_OVERLAP) { |
| 7318 | atomic_inc(&sd->groups->ref); | 7516 | if (build_overlap_sched_groups(sd, i)) |
| 7319 | 7517 | goto error; | |
| 7320 | if (i != cpumask_first(sched_domain_span(sd))) | 7518 | } else { |
| 7321 | continue; | 7519 | if (build_sched_groups(sd, i)) |
| 7322 | 7520 | goto error; | |
| 7323 | build_sched_groups(sd); | 7521 | } |
| 7324 | } | 7522 | } |
| 7325 | } | 7523 | } |
| 7326 | 7524 | ||
| @@ -7730,18 +7928,14 @@ int in_sched_functions(unsigned long addr) | |||
| 7730 | && addr < (unsigned long)__sched_text_end); | 7928 | && addr < (unsigned long)__sched_text_end); |
| 7731 | } | 7929 | } |
| 7732 | 7930 | ||
| 7733 | static void init_cfs_rq(struct cfs_rq *cfs_rq, struct rq *rq) | 7931 | static void init_cfs_rq(struct cfs_rq *cfs_rq) |
| 7734 | { | 7932 | { |
| 7735 | cfs_rq->tasks_timeline = RB_ROOT; | 7933 | cfs_rq->tasks_timeline = RB_ROOT; |
| 7736 | INIT_LIST_HEAD(&cfs_rq->tasks); | 7934 | INIT_LIST_HEAD(&cfs_rq->tasks); |
| 7737 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
| 7738 | cfs_rq->rq = rq; | ||
| 7739 | /* allow initial update_cfs_load() to truncate */ | ||
| 7740 | #ifdef CONFIG_SMP | ||
| 7741 | cfs_rq->load_stamp = 1; | ||
| 7742 | #endif | ||
| 7743 | #endif | ||
| 7744 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); | 7935 | cfs_rq->min_vruntime = (u64)(-(1LL << 20)); |
| 7936 | #ifndef CONFIG_64BIT | ||
| 7937 | cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; | ||
| 7938 | #endif | ||
| 7745 | } | 7939 | } |
| 7746 | 7940 | ||
| 7747 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | 7941 | static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) |
| @@ -7757,27 +7951,18 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq) | |||
| 7757 | /* delimiter for bitsearch: */ | 7951 | /* delimiter for bitsearch: */ |
| 7758 | __set_bit(MAX_RT_PRIO, array->bitmap); | 7952 | __set_bit(MAX_RT_PRIO, array->bitmap); |
| 7759 | 7953 | ||
| 7760 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 7954 | #if defined CONFIG_SMP |
| 7761 | rt_rq->highest_prio.curr = MAX_RT_PRIO; | 7955 | rt_rq->highest_prio.curr = MAX_RT_PRIO; |
| 7762 | #ifdef CONFIG_SMP | ||
| 7763 | rt_rq->highest_prio.next = MAX_RT_PRIO; | 7956 | rt_rq->highest_prio.next = MAX_RT_PRIO; |
| 7764 | #endif | ||
| 7765 | #endif | ||
| 7766 | #ifdef CONFIG_SMP | ||
| 7767 | rt_rq->rt_nr_migratory = 0; | 7957 | rt_rq->rt_nr_migratory = 0; |
| 7768 | rt_rq->overloaded = 0; | 7958 | rt_rq->overloaded = 0; |
| 7769 | plist_head_init_raw(&rt_rq->pushable_tasks, &rq->lock); | 7959 | plist_head_init(&rt_rq->pushable_tasks); |
| 7770 | #endif | 7960 | #endif |
| 7771 | 7961 | ||
| 7772 | rt_rq->rt_time = 0; | 7962 | rt_rq->rt_time = 0; |
| 7773 | rt_rq->rt_throttled = 0; | 7963 | rt_rq->rt_throttled = 0; |
| 7774 | rt_rq->rt_runtime = 0; | 7964 | rt_rq->rt_runtime = 0; |
| 7775 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); | 7965 | raw_spin_lock_init(&rt_rq->rt_runtime_lock); |
| 7776 | |||
| 7777 | #ifdef CONFIG_RT_GROUP_SCHED | ||
| 7778 | rt_rq->rt_nr_boosted = 0; | ||
| 7779 | rt_rq->rq = rq; | ||
| 7780 | #endif | ||
| 7781 | } | 7966 | } |
| 7782 | 7967 | ||
| 7783 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7968 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| @@ -7786,11 +7971,17 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq, | |||
| 7786 | struct sched_entity *parent) | 7971 | struct sched_entity *parent) |
| 7787 | { | 7972 | { |
| 7788 | struct rq *rq = cpu_rq(cpu); | 7973 | struct rq *rq = cpu_rq(cpu); |
| 7789 | tg->cfs_rq[cpu] = cfs_rq; | 7974 | |
| 7790 | init_cfs_rq(cfs_rq, rq); | ||
| 7791 | cfs_rq->tg = tg; | 7975 | cfs_rq->tg = tg; |
| 7976 | cfs_rq->rq = rq; | ||
| 7977 | #ifdef CONFIG_SMP | ||
| 7978 | /* allow initial update_cfs_load() to truncate */ | ||
| 7979 | cfs_rq->load_stamp = 1; | ||
| 7980 | #endif | ||
| 7792 | 7981 | ||
| 7982 | tg->cfs_rq[cpu] = cfs_rq; | ||
| 7793 | tg->se[cpu] = se; | 7983 | tg->se[cpu] = se; |
| 7984 | |||
| 7794 | /* se could be NULL for root_task_group */ | 7985 | /* se could be NULL for root_task_group */ |
| 7795 | if (!se) | 7986 | if (!se) |
| 7796 | return; | 7987 | return; |
| @@ -7813,12 +8004,14 @@ static void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, | |||
| 7813 | { | 8004 | { |
| 7814 | struct rq *rq = cpu_rq(cpu); | 8005 | struct rq *rq = cpu_rq(cpu); |
| 7815 | 8006 | ||
| 7816 | tg->rt_rq[cpu] = rt_rq; | 8007 | rt_rq->highest_prio.curr = MAX_RT_PRIO; |
| 7817 | init_rt_rq(rt_rq, rq); | 8008 | rt_rq->rt_nr_boosted = 0; |
| 8009 | rt_rq->rq = rq; | ||
| 7818 | rt_rq->tg = tg; | 8010 | rt_rq->tg = tg; |
| 7819 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
| 7820 | 8011 | ||
| 8012 | tg->rt_rq[cpu] = rt_rq; | ||
| 7821 | tg->rt_se[cpu] = rt_se; | 8013 | tg->rt_se[cpu] = rt_se; |
| 8014 | |||
| 7822 | if (!rt_se) | 8015 | if (!rt_se) |
| 7823 | return; | 8016 | return; |
| 7824 | 8017 | ||
| @@ -7900,7 +8093,7 @@ void __init sched_init(void) | |||
| 7900 | rq->nr_running = 0; | 8093 | rq->nr_running = 0; |
| 7901 | rq->calc_load_active = 0; | 8094 | rq->calc_load_active = 0; |
| 7902 | rq->calc_load_update = jiffies + LOAD_FREQ; | 8095 | rq->calc_load_update = jiffies + LOAD_FREQ; |
| 7903 | init_cfs_rq(&rq->cfs, rq); | 8096 | init_cfs_rq(&rq->cfs); |
| 7904 | init_rt_rq(&rq->rt, rq); | 8097 | init_rt_rq(&rq->rt, rq); |
| 7905 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8098 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7906 | root_task_group.shares = root_task_group_load; | 8099 | root_task_group.shares = root_task_group_load; |
| @@ -7971,7 +8164,7 @@ void __init sched_init(void) | |||
| 7971 | #endif | 8164 | #endif |
| 7972 | 8165 | ||
| 7973 | #ifdef CONFIG_RT_MUTEXES | 8166 | #ifdef CONFIG_RT_MUTEXES |
| 7974 | plist_head_init_raw(&init_task.pi_waiters, &init_task.pi_lock); | 8167 | plist_head_init(&init_task.pi_waiters); |
| 7975 | #endif | 8168 | #endif |
| 7976 | 8169 | ||
| 7977 | /* | 8170 | /* |
| @@ -8014,7 +8207,7 @@ void __init sched_init(void) | |||
| 8014 | scheduler_running = 1; | 8207 | scheduler_running = 1; |
| 8015 | } | 8208 | } |
| 8016 | 8209 | ||
| 8017 | #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP | 8210 | #ifdef CONFIG_DEBUG_ATOMIC_SLEEP |
| 8018 | static inline int preempt_count_equals(int preempt_offset) | 8211 | static inline int preempt_count_equals(int preempt_offset) |
| 8019 | { | 8212 | { |
| 8020 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); | 8213 | int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); |
| @@ -8024,7 +8217,6 @@ static inline int preempt_count_equals(int preempt_offset) | |||
| 8024 | 8217 | ||
| 8025 | void __might_sleep(const char *file, int line, int preempt_offset) | 8218 | void __might_sleep(const char *file, int line, int preempt_offset) |
| 8026 | { | 8219 | { |
| 8027 | #ifdef in_atomic | ||
| 8028 | static unsigned long prev_jiffy; /* ratelimiting */ | 8220 | static unsigned long prev_jiffy; /* ratelimiting */ |
| 8029 | 8221 | ||
| 8030 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || | 8222 | if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || |
| @@ -8046,7 +8238,6 @@ void __might_sleep(const char *file, int line, int preempt_offset) | |||
| 8046 | if (irqs_disabled()) | 8238 | if (irqs_disabled()) |
| 8047 | print_irqtrace_events(current); | 8239 | print_irqtrace_events(current); |
| 8048 | dump_stack(); | 8240 | dump_stack(); |
| 8049 | #endif | ||
| 8050 | } | 8241 | } |
| 8051 | EXPORT_SYMBOL(__might_sleep); | 8242 | EXPORT_SYMBOL(__might_sleep); |
| 8052 | #endif | 8243 | #endif |
| @@ -8205,6 +8396,7 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8205 | if (!se) | 8396 | if (!se) |
| 8206 | goto err_free_rq; | 8397 | goto err_free_rq; |
| 8207 | 8398 | ||
| 8399 | init_cfs_rq(cfs_rq); | ||
| 8208 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); | 8400 | init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]); |
| 8209 | } | 8401 | } |
| 8210 | 8402 | ||
| @@ -8232,7 +8424,7 @@ static inline void unregister_fair_sched_group(struct task_group *tg, int cpu) | |||
| 8232 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); | 8424 | list_del_leaf_cfs_rq(tg->cfs_rq[cpu]); |
| 8233 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 8425 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
| 8234 | } | 8426 | } |
| 8235 | #else /* !CONFG_FAIR_GROUP_SCHED */ | 8427 | #else /* !CONFIG_FAIR_GROUP_SCHED */ |
| 8236 | static inline void free_fair_sched_group(struct task_group *tg) | 8428 | static inline void free_fair_sched_group(struct task_group *tg) |
| 8237 | { | 8429 | { |
| 8238 | } | 8430 | } |
| @@ -8253,7 +8445,8 @@ static void free_rt_sched_group(struct task_group *tg) | |||
| 8253 | { | 8445 | { |
| 8254 | int i; | 8446 | int i; |
| 8255 | 8447 | ||
| 8256 | destroy_rt_bandwidth(&tg->rt_bandwidth); | 8448 | if (tg->rt_se) |
| 8449 | destroy_rt_bandwidth(&tg->rt_bandwidth); | ||
| 8257 | 8450 | ||
| 8258 | for_each_possible_cpu(i) { | 8451 | for_each_possible_cpu(i) { |
| 8259 | if (tg->rt_rq) | 8452 | if (tg->rt_rq) |
| @@ -8294,6 +8487,8 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
| 8294 | if (!rt_se) | 8487 | if (!rt_se) |
| 8295 | goto err_free_rq; | 8488 | goto err_free_rq; |
| 8296 | 8489 | ||
| 8490 | init_rt_rq(rt_rq, cpu_rq(i)); | ||
| 8491 | rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; | ||
| 8297 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); | 8492 | init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); |
| 8298 | } | 8493 | } |
| 8299 | 8494 | ||
| @@ -8435,10 +8630,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares) | |||
| 8435 | if (!tg->se[0]) | 8630 | if (!tg->se[0]) |
| 8436 | return -EINVAL; | 8631 | return -EINVAL; |
| 8437 | 8632 | ||
| 8438 | if (shares < MIN_SHARES) | 8633 | shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES)); |
| 8439 | shares = MIN_SHARES; | ||
| 8440 | else if (shares > MAX_SHARES) | ||
| 8441 | shares = MAX_SHARES; | ||
| 8442 | 8634 | ||
| 8443 | mutex_lock(&shares_mutex); | 8635 | mutex_lock(&shares_mutex); |
| 8444 | if (tg->shares == shares) | 8636 | if (tg->shares == shares) |
