Diffstat (limited to 'kernel/sched.c')
 -rw-r--r--   kernel/sched.c   447
 1 file changed, 380 insertions, 67 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 9fe3774a0fd3..8fb88a906aaa 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -39,6 +39,7 @@
 #include <linux/completion.h>
 #include <linux/kernel_stat.h>
 #include <linux/debug_locks.h>
+#include <linux/perf_counter.h>
 #include <linux/security.h>
 #include <linux/notifier.h>
 #include <linux/profile.h>
@@ -68,17 +69,18 @@
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/bootmem.h>
 #include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
-#include <trace/sched.h>

 #include <asm/tlb.h>
 #include <asm/irq_regs.h>

 #include "sched_cpupri.h"

+#define CREATE_TRACE_POINTS
+#include <trace/events/sched.h>
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -118,12 +120,6 @@
  */
 #define RUNTIME_INF	((u64)~0ULL)

-DEFINE_TRACE(sched_wait_task);
-DEFINE_TRACE(sched_wakeup);
-DEFINE_TRACE(sched_wakeup_new);
-DEFINE_TRACE(sched_switch);
-DEFINE_TRACE(sched_migrate_task);
-
 #ifdef CONFIG_SMP

 static void double_rq_lock(struct rq *rq1, struct rq *rq2);
@@ -584,6 +580,7 @@ struct rq {
 	struct load_weight load;
 	unsigned long nr_load_updates;
 	u64 nr_switches;
+	u64 nr_migrations_in;

 	struct cfs_rq cfs;
 	struct rt_rq rt;
@@ -630,6 +627,10 @@ struct rq {
 	struct list_head migration_queue;
 #endif

+	/* calc_load related fields */
+	unsigned long calc_load_update;
+	long calc_load_active;
+
 #ifdef CONFIG_SCHED_HRTICK
 #ifdef CONFIG_SMP
 	int hrtick_csd_pending;
@@ -692,7 +693,7 @@ static inline int cpu_of(struct rq *rq)
 #define task_rq(p)	cpu_rq(task_cpu(p))
 #define cpu_curr(cpu)	(cpu_rq(cpu)->curr)

-static inline void update_rq_clock(struct rq *rq)
+inline void update_rq_clock(struct rq *rq)
 {
 	rq->clock = sched_clock_cpu(cpu_of(rq));
 }
@@ -1728,6 +1729,8 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
 }
 #endif

+static void calc_load_account_active(struct rq *this_rq);
+
 #include "sched_stats.h"
 #include "sched_idletask.c"
 #include "sched_fair.c"
@@ -1958,7 +1961,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)

 	clock_offset = old_rq->clock - new_rq->clock;

-	trace_sched_migrate_task(p, task_cpu(p), new_cpu);
+	trace_sched_migrate_task(p, new_cpu);

 #ifdef CONFIG_SCHEDSTATS
 	if (p->se.wait_start)
@@ -1967,12 +1970,16 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		p->se.sleep_start -= clock_offset;
 	if (p->se.block_start)
 		p->se.block_start -= clock_offset;
+#endif
 	if (old_cpu != new_cpu) {
-		schedstat_inc(p, se.nr_migrations);
+		p->se.nr_migrations++;
+		new_rq->nr_migrations_in++;
+#ifdef CONFIG_SCHEDSTATS
 		if (task_hot(p, old_rq->clock, NULL))
 			schedstat_inc(p, se.nr_forced2_migrations);
-	}
 #endif
+		perf_counter_task_migration(p, new_cpu);
+	}
 	p->se.vruntime -= old_cfsrq->min_vruntime -
 					 new_cfsrq->min_vruntime;

@@ -2015,6 +2022,49 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 }

 /*
+ * wait_task_context_switch - wait for a thread to complete at least one
+ *                            context switch.
+ *
+ * @p must not be current.
+ */
+void wait_task_context_switch(struct task_struct *p)
+{
+	unsigned long nvcsw, nivcsw, flags;
+	int running;
+	struct rq *rq;
+
+	nvcsw = p->nvcsw;
+	nivcsw = p->nivcsw;
+	for (;;) {
+		/*
+		 * The runqueue is assigned before the actual context
+		 * switch. We need to take the runqueue lock.
+		 *
+		 * We could check initially without the lock but it is
+		 * very likely that we need to take the lock in every
+		 * iteration.
+		 */
+		rq = task_rq_lock(p, &flags);
+		running = task_running(rq, p);
+		task_rq_unlock(rq, &flags);
+
+		if (likely(!running))
+			break;
+		/*
+		 * The switch count is incremented before the actual
+		 * context switch. We thus wait for two switches to be
+		 * sure at least one completed.
+		 */
+		if ((p->nvcsw - nvcsw) > 1)
+			break;
+		if ((p->nivcsw - nivcsw) > 1)
+			break;
+
+		cpu_relax();
+	}
+}
+
+/*
  * wait_task_inactive - wait for a thread to unschedule.
  *
  * If @match_state is nonzero, it's the @p->state value just checked and
@@ -2142,6 +2192,7 @@ void kick_process(struct task_struct *p)
 		smp_send_reschedule(cpu);
 	preempt_enable();
 }
+EXPORT_SYMBOL_GPL(kick_process);

 /*
  * Return a low guess at the load of a migration-source cpu weighted
@@ -2324,6 +2375,27 @@ static int sched_balance_self(int cpu, int flag)

 #endif /* CONFIG_SMP */

+/**
+ * task_oncpu_function_call - call a function on the cpu on which a task runs
+ * @p:		the task to evaluate
+ * @func:	the function to be called
+ * @info:	the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ */
+void task_oncpu_function_call(struct task_struct *p,
+			      void (*func) (void *info), void *info)
+{
+	int cpu;
+
+	preempt_disable();
+	cpu = task_cpu(p);
+	if (task_curr(p))
+		smp_call_function_single(cpu, func, info, 1);
+	preempt_enable();
+}
+
 /***
  * try_to_wake_up - wake up a thread
  * @p: the to-be-woken-up thread
@@ -2458,6 +2530,17 @@ out:
 	return success;
 }

+/**
+ * wake_up_process - Wake up a specific process
+ * @p: The process to be woken up.
+ *
+ * Attempt to wake up the nominated process and move it to the set of runnable
+ * processes.  Returns 1 if the process was woken up, 0 if it was already
+ * running.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
+ */
 int wake_up_process(struct task_struct *p)
 {
 	return try_to_wake_up(p, TASK_ALL, 0);
@@ -2480,6 +2563,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.exec_start		= 0;
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
+	p->se.nr_migrations		= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
 	p->se.start_runtime		= 0;
@@ -2710,6 +2794,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	perf_counter_task_sched_in(current, cpu_of(rq));
 	finish_lock_switch(rq, prev);
 #ifdef CONFIG_SMP
 	if (post_schedule)
@@ -2766,7 +2851,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
 	 * combine the page table reload and the switch backend into
 	 * one hypercall.
	 */
-	arch_enter_lazy_cpu_mode();
+	arch_start_context_switch(prev);

 	if (unlikely(!mm)) {
 		next->active_mm = oldmm;
@@ -2856,19 +2941,81 @@ unsigned long nr_iowait(void)
 	return sum;
 }

-unsigned long nr_active(void)
+/* Variables and functions for calc_load */
+static atomic_long_t calc_load_tasks;
+static unsigned long calc_load_update;
+unsigned long avenrun[3];
+EXPORT_SYMBOL(avenrun);
+
+/**
+ * get_avenrun - get the load average array
+ * @loads:	pointer to dest load array
+ * @offset:	offset to add
+ * @shift:	shift count to shift the result left
+ *
+ * These values are estimates at best, so no need for locking.
+ */
+void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
 {
-	unsigned long i, running = 0, uninterruptible = 0;
+	loads[0] = (avenrun[0] + offset) << shift;
+	loads[1] = (avenrun[1] + offset) << shift;
+	loads[2] = (avenrun[2] + offset) << shift;
+}

-	for_each_online_cpu(i) {
-		running += cpu_rq(i)->nr_running;
-		uninterruptible += cpu_rq(i)->nr_uninterruptible;
-	}
+static unsigned long
+calc_load(unsigned long load, unsigned long exp, unsigned long active)
+{
+	load *= exp;
+	load += active * (FIXED_1 - exp);
+	return load >> FSHIFT;
+}

-	if (unlikely((long)uninterruptible < 0))
-		uninterruptible = 0;
+/*
+ * calc_load - update the avenrun load estimates 10 ticks after the
+ * CPUs have updated calc_load_tasks.
+ */
+void calc_global_load(void)
+{
+	unsigned long upd = calc_load_update + 10;
+	long active;

-	return running + uninterruptible;
+	if (time_before(jiffies, upd))
+		return;
+
+	active = atomic_long_read(&calc_load_tasks);
+	active = active > 0 ? active * FIXED_1 : 0;
+
+	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
+	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
+	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
+
+	calc_load_update += LOAD_FREQ;
+}
+
+/*
+ * Either called from update_cpu_load() or from a cpu going idle
+ */
+static void calc_load_account_active(struct rq *this_rq)
+{
+	long nr_active, delta;
+
+	nr_active = this_rq->nr_running;
+	nr_active += (long) this_rq->nr_uninterruptible;
+
+	if (nr_active != this_rq->calc_load_active) {
+		delta = nr_active - this_rq->calc_load_active;
+		this_rq->calc_load_active = nr_active;
+		atomic_long_add(delta, &calc_load_tasks);
+	}
+}
+
+/*
+ * Externally visible per-cpu scheduler statistics:
+ * cpu_nr_migrations(cpu) - number of migrations into that cpu
+ */
+u64 cpu_nr_migrations(int cpu)
+{
+	return cpu_rq(cpu)->nr_migrations_in;
 }

 /*
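
The calc_load()/calc_global_load() pair added above is the classic fixed-point exponential moving average behind /proc/loadavg: every LOAD_FREQ interval the three averages decay toward the current count of runnable plus uninterruptible tasks. A minimal user-space sketch of the same arithmetic follows; the FSHIFT/FIXED_1/EXP_* constants are assumed to match include/linux/sched.h of this era and are not part of the patch itself.

#include <stdio.h>

/* Fixed-point constants assumed from include/linux/sched.h (2.6.3x era). */
#define FSHIFT   11              /* bits of fractional precision */
#define FIXED_1  (1 << FSHIFT)   /* 1.0 in fixed point */
#define EXP_1    1884            /* 1/exp(5sec/1min) in fixed point */
#define EXP_5    2014            /* 1/exp(5sec/5min) */
#define EXP_15   2037            /* 1/exp(5sec/15min) */

/* Same arithmetic as the kernel helper: new = old*exp + active*(1 - exp). */
static unsigned long calc_load(unsigned long load, unsigned long exp,
                               unsigned long active)
{
        load *= exp;
        load += active * (FIXED_1 - exp);
        return load >> FSHIFT;
}

int main(void)
{
        unsigned long avenrun[3] = { 0, 0, 0 };
        unsigned long active = 2 * FIXED_1;     /* pretend 2 tasks are runnable */

        /* One update per LOAD_FREQ (~5s); simulate roughly a minute. */
        for (int i = 0; i < 12; i++) {
                avenrun[0] = calc_load(avenrun[0], EXP_1, active);
                avenrun[1] = calc_load(avenrun[1], EXP_5, active);
                avenrun[2] = calc_load(avenrun[2], EXP_15, active);
        }

        /* Roughly the integer/fraction split /proc/loadavg uses. */
        printf("%lu.%02lu %lu.%02lu %lu.%02lu\n",
               avenrun[0] >> FSHIFT, ((avenrun[0] & (FIXED_1 - 1)) * 100) >> FSHIFT,
               avenrun[1] >> FSHIFT, ((avenrun[1] & (FIXED_1 - 1)) * 100) >> FSHIFT,
               avenrun[2] >> FSHIFT, ((avenrun[2] & (FIXED_1 - 1)) * 100) >> FSHIFT);
        return 0;
}

Running this for a constant load of two runnable tasks shows the 1-minute average converging much faster than the 15-minute one, which is exactly the behaviour the per-rq calc_load_active accounting above feeds into.
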
@@ -2899,6 +3046,11 @@ static void update_cpu_load(struct rq *this_rq)
 			new_load += scale-1;
 		this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i;
 	}
+
+	if (time_after_eq(jiffies, this_rq->calc_load_update)) {
+		this_rq->calc_load_update += LOAD_FREQ;
+		calc_load_account_active(this_rq);
+	}
 }

 #ifdef CONFIG_SMP
@@ -4240,6 +4392,7 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 static struct {
 	atomic_t load_balancer;
 	cpumask_var_t cpu_mask;
+	cpumask_var_t ilb_grp_nohz_mask;
 } nohz ____cacheline_aligned = {
 	.load_balancer = ATOMIC_INIT(-1),
 };
@@ -4249,6 +4402,121 @@ int get_nohz_load_balancer(void)
 	return atomic_read(&nohz.load_balancer);
 }

+#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+/**
+ * lowest_flag_domain - Return lowest sched_domain containing flag.
+ * @cpu:	The cpu whose lowest level of sched domain is to
+ *		be returned.
+ * @flag:	The flag to check for the lowest sched_domain
+ *		for the given cpu.
+ *
+ * Returns the lowest sched_domain of a cpu which contains the given flag.
+ */
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{
+	struct sched_domain *sd;
+
+	for_each_domain(cpu, sd)
+		if (sd && (sd->flags & flag))
+			break;
+
+	return sd;
+}
+
+/**
+ * for_each_flag_domain - Iterates over sched_domains containing the flag.
+ * @cpu:	The cpu whose domains we're iterating over.
+ * @sd:		variable holding the value of the power_savings_sd
+ *		for cpu.
+ * @flag:	The flag to filter the sched_domains to be iterated.
+ *
+ * Iterates over all the scheduler domains for a given cpu that has the 'flag'
+ * set, starting from the lowest sched_domain to the highest.
+ */
+#define for_each_flag_domain(cpu, sd, flag) \
+	for (sd = lowest_flag_domain(cpu, flag); \
+		(sd && (sd->flags & flag)); sd = sd->parent)
+
+/**
+ * is_semi_idle_group - Checks if the given sched_group is semi-idle.
+ * @ilb_group:	group to be checked for semi-idleness
+ *
+ * Returns:	1 if the group is semi-idle. 0 otherwise.
+ *
+ * We define a sched_group to be semi idle if it has atleast one idle-CPU
+ * and atleast one non-idle CPU. This helper function checks if the given
+ * sched_group is semi-idle or not.
+ */
+static inline int is_semi_idle_group(struct sched_group *ilb_group)
+{
+	cpumask_and(nohz.ilb_grp_nohz_mask, nohz.cpu_mask,
+					sched_group_cpus(ilb_group));
+
+	/*
+	 * A sched_group is semi-idle when it has atleast one busy cpu
+	 * and atleast one idle cpu.
+	 */
+	if (cpumask_empty(nohz.ilb_grp_nohz_mask))
+		return 0;
+
+	if (cpumask_equal(nohz.ilb_grp_nohz_mask, sched_group_cpus(ilb_group)))
+		return 0;
+
+	return 1;
+}
+/**
+ * find_new_ilb - Finds the optimum idle load balancer for nomination.
+ * @cpu:	The cpu which is nominating a new idle_load_balancer.
+ *
+ * Returns:	Returns the id of the idle load balancer if it exists,
+ *		Else, returns >= nr_cpu_ids.
+ *
+ * This algorithm picks the idle load balancer such that it belongs to a
+ * semi-idle powersavings sched_domain. The idea is to try and avoid
+ * completely idle packages/cores just for the purpose of idle load balancing
+ * when there are other idle cpu's which are better suited for that job.
+ */
+static int find_new_ilb(int cpu)
+{
+	struct sched_domain *sd;
+	struct sched_group *ilb_group;
+
+	/*
+	 * Have idle load balancer selection from semi-idle packages only
+	 * when power-aware load balancing is enabled
+	 */
+	if (!(sched_smt_power_savings || sched_mc_power_savings))
+		goto out_done;
+
+	/*
+	 * Optimize for the case when we have no idle CPUs or only one
+	 * idle CPU. Don't walk the sched_domain hierarchy in such cases
+	 */
+	if (cpumask_weight(nohz.cpu_mask) < 2)
+		goto out_done;
+
+	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
+		ilb_group = sd->groups;
+
+		do {
+			if (is_semi_idle_group(ilb_group))
+				return cpumask_first(nohz.ilb_grp_nohz_mask);
+
+			ilb_group = ilb_group->next;
+
+		} while (ilb_group != sd->groups);
+	}
+
+out_done:
+	return cpumask_first(nohz.cpu_mask);
+}
+#else /* (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
+static inline int find_new_ilb(int call_cpu)
+{
+	return cpumask_first(nohz.cpu_mask);
+}
+#endif
+
 /*
  * This routine will try to nominate the ilb (idle load balancing)
  * owner among the cpus whose ticks are stopped. ilb owner will do the idle
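
The is_semi_idle_group() test introduced above boils down to two cpumask operations: the group's intersection with the tick-stopped (nohz) mask must be non-empty and must not cover the whole group. A stand-alone sketch of that predicate follows; the is_semi_idle() helper name and the use of plain 64-bit masks in place of struct cpumask are illustrative only, not part of the kernel API.

#include <stdbool.h>
#include <stdio.h>

/*
 * Illustrative predicate: a "semi-idle" group has a non-empty intersection
 * with the idle (nohz) mask, but is not entirely contained in it.
 */
static bool is_semi_idle(unsigned long long group_cpus,
                         unsigned long long nohz_cpus)
{
        unsigned long long idle_in_group = group_cpus & nohz_cpus;

        if (idle_in_group == 0)             /* no idle CPU in the group */
                return false;
        if (idle_in_group == group_cpus)    /* every CPU in the group is idle */
                return false;
        return true;                        /* at least one idle and one busy */
}

int main(void)
{
        /* CPUs 0-3 form the group; CPUs 2-3 have stopped their tick. */
        printf("%d\n", is_semi_idle(0xFULL, 0xCULL));  /* 1: semi-idle */
        printf("%d\n", is_semi_idle(0xFULL, 0x0ULL));  /* 0: fully busy */
        printf("%d\n", is_semi_idle(0xFULL, 0xFULL));  /* 0: fully idle */
        return 0;
}
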
@@ -4303,8 +4571,24 @@ int select_nohz_load_balancer(int stop_tick)
 			/* make me the ilb owner */
 			if (atomic_cmpxchg(&nohz.load_balancer, -1, cpu) == -1)
 				return 1;
-		} else if (atomic_read(&nohz.load_balancer) == cpu)
+		} else if (atomic_read(&nohz.load_balancer) == cpu) {
+			int new_ilb;
+
+			if (!(sched_smt_power_savings ||
+						sched_mc_power_savings))
+				return 1;
+			/*
+			 * Check to see if there is a more power-efficient
+			 * ilb.
+			 */
+			new_ilb = find_new_ilb(cpu);
+			if (new_ilb < nr_cpu_ids && new_ilb != cpu) {
+				atomic_set(&nohz.load_balancer, -1);
+				resched_cpu(new_ilb);
+				return 0;
+			}
 			return 1;
+		}
 	} else {
 		if (!cpumask_test_cpu(cpu, nohz.cpu_mask))
 			return 0;
@@ -4473,15 +4757,7 @@ static inline void trigger_load_balance(struct rq *rq, int cpu)
 	}

 	if (atomic_read(&nohz.load_balancer) == -1) {
-		/*
-		 * simple selection for now: Nominate the
-		 * first cpu in the nohz list to be the next
-		 * ilb owner.
-		 *
-		 * TBD: Traverse the sched domains and nominate
-		 * the nearest cpu in the nohz.cpu_mask.
-		 */
-		int ilb = cpumask_first(nohz.cpu_mask);
+		int ilb = find_new_ilb(cpu);

 		if (ilb < nr_cpu_ids)
 			resched_cpu(ilb);
@@ -4737,7 +5013,7 @@ void account_process_tick(struct task_struct *p, int user_tick)

 	if (user_tick)
 		account_user_time(p, one_jiffy, one_jiffy_scaled);
-	else if (p != rq->idle)
+	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
 		account_system_time(p, HARDIRQ_OFFSET, one_jiffy,
 				    one_jiffy_scaled);
 	else
@@ -4845,6 +5121,8 @@ void scheduler_tick(void)
 	curr->sched_class->task_tick(rq, curr, 0);
 	spin_unlock(&rq->lock);

+	perf_counter_task_tick(curr, cpu);
+
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
 	trigger_load_balance(rq, cpu);
@@ -5012,13 +5290,15 @@ pick_next_task(struct rq *rq)
 /*
  * schedule() is the main scheduler function.
  */
-asmlinkage void __sched __schedule(void)
+asmlinkage void __sched schedule(void)
 {
 	struct task_struct *prev, *next;
 	unsigned long *switch_count;
 	struct rq *rq;
 	int cpu;

+need_resched:
+	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_qsctr_inc(cpu);
@@ -5058,6 +5338,7 @@ need_resched_nonpreemptible:

 	if (likely(prev != next)) {
 		sched_info_switch(prev, next);
+		perf_counter_task_sched_out(prev, next, cpu);

 		rq->nr_switches++;
 		rq->curr = next;
@@ -5075,15 +5356,9 @@ need_resched_nonpreemptible:

 	if (unlikely(reacquire_kernel_lock(current) < 0))
 		goto need_resched_nonpreemptible;
-}

-asmlinkage void __sched schedule(void)
-{
-need_resched:
-	preempt_disable();
-	__schedule();
 	preempt_enable_no_resched();
-	if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
+	if (need_resched())
 		goto need_resched;
 }
 EXPORT_SYMBOL(schedule);
@@ -5226,7 +5501,7 @@ EXPORT_SYMBOL(default_wake_function);
  * started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
  * zero in this (rare) case, and we handle it by continuing to scan the queue.
  */
-void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
+static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, int sync, void *key)
 {
 	wait_queue_t *curr, *next;
@@ -5246,6 +5521,9 @@ void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
  * @key: is directly passed to the wakeup function
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
  */
 void __wake_up(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5284,6 +5562,9 @@ void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
  * with each other. This can prevent needless bouncing between CPUs.
 *
 * On UP it can prevent extra preemption.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
 */
 void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 			int nr_exclusive, void *key)
@@ -5320,6 +5601,9 @@ EXPORT_SYMBOL_GPL(__wake_up_sync);	/* For internal use only */
 * awakened in the same order in which they were queued.
 *
 * See also complete_all(), wait_for_completion() and related routines.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
 */
 void complete(struct completion *x)
 {
@@ -5337,6 +5621,9 @@ EXPORT_SYMBOL(complete);
 * @x:  holds the state of this particular completion
 *
 * This will wake up all threads waiting on this particular completion event.
+ *
+ * It may be assumed that this function implies a write memory barrier before
+ * changing the task state if and only if any tasks are woken up.
 */
 void complete_all(struct completion *x)
 {
@@ -6495,8 +6782,9 @@ void sched_show_task(struct task_struct *p)
 #ifdef CONFIG_DEBUG_STACK_USAGE
 	free = stack_not_used(p);
 #endif
-	printk(KERN_CONT "%5lu %5d %6d\n", free,
-		task_pid_nr(p), task_pid_nr(p->real_parent));
+	printk(KERN_CONT "%5lu %5d %6d 0x%08lx\n", free,
+		task_pid_nr(p), task_pid_nr(p->real_parent),
+		(unsigned long)task_thread_info(p)->flags);

 	show_stack(p, NULL);
 }
@@ -6975,6 +7263,14 @@ static void migrate_dead_tasks(unsigned int dead_cpu)

 	}
 }
+
+/*
+ * remove the tasks which were accounted by rq from calc_load_tasks.
+ */
+static void calc_global_load_remove(struct rq *rq)
+{
+	atomic_long_sub(rq->calc_load_active, &calc_load_tasks);
+}
 #endif /* CONFIG_HOTPLUG_CPU */

 #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
@@ -7209,6 +7505,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		/* Update our root-domain */
 		rq = cpu_rq(cpu);
 		spin_lock_irqsave(&rq->lock, flags);
+		rq->calc_load_update = calc_load_update;
+		rq->calc_load_active = 0;
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));

@@ -7248,7 +7546,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
-
+		calc_global_load_remove(rq);
 		/*
 		 * No need to migrate the tasks: it was best-effort if
 		 * they didn't take sched_hotcpu_mutex. Just wake up
@@ -7284,8 +7582,10 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }

-/* Register at highest priority so that task migration (migrate_all_tasks)
- * happens before everything else.
+/*
+ * Register at high priority so that task migration (migrate_all_tasks)
+ * happens before everything else.  This has to be lower priority than
+ * the notifier in the perf_counter subsystem, though.
  */
 static struct notifier_block __cpuinitdata migration_notifier = {
 	.notifier_call = migration_call,
@@ -7530,24 +7830,21 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd)

 static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem)
 {
+	gfp_t gfp = GFP_KERNEL;
+
 	memset(rd, 0, sizeof(*rd));

-	if (bootmem) {
-		alloc_bootmem_cpumask_var(&def_root_domain.span);
-		alloc_bootmem_cpumask_var(&def_root_domain.online);
-		alloc_bootmem_cpumask_var(&def_root_domain.rto_mask);
-		cpupri_init(&rd->cpupri, true);
-		return 0;
-	}
+	if (bootmem)
+		gfp = GFP_NOWAIT;

-	if (!alloc_cpumask_var(&rd->span, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->span, gfp))
 		goto out;
-	if (!alloc_cpumask_var(&rd->online, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->online, gfp))
 		goto free_span;
-	if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
+	if (!alloc_cpumask_var(&rd->rto_mask, gfp))
 		goto free_online;

-	if (cpupri_init(&rd->cpupri, false) != 0)
+	if (cpupri_init(&rd->cpupri, bootmem) != 0)
 		goto free_rto_mask;
 	return 0;

@@ -7758,8 +8055,9 @@ int sched_smt_power_savings = 0, sched_mc_power_savings = 0;

 /*
  * The cpus mask in sched_group and sched_domain hangs off the end.
- * FIXME: use cpumask_var_t or dynamic percpu alloc to avoid wasting space
- * for nr_cpu_ids < CONFIG_NR_CPUS.
+ *
+ * ( See the the comments in include/linux/sched.h:struct sched_group
+ *   and struct sched_domain. )
  */
 struct static_sched_group {
 	struct sched_group sg;
@@ -7880,7 +8178,7 @@ static void init_numa_sched_groups_power(struct sched_group *group_head)
 			struct sched_domain *sd;

 			sd = &per_cpu(phys_domains, j).sd;
-			if (j != cpumask_first(sched_group_cpus(sd->groups))) {
+			if (j != group_first_cpu(sd->groups)) {
 				/*
 				 * Only add "power" once for each
 				 * physical package.
@@ -7958,7 +8256,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)

 	WARN_ON(!sd || !sd->groups);

-	if (cpu != cpumask_first(sched_group_cpus(sd->groups)))
+	if (cpu != group_first_cpu(sd->groups))
 		return;

 	child = sd->child;
@@ -8872,7 +9170,7 @@ void __init sched_init(void)
 	 * we use alloc_bootmem().
 	 */
 	if (alloc_size) {
-		ptr = (unsigned long)alloc_bootmem(alloc_size);
+		ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT);

 #ifdef CONFIG_FAIR_GROUP_SCHED
 		init_task_group.se = (struct sched_entity **)ptr;
@@ -8945,6 +9243,8 @@ void __init sched_init(void)
 		rq = cpu_rq(i);
 		spin_lock_init(&rq->lock);
 		rq->nr_running = 0;
+		rq->calc_load_active = 0;
+		rq->calc_load_update = jiffies + LOAD_FREQ;
 		init_cfs_rq(&rq->cfs, rq);
 		init_rt_rq(&rq->rt, rq);
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -8965,7 +9265,7 @@ void __init sched_init(void)
 	 * 1024) and two child groups A0 and A1 (of weight 1024 each),
 	 * then A0's share of the cpu resource is:
 	 *
-	 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
+	 *	A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33%
 	 *
 	 * We achieve this by letting init_task_group's tasks sit
 	 * directly in rq->cfs (i.e init_task_group->se[] = NULL).
@@ -9052,20 +9352,26 @@ void __init sched_init(void)
 	 * when this runqueue becomes "idle".
 	 */
 	init_idle(current, smp_processor_id());
+
+	calc_load_update = jiffies + LOAD_FREQ;
+
 	/*
 	 * During early bootup we pretend to be a normal task:
 	 */
 	current->sched_class = &fair_sched_class;

 	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
-	alloc_bootmem_cpumask_var(&nohz_cpu_mask);
+	alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
 #ifdef CONFIG_SMP
 #ifdef CONFIG_NO_HZ
-	alloc_bootmem_cpumask_var(&nohz.cpu_mask);
+	alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+	alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
 #endif
-	alloc_bootmem_cpumask_var(&cpu_isolated_map);
+	alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
 #endif /* SMP */

+	perf_counter_init();
+
 	scheduler_running = 1;
 }

@@ -9807,6 +10113,13 @@ static int sched_rt_global_constraints(void)
 	if (sysctl_sched_rt_period <= 0)
 		return -EINVAL;

+	/*
+	 * There's always some RT tasks in the root group
+	 * -- migration, kstopmachine etc..
+	 */
+	if (sysctl_sched_rt_runtime == 0)
+		return -EBUSY;
+
 	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
 	for_each_possible_cpu(i) {
 		struct rt_rq *rt_rq = &cpu_rq(i)->rt;