Diffstat (limited to 'kernel/sched.c')
-rw-r--r-- | kernel/sched.c | 557 |
1 file changed, 480 insertions, 77 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index d1e8889872a1..e6f8f1254319 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -426,9 +426,7 @@ struct root_domain { | |||
426 | */ | 426 | */ |
427 | cpumask_var_t rto_mask; | 427 | cpumask_var_t rto_mask; |
428 | atomic_t rto_count; | 428 | atomic_t rto_count; |
429 | #ifdef CONFIG_SMP | ||
430 | struct cpupri cpupri; | 429 | struct cpupri cpupri; |
431 | #endif | ||
432 | }; | 430 | }; |
433 | 431 | ||
434 | /* | 432 | /* |
@@ -437,7 +435,7 @@ struct root_domain { | |||
437 | */ | 435 | */ |
438 | static struct root_domain def_root_domain; | 436 | static struct root_domain def_root_domain; |
439 | 437 | ||
440 | #endif | 438 | #endif /* CONFIG_SMP */ |
441 | 439 | ||
442 | /* | 440 | /* |
443 | * This is the main, per-CPU runqueue data structure. | 441 | * This is the main, per-CPU runqueue data structure. |
@@ -488,11 +486,12 @@ struct rq { | |||
488 | */ | 486 | */ |
489 | unsigned long nr_uninterruptible; | 487 | unsigned long nr_uninterruptible; |
490 | 488 | ||
491 | struct task_struct *curr, *idle; | 489 | struct task_struct *curr, *idle, *stop; |
492 | unsigned long next_balance; | 490 | unsigned long next_balance; |
493 | struct mm_struct *prev_mm; | 491 | struct mm_struct *prev_mm; |
494 | 492 | ||
495 | u64 clock; | 493 | u64 clock; |
494 | u64 clock_task; | ||
496 | 495 | ||
497 | atomic_t nr_iowait; | 496 | atomic_t nr_iowait; |
498 | 497 | ||
@@ -520,6 +519,10 @@ struct rq { | |||
520 | u64 avg_idle; | 519 | u64 avg_idle; |
521 | #endif | 520 | #endif |
522 | 521 | ||
522 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
523 | u64 prev_irq_time; | ||
524 | #endif | ||
525 | |||
523 | /* calc_load related fields */ | 526 | /* calc_load related fields */ |
524 | unsigned long calc_load_update; | 527 | unsigned long calc_load_update; |
525 | long calc_load_active; | 528 | long calc_load_active; |
@@ -557,18 +560,8 @@ struct rq { | |||
557 | 560 | ||
558 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); | 561 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); |
559 | 562 | ||
560 | static inline | ||
561 | void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
562 | { | ||
563 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
564 | 563 | ||
565 | /* | 564 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags); |
566 | * A queue event has occurred, and we're going to schedule. In | ||
567 | * this case, we can save a useless back to back clock update. | ||
568 | */ | ||
569 | if (test_tsk_need_resched(p)) | ||
570 | rq->skip_clock_update = 1; | ||
571 | } | ||
572 | 565 | ||
573 | static inline int cpu_of(struct rq *rq) | 566 | static inline int cpu_of(struct rq *rq) |
574 | { | 567 | { |
@@ -643,10 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p) | |||
643 | 636 | ||
644 | #endif /* CONFIG_CGROUP_SCHED */ | 637 | #endif /* CONFIG_CGROUP_SCHED */ |
645 | 638 | ||
646 | inline void update_rq_clock(struct rq *rq) | 639 | static void update_rq_clock_task(struct rq *rq, s64 delta); |
640 | |||
641 | static void update_rq_clock(struct rq *rq) | ||
647 | { | 642 | { |
648 | if (!rq->skip_clock_update) | 643 | s64 delta; |
649 | rq->clock = sched_clock_cpu(cpu_of(rq)); | 644 | |
645 | if (rq->skip_clock_update) | ||
646 | return; | ||
647 | |||
648 | delta = sched_clock_cpu(cpu_of(rq)) - rq->clock; | ||
649 | rq->clock += delta; | ||
650 | update_rq_clock_task(rq, delta); | ||
650 | } | 651 | } |
651 | 652 | ||
652 | /* | 653 | /* |
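The rewritten update_rq_clock() above now hands the raw delta to update_rq_clock_task(), so rq->clock keeps tracking wall time while rq->clock_task can have hard/soft-irq time subtracted from it. A minimal sketch of how a scheduling class is expected to consume the new clock, assuming a made-up helper name (the real consumer is update_curr() in sched_fair.c):

        /* Hedged sketch: charge_exec_time() is a hypothetical helper. */
        static void charge_exec_time(struct rq *rq, struct sched_entity *se)
        {
                u64 now = rq->clock_task;       /* irq time already excluded */
                u64 delta_exec = now - se->exec_start;

                se->sum_exec_runtime += delta_exec;
                se->exec_start = now;
        }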
@@ -723,7 +724,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
723 | size_t cnt, loff_t *ppos) | 724 | size_t cnt, loff_t *ppos) |
724 | { | 725 | { |
725 | char buf[64]; | 726 | char buf[64]; |
726 | char *cmp = buf; | 727 | char *cmp; |
727 | int neg = 0; | 728 | int neg = 0; |
728 | int i; | 729 | int i; |
729 | 730 | ||
@@ -734,6 +735,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
734 | return -EFAULT; | 735 | return -EFAULT; |
735 | 736 | ||
736 | buf[cnt] = 0; | 737 | buf[cnt] = 0; |
738 | cmp = strstrip(buf); | ||
737 | 739 | ||
738 | if (strncmp(buf, "NO_", 3) == 0) { | 740 | if (strncmp(buf, "NO_", 3) == 0) { |
739 | neg = 1; | 741 | neg = 1; |
@@ -741,9 +743,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf, | |||
741 | } | 743 | } |
742 | 744 | ||
743 | for (i = 0; sched_feat_names[i]; i++) { | 745 | for (i = 0; sched_feat_names[i]; i++) { |
744 | int len = strlen(sched_feat_names[i]); | 746 | if (strcmp(cmp, sched_feat_names[i]) == 0) { |
745 | |||
746 | if (strncmp(cmp, sched_feat_names[i], len) == 0) { | ||
747 | if (neg) | 747 | if (neg) |
748 | sysctl_sched_features &= ~(1UL << i); | 748 | sysctl_sched_features &= ~(1UL << i); |
749 | else | 749 | else |
@@ -1840,7 +1840,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu) | |||
1840 | 1840 | ||
1841 | static const struct sched_class rt_sched_class; | 1841 | static const struct sched_class rt_sched_class; |
1842 | 1842 | ||
1843 | #define sched_class_highest (&rt_sched_class) | 1843 | #define sched_class_highest (&stop_sched_class) |
1844 | #define for_each_class(class) \ | 1844 | #define for_each_class(class) \ |
1845 | for (class = sched_class_highest; class; class = class->next) | 1845 | for (class = sched_class_highest; class; class = class->next) |
1846 | 1846 | ||
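With sched_class_highest now pointing at the stop class, for_each_class() visits the classes in strict priority order: stop -> rt -> fair -> idle, each class naming the next one in its .next initializer (sched_stoptask.c, sched_rt.c, sched_fair.c, sched_idletask.c). A small sketch of the walk the macro expands to:

        /* Hedged sketch: count the classes by walking the ->next chain,
         * exactly what for_each_class() iterates over. */
        static int nr_sched_classes(void)
        {
                const struct sched_class *class;
                int n = 0;

                for (class = sched_class_highest; class; class = class->next)
                        n++;

                return n;       /* 4: stop, rt, fair, idle */
        }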
@@ -1858,12 +1858,6 @@ static void dec_nr_running(struct rq *rq) | |||
1858 | 1858 | ||
1859 | static void set_load_weight(struct task_struct *p) | 1859 | static void set_load_weight(struct task_struct *p) |
1860 | { | 1860 | { |
1861 | if (task_has_rt_policy(p)) { | ||
1862 | p->se.load.weight = 0; | ||
1863 | p->se.load.inv_weight = WMULT_CONST; | ||
1864 | return; | ||
1865 | } | ||
1866 | |||
1867 | /* | 1861 | /* |
1868 | * SCHED_IDLE tasks get minimal weight: | 1862 | * SCHED_IDLE tasks get minimal weight: |
1869 | */ | 1863 | */ |
@@ -1917,13 +1911,193 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags) | |||
1917 | dec_nr_running(rq); | 1911 | dec_nr_running(rq); |
1918 | } | 1912 | } |
1919 | 1913 | ||
1914 | #ifdef CONFIG_IRQ_TIME_ACCOUNTING | ||
1915 | |||
1916 | /* | ||
1917 | * There are no locks covering percpu hardirq/softirq time. | ||
1918 | * They are only modified in account_system_vtime, on corresponding CPU | ||
1919 | * with interrupts disabled. So, writes are safe. | ||
1920 | * They are read and saved off onto struct rq in update_rq_clock(). | ||
1921 | * This may result in other CPU reading this CPU's irq time and can | ||
1922 | * race with irq/account_system_vtime on this CPU. We would either get old | ||
1923 | * or new value with a side effect of accounting a slice of irq time to wrong | ||
1924 | * task when irq is in progress while we read rq->clock. That is a worthy | ||
1925 | * compromise in place of having locks on each irq in account_system_time. | ||
1926 | */ | ||
1927 | static DEFINE_PER_CPU(u64, cpu_hardirq_time); | ||
1928 | static DEFINE_PER_CPU(u64, cpu_softirq_time); | ||
1929 | |||
1930 | static DEFINE_PER_CPU(u64, irq_start_time); | ||
1931 | static int sched_clock_irqtime; | ||
1932 | |||
1933 | void enable_sched_clock_irqtime(void) | ||
1934 | { | ||
1935 | sched_clock_irqtime = 1; | ||
1936 | } | ||
1937 | |||
1938 | void disable_sched_clock_irqtime(void) | ||
1939 | { | ||
1940 | sched_clock_irqtime = 0; | ||
1941 | } | ||
1942 | |||
1943 | #ifndef CONFIG_64BIT | ||
1944 | static DEFINE_PER_CPU(seqcount_t, irq_time_seq); | ||
1945 | |||
1946 | static inline void irq_time_write_begin(void) | ||
1947 | { | ||
1948 | __this_cpu_inc(irq_time_seq.sequence); | ||
1949 | smp_wmb(); | ||
1950 | } | ||
1951 | |||
1952 | static inline void irq_time_write_end(void) | ||
1953 | { | ||
1954 | smp_wmb(); | ||
1955 | __this_cpu_inc(irq_time_seq.sequence); | ||
1956 | } | ||
1957 | |||
1958 | static inline u64 irq_time_read(int cpu) | ||
1959 | { | ||
1960 | u64 irq_time; | ||
1961 | unsigned seq; | ||
1962 | |||
1963 | do { | ||
1964 | seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu)); | ||
1965 | irq_time = per_cpu(cpu_softirq_time, cpu) + | ||
1966 | per_cpu(cpu_hardirq_time, cpu); | ||
1967 | } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq)); | ||
1968 | |||
1969 | return irq_time; | ||
1970 | } | ||
1971 | #else /* CONFIG_64BIT */ | ||
1972 | static inline void irq_time_write_begin(void) | ||
1973 | { | ||
1974 | } | ||
1975 | |||
1976 | static inline void irq_time_write_end(void) | ||
1977 | { | ||
1978 | } | ||
1979 | |||
1980 | static inline u64 irq_time_read(int cpu) | ||
1981 | { | ||
1982 | return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu); | ||
1983 | } | ||
1984 | #endif /* CONFIG_64BIT */ | ||
1985 | |||
1986 | /* | ||
1987 | * Called before incrementing preempt_count on {soft,}irq_enter | ||
1988 | * and before decrementing preempt_count on {soft,}irq_exit. | ||
1989 | */ | ||
1990 | void account_system_vtime(struct task_struct *curr) | ||
1991 | { | ||
1992 | unsigned long flags; | ||
1993 | s64 delta; | ||
1994 | int cpu; | ||
1995 | |||
1996 | if (!sched_clock_irqtime) | ||
1997 | return; | ||
1998 | |||
1999 | local_irq_save(flags); | ||
2000 | |||
2001 | cpu = smp_processor_id(); | ||
2002 | delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time); | ||
2003 | __this_cpu_add(irq_start_time, delta); | ||
2004 | |||
2005 | irq_time_write_begin(); | ||
2006 | /* | ||
2007 | * We do not account for softirq time from ksoftirqd here. | ||
2008 | * We want to continue accounting softirq time to ksoftirqd thread | ||
2009 | * in that case, so as not to confuse scheduler with a special task | ||
2010 | * that do not consume any time, but still wants to run. | ||
2011 | */ | ||
2012 | if (hardirq_count()) | ||
2013 | __this_cpu_add(cpu_hardirq_time, delta); | ||
2014 | else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD)) | ||
2015 | __this_cpu_add(cpu_softirq_time, delta); | ||
2016 | |||
2017 | irq_time_write_end(); | ||
2018 | local_irq_restore(flags); | ||
2019 | } | ||
2020 | EXPORT_SYMBOL_GPL(account_system_vtime); | ||
2021 | |||
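The comment above account_system_vtime() refers to the irq entry/exit hooks. For orientation, the hardirq side of that pairing looks roughly like the following in include/linux/hardirq.h of this era; treat it as a sketch quoted from memory rather than the authoritative definition:

        #define __irq_enter()                                   \
                do {                                            \
                        account_system_vtime(current);          \
                        add_preempt_count(HARDIRQ_OFFSET);      \
                        trace_hardirq_enter();                  \
                } while (0)

The exit side and the softirq machinery call it symmetrically, in the before-preempt_count-change positions the comment above describes.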
2022 | static void update_rq_clock_task(struct rq *rq, s64 delta) | ||
2023 | { | ||
2024 | s64 irq_delta; | ||
2025 | |||
2026 | irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time; | ||
2027 | |||
2028 | /* | ||
2029 | * Since irq_time is only updated on {soft,}irq_exit, we might run into | ||
2030 | * this case when a previous update_rq_clock() happened inside a | ||
2031 | * {soft,}irq region. | ||
2032 | * | ||
2033 | * When this happens, we stop ->clock_task and only update the | ||
2034 | * prev_irq_time stamp to account for the part that fit, so that a next | ||
2035 | * update will consume the rest. This ensures ->clock_task is | ||
2036 | * monotonic. | ||
2037 | * | ||
2038 | * It does however cause some slight miss-attribution of {soft,}irq | ||
2039 | * time, a more accurate solution would be to update the irq_time using | ||
2040 | * the current rq->clock timestamp, except that would require using | ||
2041 | * atomic ops. | ||
2042 | */ | ||
2043 | if (irq_delta > delta) | ||
2044 | irq_delta = delta; | ||
2045 | |||
2046 | rq->prev_irq_time += irq_delta; | ||
2047 | delta -= irq_delta; | ||
2048 | rq->clock_task += delta; | ||
2049 | |||
2050 | if (irq_delta && sched_feat(NONIRQ_POWER)) | ||
2051 | sched_rt_avg_update(rq, irq_delta); | ||
2052 | } | ||
2053 | |||
2054 | #else /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
2055 | |||
2056 | static void update_rq_clock_task(struct rq *rq, s64 delta) | ||
2057 | { | ||
2058 | rq->clock_task += delta; | ||
2059 | } | ||
2060 | |||
2061 | #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ | ||
2062 | |||
1920 | #include "sched_idletask.c" | 2063 | #include "sched_idletask.c" |
1921 | #include "sched_fair.c" | 2064 | #include "sched_fair.c" |
1922 | #include "sched_rt.c" | 2065 | #include "sched_rt.c" |
2066 | #include "sched_stoptask.c" | ||
1923 | #ifdef CONFIG_SCHED_DEBUG | 2067 | #ifdef CONFIG_SCHED_DEBUG |
1924 | # include "sched_debug.c" | 2068 | # include "sched_debug.c" |
1925 | #endif | 2069 | #endif |
1926 | 2070 | ||
2071 | void sched_set_stop_task(int cpu, struct task_struct *stop) | ||
2072 | { | ||
2073 | struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 }; | ||
2074 | struct task_struct *old_stop = cpu_rq(cpu)->stop; | ||
2075 | |||
2076 | if (stop) { | ||
2077 | /* | ||
2078 | * Make it appear like a SCHED_FIFO task, its something | ||
2079 | * userspace knows about and won't get confused about. | ||
2080 | * | ||
2081 | * Also, it will make PI more or less work without too | ||
2082 | * much confusion -- but then, stop work should not | ||
2083 | * rely on PI working anyway. | ||
2084 | */ | ||
2085 | sched_setscheduler_nocheck(stop, SCHED_FIFO, ¶m); | ||
2086 | |||
2087 | stop->sched_class = &stop_sched_class; | ||
2088 | } | ||
2089 | |||
2090 | cpu_rq(cpu)->stop = stop; | ||
2091 | |||
2092 | if (old_stop) { | ||
2093 | /* | ||
2094 | * Reset it back to a normal scheduling class so that | ||
2095 | * it can die in pieces. | ||
2096 | */ | ||
2097 | old_stop->sched_class = &rt_sched_class; | ||
2098 | } | ||
2099 | } | ||
2100 | |||
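sched_set_stop_task() is the registration hook for the per-CPU stop/migration kthread owned by the cpu_stop code. A hedged sketch of the caller side (the real calls live in kernel/stop_machine.c; the wrapper names here are illustrative only):

        /* Hedged sketch: hand the stopper kthread to the scheduler when the
         * CPU comes up, and drop it again on unplug. */
        static void register_stopper(int cpu, struct task_struct *stopper_thread)
        {
                sched_set_stop_task(cpu, stopper_thread);
        }

        static void unregister_stopper(int cpu)
        {
                sched_set_stop_task(cpu, NULL);
        }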
1927 | /* | 2101 | /* |
1928 | * __normal_prio - return the priority that is based on the static prio | 2102 | * __normal_prio - return the priority that is based on the static prio |
1929 | */ | 2103 | */ |
@@ -1991,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p, | |||
1991 | p->sched_class->prio_changed(rq, p, oldprio, running); | 2165 | p->sched_class->prio_changed(rq, p, oldprio, running); |
1992 | } | 2166 | } |
1993 | 2167 | ||
2168 | static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) | ||
2169 | { | ||
2170 | const struct sched_class *class; | ||
2171 | |||
2172 | if (p->sched_class == rq->curr->sched_class) { | ||
2173 | rq->curr->sched_class->check_preempt_curr(rq, p, flags); | ||
2174 | } else { | ||
2175 | for_each_class(class) { | ||
2176 | if (class == rq->curr->sched_class) | ||
2177 | break; | ||
2178 | if (class == p->sched_class) { | ||
2179 | resched_task(rq->curr); | ||
2180 | break; | ||
2181 | } | ||
2182 | } | ||
2183 | } | ||
2184 | |||
2185 | /* | ||
2186 | * A queue event has occurred, and we're going to schedule. In | ||
2187 | * this case, we can save a useless back to back clock update. | ||
2188 | */ | ||
2189 | if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr)) | ||
2190 | rq->skip_clock_update = 1; | ||
2191 | } | ||
2192 | |||
1994 | #ifdef CONFIG_SMP | 2193 | #ifdef CONFIG_SMP |
1995 | /* | 2194 | /* |
1996 | * Is this task likely cache-hot: | 2195 | * Is this task likely cache-hot: |
@@ -2003,6 +2202,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd) | |||
2003 | if (p->sched_class != &fair_sched_class) | 2202 | if (p->sched_class != &fair_sched_class) |
2004 | return 0; | 2203 | return 0; |
2005 | 2204 | ||
2205 | if (unlikely(p->policy == SCHED_IDLE)) | ||
2206 | return 0; | ||
2207 | |||
2006 | /* | 2208 | /* |
2007 | * Buddy candidates are cache hot: | 2209 | * Buddy candidates are cache hot: |
2008 | */ | 2210 | */ |
@@ -2852,14 +3054,14 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
2852 | */ | 3054 | */ |
2853 | arch_start_context_switch(prev); | 3055 | arch_start_context_switch(prev); |
2854 | 3056 | ||
2855 | if (likely(!mm)) { | 3057 | if (!mm) { |
2856 | next->active_mm = oldmm; | 3058 | next->active_mm = oldmm; |
2857 | atomic_inc(&oldmm->mm_count); | 3059 | atomic_inc(&oldmm->mm_count); |
2858 | enter_lazy_tlb(oldmm, next); | 3060 | enter_lazy_tlb(oldmm, next); |
2859 | } else | 3061 | } else |
2860 | switch_mm(oldmm, mm, next); | 3062 | switch_mm(oldmm, mm, next); |
2861 | 3063 | ||
2862 | if (likely(!prev->mm)) { | 3064 | if (!prev->mm) { |
2863 | prev->active_mm = NULL; | 3065 | prev->active_mm = NULL; |
2864 | rq->prev_mm = oldmm; | 3066 | rq->prev_mm = oldmm; |
2865 | } | 3067 | } |
@@ -2974,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq) | |||
2974 | return delta; | 3176 | return delta; |
2975 | } | 3177 | } |
2976 | 3178 | ||
3179 | static unsigned long | ||
3180 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3181 | { | ||
3182 | load *= exp; | ||
3183 | load += active * (FIXED_1 - exp); | ||
3184 | load += 1UL << (FSHIFT - 1); | ||
3185 | return load >> FSHIFT; | ||
3186 | } | ||
3187 | |||
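calc_load() is plain fixed-point arithmetic: FSHIFT is 11, FIXED_1 is 2048, and EXP_1/EXP_5/EXP_15 (1884, 2014, 2037 in include/linux/sched.h) are the per-5-second decay factors; the new 1UL << (FSHIFT - 1) term rounds to nearest instead of truncating. A small worked example under those constants:

        /* Hedged example: one 1-minute-average step with two runnable tasks. */
        static void calc_load_example(void)
        {
                unsigned long load   = 0;               /* previous avg: 0.00 */
                unsigned long active = 2 * FIXED_1;     /* two runnable tasks */

                load = calc_load(load, EXP_1, active);
                /*
                 * = (0 * 1884 + 4096 * (2048 - 1884) + 1024) >> 11 = 328,
                 * i.e. ~0.16 -- the familiar first loadavg sample after
                 * 5 seconds with two runnable tasks.
                 */
        }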
2977 | #ifdef CONFIG_NO_HZ | 3188 | #ifdef CONFIG_NO_HZ |
2978 | /* | 3189 | /* |
2979 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. | 3190 | * For NO_HZ we delay the active fold to the next LOAD_FREQ update. |
@@ -3003,6 +3214,128 @@ static long calc_load_fold_idle(void) | |||
3003 | 3214 | ||
3004 | return delta; | 3215 | return delta; |
3005 | } | 3216 | } |
3217 | |||
3218 | /** | ||
3219 | * fixed_power_int - compute: x^n, in O(log n) time | ||
3220 | * | ||
3221 | * @x: base of the power | ||
3222 | * @frac_bits: fractional bits of @x | ||
3223 | * @n: power to raise @x to. | ||
3224 | * | ||
3225 | * By exploiting the relation between the definition of the natural power | ||
3226 | * function: x^n := x*x*...*x (x multiplied by itself for n times), and | ||
3227 | * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i, | ||
3228 | * (where: n_i \elem {0, 1}, the binary vector representing n), | ||
3229 | * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is | ||
3230 | * of course trivially computable in O(log_2 n), the length of our binary | ||
3231 | * vector. | ||
3232 | */ | ||
3233 | static unsigned long | ||
3234 | fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n) | ||
3235 | { | ||
3236 | unsigned long result = 1UL << frac_bits; | ||
3237 | |||
3238 | if (n) for (;;) { | ||
3239 | if (n & 1) { | ||
3240 | result *= x; | ||
3241 | result += 1UL << (frac_bits - 1); | ||
3242 | result >>= frac_bits; | ||
3243 | } | ||
3244 | n >>= 1; | ||
3245 | if (!n) | ||
3246 | break; | ||
3247 | x *= x; | ||
3248 | x += 1UL << (frac_bits - 1); | ||
3249 | x >>= frac_bits; | ||
3250 | } | ||
3251 | |||
3252 | return result; | ||
3253 | } | ||
3254 | |||
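To see the square-and-multiply loop in action, take n = 4 (binary 100): the result only picks up the x^4 factor, and every intermediate squaring stays in FSHIFT-bit fixed point with the same round-to-nearest term. A hedged usage example:

        /* Hedged example: decay factor for four missed 5-second periods of
         * the 1-minute average, i.e. EXP_1^4 in fixed point. */
        static unsigned long one_minute_decay_4(void)
        {
                return fixed_power_int(EXP_1, FSHIFT, 4);
                /* 1884/2048 ~= 0.92, so this returns 1466 ~= 0.716 */
        }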
3255 | /* | ||
3256 | * a1 = a0 * e + a * (1 - e) | ||
3257 | * | ||
3258 | * a2 = a1 * e + a * (1 - e) | ||
3259 | * = (a0 * e + a * (1 - e)) * e + a * (1 - e) | ||
3260 | * = a0 * e^2 + a * (1 - e) * (1 + e) | ||
3261 | * | ||
3262 | * a3 = a2 * e + a * (1 - e) | ||
3263 | * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e) | ||
3264 | * = a0 * e^3 + a * (1 - e) * (1 + e + e^2) | ||
3265 | * | ||
3266 | * ... | ||
3267 | * | ||
3268 | * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1] | ||
3269 | * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e) | ||
3270 | * = a0 * e^n + a * (1 - e^n) | ||
3271 | * | ||
3272 | * [1] application of the geometric series: | ||
3273 | * | ||
3274 | * n 1 - x^(n+1) | ||
3275 | * S_n := \Sum x^i = ------------- | ||
3276 | * i=0 1 - x | ||
3277 | */ | ||
3278 | static unsigned long | ||
3279 | calc_load_n(unsigned long load, unsigned long exp, | ||
3280 | unsigned long active, unsigned int n) | ||
3281 | { | ||
3282 | |||
3283 | return calc_load(load, fixed_power_int(exp, FSHIFT, n), active); | ||
3284 | } | ||
3285 | |||
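This derivation is what lets calc_load_n() collapse n missed periods into one call: decaying once by e^n is the same, up to fixed-point rounding, as applying the one-period update n times with a constant active count. A hedged sketch of that equivalence, useful as a mental model or a quick self-check:

        /* Hedged sketch: the naive n-step loop that calc_load_n() replaces;
         * both should agree to within rounding error. */
        static unsigned long
        calc_load_n_slow(unsigned long load, unsigned long exp,
                         unsigned long active, unsigned int n)
        {
                while (n--)
                        load = calc_load(load, exp, active);

                return load;
        }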
3286 | /* | ||
3287 | * NO_HZ can leave us missing all per-cpu ticks calling | ||
3288 | * calc_load_account_active(), but since an idle CPU folds its delta into | ||
3289 | * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold | ||
3290 | * in the pending idle delta if our idle period crossed a load cycle boundary. | ||
3291 | * | ||
3292 | * Once we've updated the global active value, we need to apply the exponential | ||
3293 | * weights adjusted to the number of cycles missed. | ||
3294 | */ | ||
3295 | static void calc_global_nohz(unsigned long ticks) | ||
3296 | { | ||
3297 | long delta, active, n; | ||
3298 | |||
3299 | if (time_before(jiffies, calc_load_update)) | ||
3300 | return; | ||
3301 | |||
3302 | /* | ||
3303 | * If we crossed a calc_load_update boundary, make sure to fold | ||
3304 | * any pending idle changes, the respective CPUs might have | ||
3305 | * missed the tick driven calc_load_account_active() update | ||
3306 | * due to NO_HZ. | ||
3307 | */ | ||
3308 | delta = calc_load_fold_idle(); | ||
3309 | if (delta) | ||
3310 | atomic_long_add(delta, &calc_load_tasks); | ||
3311 | |||
3312 | /* | ||
3313 | * If we were idle for multiple load cycles, apply them. | ||
3314 | */ | ||
3315 | if (ticks >= LOAD_FREQ) { | ||
3316 | n = ticks / LOAD_FREQ; | ||
3317 | |||
3318 | active = atomic_long_read(&calc_load_tasks); | ||
3319 | active = active > 0 ? active * FIXED_1 : 0; | ||
3320 | |||
3321 | avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); | ||
3322 | avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); | ||
3323 | avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); | ||
3324 | |||
3325 | calc_load_update += n * LOAD_FREQ; | ||
3326 | } | ||
3327 | |||
3328 | /* | ||
3329 | * Its possible the remainder of the above division also crosses | ||
3330 | * a LOAD_FREQ period, the regular check in calc_global_load() | ||
3331 | * which comes after this will take care of that. | ||
3332 | * | ||
3333 | * Consider us being 11 ticks before a cycle completion, and us | ||
3334 | * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will | ||
3335 | * age us 4 cycles, and the test in calc_global_load() will | ||
3336 | * pick up the final one. | ||
3337 | */ | ||
3338 | } | ||
3006 | #else | 3339 | #else |
3007 | static void calc_load_account_idle(struct rq *this_rq) | 3340 | static void calc_load_account_idle(struct rq *this_rq) |
3008 | { | 3341 | { |
@@ -3012,6 +3345,10 @@ static inline long calc_load_fold_idle(void) | |||
3012 | { | 3345 | { |
3013 | return 0; | 3346 | return 0; |
3014 | } | 3347 | } |
3348 | |||
3349 | static void calc_global_nohz(unsigned long ticks) | ||
3350 | { | ||
3351 | } | ||
3015 | #endif | 3352 | #endif |
3016 | 3353 | ||
3017 | /** | 3354 | /** |
@@ -3029,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) | |||
3029 | loads[2] = (avenrun[2] + offset) << shift; | 3366 | loads[2] = (avenrun[2] + offset) << shift; |
3030 | } | 3367 | } |
3031 | 3368 | ||
3032 | static unsigned long | ||
3033 | calc_load(unsigned long load, unsigned long exp, unsigned long active) | ||
3034 | { | ||
3035 | load *= exp; | ||
3036 | load += active * (FIXED_1 - exp); | ||
3037 | return load >> FSHIFT; | ||
3038 | } | ||
3039 | |||
3040 | /* | 3369 | /* |
3041 | * calc_load - update the avenrun load estimates 10 ticks after the | 3370 | * calc_load - update the avenrun load estimates 10 ticks after the |
3042 | * CPUs have updated calc_load_tasks. | 3371 | * CPUs have updated calc_load_tasks. |
3043 | */ | 3372 | */ |
3044 | void calc_global_load(void) | 3373 | void calc_global_load(unsigned long ticks) |
3045 | { | 3374 | { |
3046 | unsigned long upd = calc_load_update + 10; | ||
3047 | long active; | 3375 | long active; |
3048 | 3376 | ||
3049 | if (time_before(jiffies, upd)) | 3377 | calc_global_nohz(ticks); |
3378 | |||
3379 | if (time_before(jiffies, calc_load_update + 10)) | ||
3050 | return; | 3380 | return; |
3051 | 3381 | ||
3052 | active = atomic_long_read(&calc_load_tasks); | 3382 | active = atomic_long_read(&calc_load_tasks); |
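calc_global_load() now takes the number of ticks that elapsed so calc_global_nohz() can age the averages across a long tickless sleep in one go. The ticks value comes from the timekeeping path; the caller in kernel/timer.c looks roughly like this (quoted from memory, treat as a sketch):

        void do_timer(unsigned long ticks)
        {
                jiffies_64 += ticks;
                update_wall_time();
                calc_global_load(ticks);        /* ticks > 1 after a NO_HZ sleep */
        }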
@@ -3248,7 +3578,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq) | |||
3248 | 3578 | ||
3249 | if (task_current(rq, p)) { | 3579 | if (task_current(rq, p)) { |
3250 | update_rq_clock(rq); | 3580 | update_rq_clock(rq); |
3251 | ns = rq->clock - p->se.exec_start; | 3581 | ns = rq->clock_task - p->se.exec_start; |
3252 | if ((s64)ns < 0) | 3582 | if ((s64)ns < 0) |
3253 | ns = 0; | 3583 | ns = 0; |
3254 | } | 3584 | } |
@@ -3397,7 +3727,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset, | |||
3397 | tmp = cputime_to_cputime64(cputime); | 3727 | tmp = cputime_to_cputime64(cputime); |
3398 | if (hardirq_count() - hardirq_offset) | 3728 | if (hardirq_count() - hardirq_offset) |
3399 | cpustat->irq = cputime64_add(cpustat->irq, tmp); | 3729 | cpustat->irq = cputime64_add(cpustat->irq, tmp); |
3400 | else if (softirq_count()) | 3730 | else if (in_serving_softirq()) |
3401 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); | 3731 | cpustat->softirq = cputime64_add(cpustat->softirq, tmp); |
3402 | else | 3732 | else |
3403 | cpustat->system = cputime64_add(cpustat->system, tmp); | 3733 | cpustat->system = cputime64_add(cpustat->system, tmp); |
@@ -3584,7 +3914,7 @@ void scheduler_tick(void) | |||
3584 | curr->sched_class->task_tick(rq, curr, 0); | 3914 | curr->sched_class->task_tick(rq, curr, 0); |
3585 | raw_spin_unlock(&rq->lock); | 3915 | raw_spin_unlock(&rq->lock); |
3586 | 3916 | ||
3587 | perf_event_task_tick(curr); | 3917 | perf_event_task_tick(); |
3588 | 3918 | ||
3589 | #ifdef CONFIG_SMP | 3919 | #ifdef CONFIG_SMP |
3590 | rq->idle_at_tick = idle_cpu(cpu); | 3920 | rq->idle_at_tick = idle_cpu(cpu); |
@@ -3700,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev) | |||
3700 | { | 4030 | { |
3701 | if (prev->se.on_rq) | 4031 | if (prev->se.on_rq) |
3702 | update_rq_clock(rq); | 4032 | update_rq_clock(rq); |
3703 | rq->skip_clock_update = 0; | ||
3704 | prev->sched_class->put_prev_task(rq, prev); | 4033 | prev->sched_class->put_prev_task(rq, prev); |
3705 | } | 4034 | } |
3706 | 4035 | ||
@@ -3723,17 +4052,13 @@ pick_next_task(struct rq *rq) | |||
3723 | return p; | 4052 | return p; |
3724 | } | 4053 | } |
3725 | 4054 | ||
3726 | class = sched_class_highest; | 4055 | for_each_class(class) { |
3727 | for ( ; ; ) { | ||
3728 | p = class->pick_next_task(rq); | 4056 | p = class->pick_next_task(rq); |
3729 | if (p) | 4057 | if (p) |
3730 | return p; | 4058 | return p; |
3731 | /* | ||
3732 | * Will never be NULL as the idle class always | ||
3733 | * returns a non-NULL p: | ||
3734 | */ | ||
3735 | class = class->next; | ||
3736 | } | 4059 | } |
4060 | |||
4061 | BUG(); /* the idle class will always have a runnable task */ | ||
3737 | } | 4062 | } |
3738 | 4063 | ||
3739 | /* | 4064 | /* |
@@ -3762,7 +4087,6 @@ need_resched_nonpreemptible: | |||
3762 | hrtick_clear(rq); | 4087 | hrtick_clear(rq); |
3763 | 4088 | ||
3764 | raw_spin_lock_irq(&rq->lock); | 4089 | raw_spin_lock_irq(&rq->lock); |
3765 | clear_tsk_need_resched(prev); | ||
3766 | 4090 | ||
3767 | switch_count = &prev->nivcsw; | 4091 | switch_count = &prev->nivcsw; |
3768 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { | 4092 | if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { |
@@ -3794,6 +4118,8 @@ need_resched_nonpreemptible: | |||
3794 | 4118 | ||
3795 | put_prev_task(rq, prev); | 4119 | put_prev_task(rq, prev); |
3796 | next = pick_next_task(rq); | 4120 | next = pick_next_task(rq); |
4121 | clear_tsk_need_resched(prev); | ||
4122 | rq->skip_clock_update = 0; | ||
3797 | 4123 | ||
3798 | if (likely(prev != next)) { | 4124 | if (likely(prev != next)) { |
3799 | sched_info_switch(prev, next); | 4125 | sched_info_switch(prev, next); |
@@ -4358,6 +4684,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) | |||
4358 | 4684 | ||
4359 | rq = task_rq_lock(p, &flags); | 4685 | rq = task_rq_lock(p, &flags); |
4360 | 4686 | ||
4687 | trace_sched_pi_setprio(p, prio); | ||
4361 | oldprio = p->prio; | 4688 | oldprio = p->prio; |
4362 | prev_class = p->sched_class; | 4689 | prev_class = p->sched_class; |
4363 | on_rq = p->se.on_rq; | 4690 | on_rq = p->se.on_rq; |
@@ -4645,7 +4972,7 @@ recheck: | |||
4645 | } | 4972 | } |
4646 | 4973 | ||
4647 | if (user) { | 4974 | if (user) { |
4648 | retval = security_task_setscheduler(p, policy, param); | 4975 | retval = security_task_setscheduler(p); |
4649 | if (retval) | 4976 | if (retval) |
4650 | return retval; | 4977 | return retval; |
4651 | } | 4978 | } |
@@ -4661,6 +4988,15 @@ recheck: | |||
4661 | */ | 4988 | */ |
4662 | rq = __task_rq_lock(p); | 4989 | rq = __task_rq_lock(p); |
4663 | 4990 | ||
4991 | /* | ||
4992 | * Changing the policy of the stop threads its a very bad idea | ||
4993 | */ | ||
4994 | if (p == rq->stop) { | ||
4995 | __task_rq_unlock(rq); | ||
4996 | raw_spin_unlock_irqrestore(&p->pi_lock, flags); | ||
4997 | return -EINVAL; | ||
4998 | } | ||
4999 | |||
4664 | #ifdef CONFIG_RT_GROUP_SCHED | 5000 | #ifdef CONFIG_RT_GROUP_SCHED |
4665 | if (user) { | 5001 | if (user) { |
4666 | /* | 5002 | /* |
@@ -4887,13 +5223,13 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask) | |||
4887 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) | 5223 | if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) |
4888 | goto out_unlock; | 5224 | goto out_unlock; |
4889 | 5225 | ||
4890 | retval = security_task_setscheduler(p, 0, NULL); | 5226 | retval = security_task_setscheduler(p); |
4891 | if (retval) | 5227 | if (retval) |
4892 | goto out_unlock; | 5228 | goto out_unlock; |
4893 | 5229 | ||
4894 | cpuset_cpus_allowed(p, cpus_allowed); | 5230 | cpuset_cpus_allowed(p, cpus_allowed); |
4895 | cpumask_and(new_mask, in_mask, cpus_allowed); | 5231 | cpumask_and(new_mask, in_mask, cpus_allowed); |
4896 | again: | 5232 | again: |
4897 | retval = set_cpus_allowed_ptr(p, new_mask); | 5233 | retval = set_cpus_allowed_ptr(p, new_mask); |
4898 | 5234 | ||
4899 | if (!retval) { | 5235 | if (!retval) { |
@@ -6526,6 +6862,7 @@ struct s_data { | |||
6526 | cpumask_var_t nodemask; | 6862 | cpumask_var_t nodemask; |
6527 | cpumask_var_t this_sibling_map; | 6863 | cpumask_var_t this_sibling_map; |
6528 | cpumask_var_t this_core_map; | 6864 | cpumask_var_t this_core_map; |
6865 | cpumask_var_t this_book_map; | ||
6529 | cpumask_var_t send_covered; | 6866 | cpumask_var_t send_covered; |
6530 | cpumask_var_t tmpmask; | 6867 | cpumask_var_t tmpmask; |
6531 | struct sched_group **sched_group_nodes; | 6868 | struct sched_group **sched_group_nodes; |
@@ -6537,6 +6874,7 @@ enum s_alloc { | |||
6537 | sa_rootdomain, | 6874 | sa_rootdomain, |
6538 | sa_tmpmask, | 6875 | sa_tmpmask, |
6539 | sa_send_covered, | 6876 | sa_send_covered, |
6877 | sa_this_book_map, | ||
6540 | sa_this_core_map, | 6878 | sa_this_core_map, |
6541 | sa_this_sibling_map, | 6879 | sa_this_sibling_map, |
6542 | sa_nodemask, | 6880 | sa_nodemask, |
@@ -6572,31 +6910,48 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map, | |||
6572 | #ifdef CONFIG_SCHED_MC | 6910 | #ifdef CONFIG_SCHED_MC |
6573 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); | 6911 | static DEFINE_PER_CPU(struct static_sched_domain, core_domains); |
6574 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); | 6912 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); |
6575 | #endif /* CONFIG_SCHED_MC */ | ||
6576 | 6913 | ||
6577 | #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT) | ||
6578 | static int | 6914 | static int |
6579 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6915 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, |
6580 | struct sched_group **sg, struct cpumask *mask) | 6916 | struct sched_group **sg, struct cpumask *mask) |
6581 | { | 6917 | { |
6582 | int group; | 6918 | int group; |
6583 | 6919 | #ifdef CONFIG_SCHED_SMT | |
6584 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | 6920 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); |
6585 | group = cpumask_first(mask); | 6921 | group = cpumask_first(mask); |
6922 | #else | ||
6923 | group = cpu; | ||
6924 | #endif | ||
6586 | if (sg) | 6925 | if (sg) |
6587 | *sg = &per_cpu(sched_group_core, group).sg; | 6926 | *sg = &per_cpu(sched_group_core, group).sg; |
6588 | return group; | 6927 | return group; |
6589 | } | 6928 | } |
6590 | #elif defined(CONFIG_SCHED_MC) | 6929 | #endif /* CONFIG_SCHED_MC */ |
6930 | |||
6931 | /* | ||
6932 | * book sched-domains: | ||
6933 | */ | ||
6934 | #ifdef CONFIG_SCHED_BOOK | ||
6935 | static DEFINE_PER_CPU(struct static_sched_domain, book_domains); | ||
6936 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_book); | ||
6937 | |||
6591 | static int | 6938 | static int |
6592 | cpu_to_core_group(int cpu, const struct cpumask *cpu_map, | 6939 | cpu_to_book_group(int cpu, const struct cpumask *cpu_map, |
6593 | struct sched_group **sg, struct cpumask *unused) | 6940 | struct sched_group **sg, struct cpumask *mask) |
6594 | { | 6941 | { |
6942 | int group = cpu; | ||
6943 | #ifdef CONFIG_SCHED_MC | ||
6944 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | ||
6945 | group = cpumask_first(mask); | ||
6946 | #elif defined(CONFIG_SCHED_SMT) | ||
6947 | cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); | ||
6948 | group = cpumask_first(mask); | ||
6949 | #endif | ||
6595 | if (sg) | 6950 | if (sg) |
6596 | *sg = &per_cpu(sched_group_core, cpu).sg; | 6951 | *sg = &per_cpu(sched_group_book, group).sg; |
6597 | return cpu; | 6952 | return group; |
6598 | } | 6953 | } |
6599 | #endif | 6954 | #endif /* CONFIG_SCHED_BOOK */ |
6600 | 6955 | ||
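The new book level slots in between MC and the physical CPU level, so the full hierarchy on a machine that has everything becomes SMT -> MC -> BOOK -> CPU -> NUMA (today only s390 selects CONFIG_SCHED_BOOK). The architecture is expected to supply cpu_book_mask(); a hedged sketch of that glue with made-up structure and field names:

        /* Hedged sketch of the arch-provided topology hook; s390 supplies
         * the real one, and the per-cpu layout below is purely illustrative. */
        static inline const struct cpumask *cpu_book_mask(int cpu)
        {
                return &per_cpu(cpu_topology, cpu).book_mask;   /* assumed */
        }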
6601 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); | 6956 | static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); |
6602 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); | 6957 | static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); |
@@ -6606,7 +6961,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map, | |||
6606 | struct sched_group **sg, struct cpumask *mask) | 6961 | struct sched_group **sg, struct cpumask *mask) |
6607 | { | 6962 | { |
6608 | int group; | 6963 | int group; |
6609 | #ifdef CONFIG_SCHED_MC | 6964 | #ifdef CONFIG_SCHED_BOOK |
6965 | cpumask_and(mask, cpu_book_mask(cpu), cpu_map); | ||
6966 | group = cpumask_first(mask); | ||
6967 | #elif defined(CONFIG_SCHED_MC) | ||
6610 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); | 6968 | cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); |
6611 | group = cpumask_first(mask); | 6969 | group = cpumask_first(mask); |
6612 | #elif defined(CONFIG_SCHED_SMT) | 6970 | #elif defined(CONFIG_SCHED_SMT) |
@@ -6802,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) | |||
6802 | if (cpu != group_first_cpu(sd->groups)) | 7160 | if (cpu != group_first_cpu(sd->groups)) |
6803 | return; | 7161 | return; |
6804 | 7162 | ||
7163 | sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups)); | ||
7164 | |||
6805 | child = sd->child; | 7165 | child = sd->child; |
6806 | 7166 | ||
6807 | sd->groups->cpu_power = 0; | 7167 | sd->groups->cpu_power = 0; |
@@ -6867,6 +7227,9 @@ SD_INIT_FUNC(CPU) | |||
6867 | #ifdef CONFIG_SCHED_MC | 7227 | #ifdef CONFIG_SCHED_MC |
6868 | SD_INIT_FUNC(MC) | 7228 | SD_INIT_FUNC(MC) |
6869 | #endif | 7229 | #endif |
7230 | #ifdef CONFIG_SCHED_BOOK | ||
7231 | SD_INIT_FUNC(BOOK) | ||
7232 | #endif | ||
6870 | 7233 | ||
6871 | static int default_relax_domain_level = -1; | 7234 | static int default_relax_domain_level = -1; |
6872 | 7235 | ||
@@ -6916,6 +7279,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what, | |||
6916 | free_cpumask_var(d->tmpmask); /* fall through */ | 7279 | free_cpumask_var(d->tmpmask); /* fall through */ |
6917 | case sa_send_covered: | 7280 | case sa_send_covered: |
6918 | free_cpumask_var(d->send_covered); /* fall through */ | 7281 | free_cpumask_var(d->send_covered); /* fall through */ |
7282 | case sa_this_book_map: | ||
7283 | free_cpumask_var(d->this_book_map); /* fall through */ | ||
6919 | case sa_this_core_map: | 7284 | case sa_this_core_map: |
6920 | free_cpumask_var(d->this_core_map); /* fall through */ | 7285 | free_cpumask_var(d->this_core_map); /* fall through */ |
6921 | case sa_this_sibling_map: | 7286 | case sa_this_sibling_map: |
@@ -6962,8 +7327,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d, | |||
6962 | return sa_nodemask; | 7327 | return sa_nodemask; |
6963 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) | 7328 | if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) |
6964 | return sa_this_sibling_map; | 7329 | return sa_this_sibling_map; |
6965 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | 7330 | if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL)) |
6966 | return sa_this_core_map; | 7331 | return sa_this_core_map; |
7332 | if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) | ||
7333 | return sa_this_book_map; | ||
6967 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) | 7334 | if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) |
6968 | return sa_send_covered; | 7335 | return sa_send_covered; |
6969 | d->rd = alloc_rootdomain(); | 7336 | d->rd = alloc_rootdomain(); |
@@ -7021,6 +7388,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d, | |||
7021 | return sd; | 7388 | return sd; |
7022 | } | 7389 | } |
7023 | 7390 | ||
7391 | static struct sched_domain *__build_book_sched_domain(struct s_data *d, | ||
7392 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | ||
7393 | struct sched_domain *parent, int i) | ||
7394 | { | ||
7395 | struct sched_domain *sd = parent; | ||
7396 | #ifdef CONFIG_SCHED_BOOK | ||
7397 | sd = &per_cpu(book_domains, i).sd; | ||
7398 | SD_INIT(sd, BOOK); | ||
7399 | set_domain_attribute(sd, attr); | ||
7400 | cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i)); | ||
7401 | sd->parent = parent; | ||
7402 | parent->child = sd; | ||
7403 | cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask); | ||
7404 | #endif | ||
7405 | return sd; | ||
7406 | } | ||
7407 | |||
7024 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, | 7408 | static struct sched_domain *__build_mc_sched_domain(struct s_data *d, |
7025 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, | 7409 | const struct cpumask *cpu_map, struct sched_domain_attr *attr, |
7026 | struct sched_domain *parent, int i) | 7410 | struct sched_domain *parent, int i) |
@@ -7078,6 +7462,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l, | |||
7078 | d->send_covered, d->tmpmask); | 7462 | d->send_covered, d->tmpmask); |
7079 | break; | 7463 | break; |
7080 | #endif | 7464 | #endif |
7465 | #ifdef CONFIG_SCHED_BOOK | ||
7466 | case SD_LV_BOOK: /* set up book groups */ | ||
7467 | cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu)); | ||
7468 | if (cpu == cpumask_first(d->this_book_map)) | ||
7469 | init_sched_build_groups(d->this_book_map, cpu_map, | ||
7470 | &cpu_to_book_group, | ||
7471 | d->send_covered, d->tmpmask); | ||
7472 | break; | ||
7473 | #endif | ||
7081 | case SD_LV_CPU: /* set up physical groups */ | 7474 | case SD_LV_CPU: /* set up physical groups */ |
7082 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); | 7475 | cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); |
7083 | if (!cpumask_empty(d->nodemask)) | 7476 | if (!cpumask_empty(d->nodemask)) |
@@ -7125,12 +7518,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
7125 | 7518 | ||
7126 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); | 7519 | sd = __build_numa_sched_domains(&d, cpu_map, attr, i); |
7127 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); | 7520 | sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); |
7521 | sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i); | ||
7128 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); | 7522 | sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); |
7129 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); | 7523 | sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); |
7130 | } | 7524 | } |
7131 | 7525 | ||
7132 | for_each_cpu(i, cpu_map) { | 7526 | for_each_cpu(i, cpu_map) { |
7133 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); | 7527 | build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); |
7528 | build_sched_groups(&d, SD_LV_BOOK, cpu_map, i); | ||
7134 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); | 7529 | build_sched_groups(&d, SD_LV_MC, cpu_map, i); |
7135 | } | 7530 | } |
7136 | 7531 | ||
@@ -7161,6 +7556,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
7161 | init_sched_groups_power(i, sd); | 7556 | init_sched_groups_power(i, sd); |
7162 | } | 7557 | } |
7163 | #endif | 7558 | #endif |
7559 | #ifdef CONFIG_SCHED_BOOK | ||
7560 | for_each_cpu(i, cpu_map) { | ||
7561 | sd = &per_cpu(book_domains, i).sd; | ||
7562 | init_sched_groups_power(i, sd); | ||
7563 | } | ||
7564 | #endif | ||
7164 | 7565 | ||
7165 | for_each_cpu(i, cpu_map) { | 7566 | for_each_cpu(i, cpu_map) { |
7166 | sd = &per_cpu(phys_domains, i).sd; | 7567 | sd = &per_cpu(phys_domains, i).sd; |
@@ -7186,6 +7587,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map, | |||
7186 | sd = &per_cpu(cpu_domains, i).sd; | 7587 | sd = &per_cpu(cpu_domains, i).sd; |
7187 | #elif defined(CONFIG_SCHED_MC) | 7588 | #elif defined(CONFIG_SCHED_MC) |
7188 | sd = &per_cpu(core_domains, i).sd; | 7589 | sd = &per_cpu(core_domains, i).sd; |
7590 | #elif defined(CONFIG_SCHED_BOOK) | ||
7591 | sd = &per_cpu(book_domains, i).sd; | ||
7189 | #else | 7592 | #else |
7190 | sd = &per_cpu(phys_domains, i).sd; | 7593 | sd = &per_cpu(phys_domains, i).sd; |
7191 | #endif | 7594 | #endif |
@@ -8090,9 +8493,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent) | |||
8090 | 8493 | ||
8091 | return 1; | 8494 | return 1; |
8092 | 8495 | ||
8093 | err_free_rq: | 8496 | err_free_rq: |
8094 | kfree(cfs_rq); | 8497 | kfree(cfs_rq); |
8095 | err: | 8498 | err: |
8096 | return 0; | 8499 | return 0; |
8097 | } | 8500 | } |
8098 | 8501 | ||
@@ -8180,9 +8583,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) | |||
8180 | 8583 | ||
8181 | return 1; | 8584 | return 1; |
8182 | 8585 | ||
8183 | err_free_rq: | 8586 | err_free_rq: |
8184 | kfree(rt_rq); | 8587 | kfree(rt_rq); |
8185 | err: | 8588 | err: |
8186 | return 0; | 8589 | return 0; |
8187 | } | 8590 | } |
8188 | 8591 | ||
@@ -8309,12 +8712,12 @@ void sched_move_task(struct task_struct *tsk) | |||
8309 | if (unlikely(running)) | 8712 | if (unlikely(running)) |
8310 | tsk->sched_class->put_prev_task(rq, tsk); | 8713 | tsk->sched_class->put_prev_task(rq, tsk); |
8311 | 8714 | ||
8312 | set_task_rq(tsk, task_cpu(tsk)); | ||
8313 | |||
8314 | #ifdef CONFIG_FAIR_GROUP_SCHED | 8715 | #ifdef CONFIG_FAIR_GROUP_SCHED |
8315 | if (tsk->sched_class->moved_group) | 8716 | if (tsk->sched_class->task_move_group) |
8316 | tsk->sched_class->moved_group(tsk, on_rq); | 8717 | tsk->sched_class->task_move_group(tsk, on_rq); |
8718 | else | ||
8317 | #endif | 8719 | #endif |
8720 | set_task_rq(tsk, task_cpu(tsk)); | ||
8318 | 8721 | ||
8319 | if (unlikely(running)) | 8722 | if (unlikely(running)) |
8320 | tsk->sched_class->set_curr_task(rq); | 8723 | tsk->sched_class->set_curr_task(rq); |
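moved_group() becomes task_move_group(), and set_task_rq() is now done either inside that callback or in the fallback branch above, which lets the fair class rebase ->vruntime from the old cfs_rq to the new one around the switch. A hedged sketch of the fair-class hook (the real task_move_group_fair() lives in sched_fair.c; this shows the general shape, not a verbatim copy):

        static void task_move_group_fair(struct task_struct *p, int on_rq)
        {
                /* A sleeping task carries an absolute vruntime: make it
                 * relative to the old cfs_rq, switch groups, then rebase it
                 * on the new cfs_rq's min_vruntime. */
                if (!on_rq)
                        p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
                set_task_rq(p, task_cpu(p));
                if (!on_rq)
                        p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
        }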
@@ -8540,7 +8943,7 @@ static int tg_set_bandwidth(struct task_group *tg, | |||
8540 | raw_spin_unlock(&rt_rq->rt_runtime_lock); | 8943 | raw_spin_unlock(&rt_rq->rt_runtime_lock); |
8541 | } | 8944 | } |
8542 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); | 8945 | raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); |
8543 | unlock: | 8946 | unlock: |
8544 | read_unlock(&tasklist_lock); | 8947 | read_unlock(&tasklist_lock); |
8545 | mutex_unlock(&rt_constraints_mutex); | 8948 | mutex_unlock(&rt_constraints_mutex); |
8546 | 8949 | ||