Diffstat (limited to 'kernel/sched.c')
-rw-r--r--  kernel/sched.c | 593
1 file changed, 511 insertions(+), 82 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 41541d79e3c8..297d1a0eedb0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -426,9 +426,7 @@ struct root_domain {
426 */ 426 */
427 cpumask_var_t rto_mask; 427 cpumask_var_t rto_mask;
428 atomic_t rto_count; 428 atomic_t rto_count;
429#ifdef CONFIG_SMP
430 struct cpupri cpupri; 429 struct cpupri cpupri;
431#endif
432}; 430};
433 431
434/* 432/*
@@ -437,7 +435,7 @@ struct root_domain {
437 */ 435 */
438static struct root_domain def_root_domain; 436static struct root_domain def_root_domain;
439 437
440#endif 438#endif /* CONFIG_SMP */
441 439
442/* 440/*
443 * This is the main, per-CPU runqueue data structure. 441 * This is the main, per-CPU runqueue data structure.
@@ -488,11 +486,12 @@ struct rq {
488 */ 486 */
489 unsigned long nr_uninterruptible; 487 unsigned long nr_uninterruptible;
490 488
491 struct task_struct *curr, *idle; 489 struct task_struct *curr, *idle, *stop;
492 unsigned long next_balance; 490 unsigned long next_balance;
493 struct mm_struct *prev_mm; 491 struct mm_struct *prev_mm;
494 492
495 u64 clock; 493 u64 clock;
494 u64 clock_task;
496 495
497 atomic_t nr_iowait; 496 atomic_t nr_iowait;
498 497
@@ -520,6 +519,10 @@ struct rq {
520 u64 avg_idle; 519 u64 avg_idle;
521#endif 520#endif
522 521
522#ifdef CONFIG_IRQ_TIME_ACCOUNTING
523 u64 prev_irq_time;
524#endif
525
523 /* calc_load related fields */ 526 /* calc_load related fields */
524 unsigned long calc_load_update; 527 unsigned long calc_load_update;
525 long calc_load_active; 528 long calc_load_active;
@@ -557,18 +560,8 @@ struct rq {
557 560
558static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues); 561static DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
559 562
560static inline
561void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
562{
563 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
564 563
565 /* 564static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
566 * A queue event has occurred, and we're going to schedule. In
567 * this case, we can save a useless back to back clock update.
568 */
569 if (test_tsk_need_resched(p))
570 rq->skip_clock_update = 1;
571}
572 565
573static inline int cpu_of(struct rq *rq) 566static inline int cpu_of(struct rq *rq)
574{ 567{
@@ -643,10 +636,18 @@ static inline struct task_group *task_group(struct task_struct *p)
643 636
644#endif /* CONFIG_CGROUP_SCHED */ 637#endif /* CONFIG_CGROUP_SCHED */
645 638
646inline void update_rq_clock(struct rq *rq) 639static void update_rq_clock_task(struct rq *rq, s64 delta);
640
641static void update_rq_clock(struct rq *rq)
647{ 642{
648 if (!rq->skip_clock_update) 643 s64 delta;
649 rq->clock = sched_clock_cpu(cpu_of(rq)); 644
645 if (rq->skip_clock_update)
646 return;
647
648 delta = sched_clock_cpu(cpu_of(rq)) - rq->clock;
649 rq->clock += delta;
650 update_rq_clock_task(rq, delta);
650} 651}
651 652
652/* 653/*
@@ -723,7 +724,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
723 size_t cnt, loff_t *ppos) 724 size_t cnt, loff_t *ppos)
724{ 725{
725 char buf[64]; 726 char buf[64];
726 char *cmp = buf; 727 char *cmp;
727 int neg = 0; 728 int neg = 0;
728 int i; 729 int i;
729 730
@@ -734,6 +735,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
734 return -EFAULT; 735 return -EFAULT;
735 736
736 buf[cnt] = 0; 737 buf[cnt] = 0;
738 cmp = strstrip(buf);
737 739
738 if (strncmp(buf, "NO_", 3) == 0) { 740 if (strncmp(buf, "NO_", 3) == 0) {
739 neg = 1; 741 neg = 1;
@@ -741,9 +743,7 @@ sched_feat_write(struct file *filp, const char __user *ubuf,
741 } 743 }
742 744
743 for (i = 0; sched_feat_names[i]; i++) { 745 for (i = 0; sched_feat_names[i]; i++) {
744 int len = strlen(sched_feat_names[i]); 746 if (strcmp(cmp, sched_feat_names[i]) == 0) {
745
746 if (strncmp(cmp, sched_feat_names[i], len) == 0) {
747 if (neg) 747 if (neg)
748 sysctl_sched_features &= ~(1UL << i); 748 sysctl_sched_features &= ~(1UL << i);
749 else 749 else
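
The sched_feat_write() change strips trailing whitespace first (so the newline from `echo` no longer has to be tolerated by a prefix match) and then demands an exact feature-name match, so a feature whose name is a prefix of another can no longer be toggled by mistake. A user-space rendition of the same parsing step (feature table invented for the example):

#include <stdio.h>
#include <string.h>
#include <ctype.h>

static const char *feat_names[] = { "GENTLE_FAIR_SLEEPERS", "START_DEBIT",
				    "NONIRQ_POWER", NULL };
static unsigned long features;

/* Minimal stand-in for strstrip(): trim trailing whitespace in place. */
static char *strip(char *s)
{
	size_t len = strlen(s);

	while (len && isspace((unsigned char)s[len - 1]))
		s[--len] = '\0';
	return s;
}

static int sched_feat_set(char *buf)
{
	char *cmp = strip(buf);
	int neg = 0, i;

	if (strncmp(cmp, "NO_", 3) == 0) {
		neg = 1;
		cmp += 3;
	}
	for (i = 0; feat_names[i]; i++) {
		if (strcmp(cmp, feat_names[i]) == 0) {	/* exact match only */
			if (neg)
				features &= ~(1UL << i);
			else
				features |= 1UL << i;
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	char a[] = "NONIRQ_POWER\n", b[] = "NO_START_DEBIT\n";

	sched_feat_set(a);
	sched_feat_set(b);
	printf("features = %#lx\n", features);	/* bit 2 set, bit 1 clear */
	return 0;
}
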
@@ -1294,6 +1294,10 @@ static void resched_task(struct task_struct *p)
1294static void sched_rt_avg_update(struct rq *rq, u64 rt_delta) 1294static void sched_rt_avg_update(struct rq *rq, u64 rt_delta)
1295{ 1295{
1296} 1296}
1297
1298static void sched_avg_update(struct rq *rq)
1299{
1300}
1297#endif /* CONFIG_SMP */ 1301#endif /* CONFIG_SMP */
1298 1302
1299#if BITS_PER_LONG == 32 1303#if BITS_PER_LONG == 32
@@ -1836,7 +1840,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1836 1840
1837static const struct sched_class rt_sched_class; 1841static const struct sched_class rt_sched_class;
1838 1842
1839#define sched_class_highest (&rt_sched_class) 1843#define sched_class_highest (&stop_sched_class)
1840#define for_each_class(class) \ 1844#define for_each_class(class) \
1841 for (class = sched_class_highest; class; class = class->next) 1845 for (class = sched_class_highest; class; class = class->next)
1842 1846
@@ -1854,12 +1858,6 @@ static void dec_nr_running(struct rq *rq)
1854 1858
1855static void set_load_weight(struct task_struct *p) 1859static void set_load_weight(struct task_struct *p)
1856{ 1860{
1857 if (task_has_rt_policy(p)) {
1858 p->se.load.weight = 0;
1859 p->se.load.inv_weight = WMULT_CONST;
1860 return;
1861 }
1862
1863 /* 1861 /*
1864 * SCHED_IDLE tasks get minimal weight: 1862 * SCHED_IDLE tasks get minimal weight:
1865 */ 1863 */
@@ -1913,13 +1911,193 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
1913 dec_nr_running(rq); 1911 dec_nr_running(rq);
1914} 1912}
1915 1913
1914#ifdef CONFIG_IRQ_TIME_ACCOUNTING
1915
1916/*
1917 * There are no locks covering percpu hardirq/softirq time.
1918 * They are only modified in account_system_vtime, on corresponding CPU
1919 * with interrupts disabled. So, writes are safe.
1920 * They are read and saved off onto struct rq in update_rq_clock().
1921 * This may result in other CPU reading this CPU's irq time and can
1922 * race with irq/account_system_vtime on this CPU. We would either get old
1923 * or new value with a side effect of accounting a slice of irq time to wrong
1924 * task when irq is in progress while we read rq->clock. That is a worthy
1925 * compromise in place of having locks on each irq in account_system_time.
1926 */
1927static DEFINE_PER_CPU(u64, cpu_hardirq_time);
1928static DEFINE_PER_CPU(u64, cpu_softirq_time);
1929
1930static DEFINE_PER_CPU(u64, irq_start_time);
1931static int sched_clock_irqtime;
1932
1933void enable_sched_clock_irqtime(void)
1934{
1935 sched_clock_irqtime = 1;
1936}
1937
1938void disable_sched_clock_irqtime(void)
1939{
1940 sched_clock_irqtime = 0;
1941}
1942
1943#ifndef CONFIG_64BIT
1944static DEFINE_PER_CPU(seqcount_t, irq_time_seq);
1945
1946static inline void irq_time_write_begin(void)
1947{
1948 __this_cpu_inc(irq_time_seq.sequence);
1949 smp_wmb();
1950}
1951
1952static inline void irq_time_write_end(void)
1953{
1954 smp_wmb();
1955 __this_cpu_inc(irq_time_seq.sequence);
1956}
1957
1958static inline u64 irq_time_read(int cpu)
1959{
1960 u64 irq_time;
1961 unsigned seq;
1962
1963 do {
1964 seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
1965 irq_time = per_cpu(cpu_softirq_time, cpu) +
1966 per_cpu(cpu_hardirq_time, cpu);
1967 } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
1968
1969 return irq_time;
1970}
1971#else /* CONFIG_64BIT */
1972static inline void irq_time_write_begin(void)
1973{
1974}
1975
1976static inline void irq_time_write_end(void)
1977{
1978}
1979
1980static inline u64 irq_time_read(int cpu)
1981{
1982 return per_cpu(cpu_softirq_time, cpu) + per_cpu(cpu_hardirq_time, cpu);
1983}
1984#endif /* CONFIG_64BIT */
1985
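
The #ifndef CONFIG_64BIT block above exists because a u64 load can tear on 32-bit: the writer bumps a sequence counter around each update, and the reader retries until it sees the same even value before and after the read. A minimal user-space sketch of that read/write protocol (single-threaded and illustrative only; a concurrent version would also need atomic accesses, and none of the names below are kernel API):

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the per-cpu counters in the patch. */
static unsigned int seq;		/* plays irq_time_seq.sequence */
static uint64_t hardirq_time, softirq_time;

static void irq_time_write_begin(void)
{
	seq++;				/* sequence goes odd: update in progress */
	__sync_synchronize();		/* order the bump before the data write */
}

static void irq_time_write_end(void)
{
	__sync_synchronize();		/* order the data write before the bump */
	seq++;				/* sequence goes even again */
}

static uint64_t irq_time_read(void)
{
	unsigned int start;
	uint64_t sum;

	do {
		start = seq;
		__sync_synchronize();
		sum = hardirq_time + softirq_time;
		__sync_synchronize();
	} while ((start & 1) || seq != start);	/* retry if a write was in flight */

	return sum;
}

int main(void)
{
	irq_time_write_begin();
	hardirq_time += 1000;
	irq_time_write_end();
	printf("irq time: %llu\n", (unsigned long long)irq_time_read());
	return 0;
}
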
1986/*
1987 * Called before incrementing preempt_count on {soft,}irq_enter
1988 * and before decrementing preempt_count on {soft,}irq_exit.
1989 */
1990void account_system_vtime(struct task_struct *curr)
1991{
1992 unsigned long flags;
1993 s64 delta;
1994 int cpu;
1995
1996 if (!sched_clock_irqtime)
1997 return;
1998
1999 local_irq_save(flags);
2000
2001 cpu = smp_processor_id();
2002 delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
2003 __this_cpu_add(irq_start_time, delta);
2004
2005 irq_time_write_begin();
2006 /*
2007 * We do not account for softirq time from ksoftirqd here.
2008 * We want to continue accounting softirq time to ksoftirqd thread
2009 * in that case, so as not to confuse scheduler with a special task
2010 * that do not consume any time, but still wants to run.
2011 */
2012 if (hardirq_count())
2013 __this_cpu_add(cpu_hardirq_time, delta);
2014 else if (in_serving_softirq() && !(curr->flags & PF_KSOFTIRQD))
2015 __this_cpu_add(cpu_softirq_time, delta);
2016
2017 irq_time_write_end();
2018 local_irq_restore(flags);
2019}
2020EXPORT_SYMBOL_GPL(account_system_vtime);
2021
2022static void update_rq_clock_task(struct rq *rq, s64 delta)
2023{
2024 s64 irq_delta;
2025
2026 irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
2027
2028 /*
2029 * Since irq_time is only updated on {soft,}irq_exit, we might run into
2030 * this case when a previous update_rq_clock() happened inside a
2031 * {soft,}irq region.
2032 *
2033 * When this happens, we stop ->clock_task and only update the
2034 * prev_irq_time stamp to account for the part that fit, so that a next
2035 * update will consume the rest. This ensures ->clock_task is
2036 * monotonic.
2037 *
2038 * It does however cause some slight miss-attribution of {soft,}irq
2039 * time, a more accurate solution would be to update the irq_time using
2040 * the current rq->clock timestamp, except that would require using
2041 * atomic ops.
2042 */
2043 if (irq_delta > delta)
2044 irq_delta = delta;
2045
2046 rq->prev_irq_time += irq_delta;
2047 delta -= irq_delta;
2048 rq->clock_task += delta;
2049
2050 if (irq_delta && sched_feat(NONIRQ_POWER))
2051 sched_rt_avg_update(rq, irq_delta);
2052}
2053
2054#else /* CONFIG_IRQ_TIME_ACCOUNTING */
2055
2056static void update_rq_clock_task(struct rq *rq, s64 delta)
2057{
2058 rq->clock_task += delta;
2059}
2060
2061#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
2062
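
The clamp in update_rq_clock_task() is what keeps rq->clock_task monotonic: when the sampled irq time grew by more than rq->clock itself (possible if the previous update ran inside an irq section), only the part that fits is consumed now and the remainder is left for the next update, as the comment above describes. A small stand-alone sketch of that arithmetic with made-up microsecond values:

#include <stdio.h>
#include <stdint.h>

/* delta is the rq->clock advance, irq_now the current per-cpu irq time,
 * *prev_irq the last consumed stamp. Returns how far clock_task may
 * advance; never negative, so clock_task cannot go backwards. */
static int64_t clock_task_delta(int64_t delta, int64_t irq_now, int64_t *prev_irq)
{
	int64_t irq_delta = irq_now - *prev_irq;

	if (irq_delta > delta)		/* more irq time than wall delta: clamp */
		irq_delta = delta;

	*prev_irq += irq_delta;		/* consume what fits; rest carries over */
	return delta - irq_delta;
}

int main(void)
{
	int64_t prev_irq = 0, clock_task = 0;

	/* 1st update: clock advanced 100us, but 150us of irq time accrued. */
	clock_task += clock_task_delta(100, 150, &prev_irq);
	printf("clock_task=%lld prev_irq=%lld\n",
	       (long long)clock_task, (long long)prev_irq);
	/* -> clock_task=0, prev_irq=100: clock_task stood still, not reversed. */

	/* 2nd update: clock advanced another 100us, no new irq time. */
	clock_task += clock_task_delta(100, 150, &prev_irq);
	printf("clock_task=%lld prev_irq=%lld\n",
	       (long long)clock_task, (long long)prev_irq);
	/* -> clock_task=50, prev_irq=150: the leftover 50us is consumed now. */
	return 0;
}
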
1916#include "sched_idletask.c" 2063#include "sched_idletask.c"
1917#include "sched_fair.c" 2064#include "sched_fair.c"
1918#include "sched_rt.c" 2065#include "sched_rt.c"
2066#include "sched_stoptask.c"
1919#ifdef CONFIG_SCHED_DEBUG 2067#ifdef CONFIG_SCHED_DEBUG
1920# include "sched_debug.c" 2068# include "sched_debug.c"
1921#endif 2069#endif
1922 2070
2071void sched_set_stop_task(int cpu, struct task_struct *stop)
2072{
2073 struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
2074 struct task_struct *old_stop = cpu_rq(cpu)->stop;
2075
2076 if (stop) {
2077 /*
2078 * Make it appear like a SCHED_FIFO task, its something
2079 * userspace knows about and won't get confused about.
2080 *
2081 * Also, it will make PI more or less work without too
2082 * much confusion -- but then, stop work should not
2083 * rely on PI working anyway.
2084 */
2085 sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
2086
2087 stop->sched_class = &stop_sched_class;
2088 }
2089
2090 cpu_rq(cpu)->stop = stop;
2091
2092 if (old_stop) {
2093 /*
2094 * Reset it back to a normal scheduling class so that
2095 * it can die in pieces.
2096 */
2097 old_stop->sched_class = &rt_sched_class;
2098 }
2099}
2100
1923/* 2101/*
1924 * __normal_prio - return the priority that is based on the static prio 2102 * __normal_prio - return the priority that is based on the static prio
1925 */ 2103 */
@@ -1987,6 +2165,31 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
1987 p->sched_class->prio_changed(rq, p, oldprio, running); 2165 p->sched_class->prio_changed(rq, p, oldprio, running);
1988} 2166}
1989 2167
2168static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2169{
2170 const struct sched_class *class;
2171
2172 if (p->sched_class == rq->curr->sched_class) {
2173 rq->curr->sched_class->check_preempt_curr(rq, p, flags);
2174 } else {
2175 for_each_class(class) {
2176 if (class == rq->curr->sched_class)
2177 break;
2178 if (class == p->sched_class) {
2179 resched_task(rq->curr);
2180 break;
2181 }
2182 }
2183 }
2184
2185 /*
2186 * A queue event has occurred, and we're going to schedule. In
2187 * this case, we can save a useless back to back clock update.
2188 */
2189 if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
2190 rq->skip_clock_update = 1;
2191}
2192
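
The cross-class case in the new check_preempt_curr() walks the classes from the highest priority one (stop, then rt, fair, idle, per the new sched_class_highest earlier in this diff) and reschedules as soon as the waking task's class is found before the running task's class. A stand-alone sketch of that ranking walk (class list hard-coded here; the kernel follows the ->next chain instead):

#include <stdio.h>
#include <string.h>

/* Highest priority first, mirroring stop -> rt -> fair -> idle. */
static const char *classes[] = { "stop", "rt", "fair", "idle" };

/* Returns 1 if a task of class @wakee preempts a running task of class
 * @curr purely on class rank (same-class decisions are delegated to the
 * class's own check_preempt_curr hook in the real code). */
static int class_preempts(const char *wakee, const char *curr)
{
	size_t i;

	for (i = 0; i < sizeof(classes) / sizeof(classes[0]); i++) {
		if (!strcmp(classes[i], curr))
			return 0;	/* reached the running class first */
		if (!strcmp(classes[i], wakee))
			return 1;	/* wakee's class outranks curr */
	}
	return 0;
}

int main(void)
{
	printf("rt wakes while fair runs:  %d\n", class_preempts("rt", "fair"));	/* 1 */
	printf("fair wakes while rt runs:  %d\n", class_preempts("fair", "rt"));	/* 0 */
	printf("stop wakes while rt runs:  %d\n", class_preempts("stop", "rt"));	/* 1 */
	return 0;
}
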
1990#ifdef CONFIG_SMP 2193#ifdef CONFIG_SMP
1991/* 2194/*
1992 * Is this task likely cache-hot: 2195 * Is this task likely cache-hot:
@@ -1999,6 +2202,9 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
1999 if (p->sched_class != &fair_sched_class) 2202 if (p->sched_class != &fair_sched_class)
2000 return 0; 2203 return 0;
2001 2204
2205 if (unlikely(p->policy == SCHED_IDLE))
2206 return 0;
2207
2002 /* 2208 /*
2003 * Buddy candidates are cache hot: 2209 * Buddy candidates are cache hot:
2004 */ 2210 */
@@ -2848,14 +3054,14 @@ context_switch(struct rq *rq, struct task_struct *prev,
2848 */ 3054 */
2849 arch_start_context_switch(prev); 3055 arch_start_context_switch(prev);
2850 3056
2851 if (likely(!mm)) { 3057 if (!mm) {
2852 next->active_mm = oldmm; 3058 next->active_mm = oldmm;
2853 atomic_inc(&oldmm->mm_count); 3059 atomic_inc(&oldmm->mm_count);
2854 enter_lazy_tlb(oldmm, next); 3060 enter_lazy_tlb(oldmm, next);
2855 } else 3061 } else
2856 switch_mm(oldmm, mm, next); 3062 switch_mm(oldmm, mm, next);
2857 3063
2858 if (likely(!prev->mm)) { 3064 if (!prev->mm) {
2859 prev->active_mm = NULL; 3065 prev->active_mm = NULL;
2860 rq->prev_mm = oldmm; 3066 rq->prev_mm = oldmm;
2861 } 3067 }
@@ -2970,6 +3176,15 @@ static long calc_load_fold_active(struct rq *this_rq)
2970 return delta; 3176 return delta;
2971} 3177}
2972 3178
3179static unsigned long
3180calc_load(unsigned long load, unsigned long exp, unsigned long active)
3181{
3182 load *= exp;
3183 load += active * (FIXED_1 - exp);
3184 load += 1UL << (FSHIFT - 1);
3185 return load >> FSHIFT;
3186}
3187
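
calc_load() is the usual exponentially weighted average in FSHIFT-bit fixed point, avg' = avg*e + active*(1 - e), with FIXED_1 = 1 << FSHIFT playing 1.0; the added `load += 1UL << (FSHIFT - 1)` rounds to nearest, which the copy removed further down in this diff did not do. A compilable example using the kernel's 1-minute constant (EXP_1 = 1884, roughly exp(-5s/60s) in fixed point):

#include <stdio.h>

#define FSHIFT   11
#define FIXED_1  (1UL << FSHIFT)	/* 2048 == 1.0 */
#define EXP_1    1884			/* ~ exp(-5s/1min) * FIXED_1 */

/* Same shape as the kernel helper: one 5-second decay step. */
static unsigned long calc_load(unsigned long load, unsigned long exp,
			       unsigned long active)
{
	load *= exp;
	load += active * (FIXED_1 - exp);
	load += 1UL << (FSHIFT - 1);	/* round to nearest, not truncate */
	return load >> FSHIFT;
}

int main(void)
{
	unsigned long avg = FIXED_1 / 2;	/* 1-minute loadavg of 0.50 */
	unsigned long active = 2 * FIXED_1;	/* 2 runnable tasks */
	int i;

	for (i = 0; i < 12; i++)		/* one minute of 5s ticks */
		avg = calc_load(avg, EXP_1, active);

	printf("loadavg after 1 minute: %lu.%02lu\n",
	       avg >> FSHIFT, ((avg & (FIXED_1 - 1)) * 100) >> FSHIFT);
	return 0;
}
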
2973#ifdef CONFIG_NO_HZ 3188#ifdef CONFIG_NO_HZ
2974/* 3189/*
2975 * For NO_HZ we delay the active fold to the next LOAD_FREQ update. 3190 * For NO_HZ we delay the active fold to the next LOAD_FREQ update.
@@ -2999,6 +3214,128 @@ static long calc_load_fold_idle(void)
2999 3214
3000 return delta; 3215 return delta;
3001} 3216}
3217
3218/**
3219 * fixed_power_int - compute: x^n, in O(log n) time
3220 *
3221 * @x: base of the power
3222 * @frac_bits: fractional bits of @x
3223 * @n: power to raise @x to.
3224 *
3225 * By exploiting the relation between the definition of the natural power
3226 * function: x^n := x*x*...*x (x multiplied by itself for n times), and
3227 * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
3228 * (where: n_i \elem {0, 1}, the binary vector representing n),
3229 * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
3230 * of course trivially computable in O(log_2 n), the length of our binary
3231 * vector.
3232 */
3233static unsigned long
3234fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
3235{
3236 unsigned long result = 1UL << frac_bits;
3237
3238 if (n) for (;;) {
3239 if (n & 1) {
3240 result *= x;
3241 result += 1UL << (frac_bits - 1);
3242 result >>= frac_bits;
3243 }
3244 n >>= 1;
3245 if (!n)
3246 break;
3247 x *= x;
3248 x += 1UL << (frac_bits - 1);
3249 x >>= frac_bits;
3250 }
3251
3252 return result;
3253}
3254
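
fixed_power_int() is plain square-and-multiply carried out in the same fixed-point format, rounding after every multiply, so aging the decay factor over n missed periods costs about log2(n) multiplies instead of n. A user-space copy checked against libm (the choice of n below is arbitrary):

#include <math.h>
#include <stdio.h>

#define FSHIFT  11
#define FIXED_1 (1UL << FSHIFT)
#define EXP_1   1884			/* exp(-5s/1min) in fixed point */

/* x^n in frac_bits fixed point, O(log n) multiplies. */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;	/* 1.0 */

	while (n) {
		if (n & 1) {
			result *= x;
			result += 1UL << (frac_bits - 1);	/* round */
			result >>= frac_bits;
		}
		n >>= 1;
		if (!n)
			break;
		x *= x;
		x += 1UL << (frac_bits - 1);
		x >>= frac_bits;
	}
	return result;
}

int main(void)
{
	unsigned int n = 37;		/* e.g. 37 missed 5-second load periods */
	unsigned long fp = fixed_power_int(EXP_1, FSHIFT, n);
	double ref = pow((double)EXP_1 / FIXED_1, n);

	printf("fixed point: %lu/%lu = %.4f, libm: %.4f\n",
	       fp, FIXED_1, (double)fp / FIXED_1, ref);
	return 0;
}
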
3255/*
3256 * a1 = a0 * e + a * (1 - e)
3257 *
3258 * a2 = a1 * e + a * (1 - e)
3259 * = (a0 * e + a * (1 - e)) * e + a * (1 - e)
3260 * = a0 * e^2 + a * (1 - e) * (1 + e)
3261 *
3262 * a3 = a2 * e + a * (1 - e)
3263 * = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
3264 * = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
3265 *
3266 * ...
3267 *
3268 * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^n-1) [1]
3269 * = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
3270 * = a0 * e^n + a * (1 - e^n)
3271 *
3272 * [1] application of the geometric series:
3273 *
3274 * n 1 - x^(n+1)
3275 * S_n := \Sum x^i = -------------
3276 * i=0 1 - x
3277 */
3278static unsigned long
3279calc_load_n(unsigned long load, unsigned long exp,
3280 unsigned long active, unsigned int n)
3281{
3282
3283 return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
3284}
3285
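
The derivation above reduces n applications of calc_load() with a constant active value to the closed form a_n = a_0*e^n + a*(1 - e^n), which calc_load_n() evaluates with fixed_power_int(). A quick double-precision check of that identity (floating point only, invented inputs; the kernel does the same in fixed point):

#include <stdio.h>

int main(void)
{
	const double e = 1884.0 / 2048.0;	/* 1-minute decay per 5s period */
	const double a0 = 0.5;			/* starting load average */
	const double a = 3.0;			/* constant "active" load */
	const int n = 7;			/* periods missed while idle */
	double iter = a0, en = 1.0;
	int i;

	for (i = 0; i < n; i++) {
		iter = iter * e + a * (1.0 - e);	/* calc_load() n times */
		en *= e;				/* accumulate e^n */
	}

	/* closed form used by calc_load_n(): a0*e^n + a*(1 - e^n) */
	printf("iterated: %.6f  closed form: %.6f\n",
	       iter, a0 * en + a * (1.0 - en));
	return 0;
}
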
3286/*
3287 * NO_HZ can leave us missing all per-cpu ticks calling
3288 * calc_load_account_active(), but since an idle CPU folds its delta into
3289 * calc_load_tasks_idle per calc_load_account_idle(), all we need to do is fold
3290 * in the pending idle delta if our idle period crossed a load cycle boundary.
3291 *
3292 * Once we've updated the global active value, we need to apply the exponential
3293 * weights adjusted to the number of cycles missed.
3294 */
3295static void calc_global_nohz(unsigned long ticks)
3296{
3297 long delta, active, n;
3298
3299 if (time_before(jiffies, calc_load_update))
3300 return;
3301
3302 /*
3303 * If we crossed a calc_load_update boundary, make sure to fold
3304 * any pending idle changes, the respective CPUs might have
3305 * missed the tick driven calc_load_account_active() update
3306 * due to NO_HZ.
3307 */
3308 delta = calc_load_fold_idle();
3309 if (delta)
3310 atomic_long_add(delta, &calc_load_tasks);
3311
3312 /*
3313 * If we were idle for multiple load cycles, apply them.
3314 */
3315 if (ticks >= LOAD_FREQ) {
3316 n = ticks / LOAD_FREQ;
3317
3318 active = atomic_long_read(&calc_load_tasks);
3319 active = active > 0 ? active * FIXED_1 : 0;
3320
3321 avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
3322 avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
3323 avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);
3324
3325 calc_load_update += n * LOAD_FREQ;
3326 }
3327
3328 /*
3329 * Its possible the remainder of the above division also crosses
3330 * a LOAD_FREQ period, the regular check in calc_global_load()
3331 * which comes after this will take care of that.
3332 *
3333 * Consider us being 11 ticks before a cycle completion, and us
3334 * sleeping for 4*LOAD_FREQ + 22 ticks, then the above code will
3335 * age us 4 cycles, and the test in calc_global_load() will
3336 * pick up the final one.
3337 */
3338}
3002#else 3339#else
3003static void calc_load_account_idle(struct rq *this_rq) 3340static void calc_load_account_idle(struct rq *this_rq)
3004{ 3341{
@@ -3008,6 +3345,10 @@ static inline long calc_load_fold_idle(void)
3008{ 3345{
3009 return 0; 3346 return 0;
3010} 3347}
3348
3349static void calc_global_nohz(unsigned long ticks)
3350{
3351}
3011#endif 3352#endif
3012 3353
3013/** 3354/**
@@ -3025,24 +3366,17 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
3025 loads[2] = (avenrun[2] + offset) << shift; 3366 loads[2] = (avenrun[2] + offset) << shift;
3026} 3367}
3027 3368
3028static unsigned long
3029calc_load(unsigned long load, unsigned long exp, unsigned long active)
3030{
3031 load *= exp;
3032 load += active * (FIXED_1 - exp);
3033 return load >> FSHIFT;
3034}
3035
3036/* 3369/*
3037 * calc_load - update the avenrun load estimates 10 ticks after the 3370 * calc_load - update the avenrun load estimates 10 ticks after the
3038 * CPUs have updated calc_load_tasks. 3371 * CPUs have updated calc_load_tasks.
3039 */ 3372 */
3040void calc_global_load(void) 3373void calc_global_load(unsigned long ticks)
3041{ 3374{
3042 unsigned long upd = calc_load_update + 10;
3043 long active; 3375 long active;
3044 3376
3045 if (time_before(jiffies, upd)) 3377 calc_global_nohz(ticks);
3378
3379 if (time_before(jiffies, calc_load_update + 10))
3046 return; 3380 return;
3047 3381
3048 active = atomic_long_read(&calc_load_tasks); 3382 active = atomic_long_read(&calc_load_tasks);
@@ -3182,6 +3516,8 @@ static void update_cpu_load(struct rq *this_rq)
3182 3516
3183 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i; 3517 this_rq->cpu_load[i] = (old_load * (scale - 1) + new_load) >> i;
3184 } 3518 }
3519
3520 sched_avg_update(this_rq);
3185} 3521}
3186 3522
3187static void update_cpu_load_active(struct rq *this_rq) 3523static void update_cpu_load_active(struct rq *this_rq)
@@ -3242,7 +3578,7 @@ static u64 do_task_delta_exec(struct task_struct *p, struct rq *rq)
3242 3578
3243 if (task_current(rq, p)) { 3579 if (task_current(rq, p)) {
3244 update_rq_clock(rq); 3580 update_rq_clock(rq);
3245 ns = rq->clock - p->se.exec_start; 3581 ns = rq->clock_task - p->se.exec_start;
3246 if ((s64)ns < 0) 3582 if ((s64)ns < 0)
3247 ns = 0; 3583 ns = 0;
3248 } 3584 }
@@ -3391,7 +3727,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
3391 tmp = cputime_to_cputime64(cputime); 3727 tmp = cputime_to_cputime64(cputime);
3392 if (hardirq_count() - hardirq_offset) 3728 if (hardirq_count() - hardirq_offset)
3393 cpustat->irq = cputime64_add(cpustat->irq, tmp); 3729 cpustat->irq = cputime64_add(cpustat->irq, tmp);
3394 else if (softirq_count()) 3730 else if (in_serving_softirq())
3395 cpustat->softirq = cputime64_add(cpustat->softirq, tmp); 3731 cpustat->softirq = cputime64_add(cpustat->softirq, tmp);
3396 else 3732 else
3397 cpustat->system = cputime64_add(cpustat->system, tmp); 3733 cpustat->system = cputime64_add(cpustat->system, tmp);
@@ -3507,9 +3843,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
3507 rtime = nsecs_to_cputime(p->se.sum_exec_runtime); 3843 rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
3508 3844
3509 if (total) { 3845 if (total) {
3510 u64 temp; 3846 u64 temp = rtime;
3511 3847
3512 temp = (u64)(rtime * utime); 3848 temp *= utime;
3513 do_div(temp, total); 3849 do_div(temp, total);
3514 utime = (cputime_t)temp; 3850 utime = (cputime_t)temp;
3515 } else 3851 } else
@@ -3540,9 +3876,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
3540 rtime = nsecs_to_cputime(cputime.sum_exec_runtime); 3876 rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
3541 3877
3542 if (total) { 3878 if (total) {
3543 u64 temp; 3879 u64 temp = rtime;
3544 3880
3545 temp = (u64)(rtime * cputime.utime); 3881 temp *= cputime.utime;
3546 do_div(temp, total); 3882 do_div(temp, total);
3547 utime = (cputime_t)temp; 3883 utime = (cputime_t)temp;
3548 } else 3884 } else
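
The two `temp = rtime; temp *= utime;` changes above are overflow fixes: with a 32-bit cputime_t the old `(u64)(rtime * utime)` performed the multiplication in 32 bits and only then widened the already-wrapped product. Widening first forces a 64-bit multiply. A small demonstration (uint32_t standing in for a 32-bit cputime_t, values invented):

#include <stdint.h>
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	/* ~13.4 hours of runtime in ticks at HZ=100, mostly user time. */
	uint32_t rtime = 4840000, utime = 4400000;

	/* Old pattern: 32-bit multiply wraps, then gets widened. */
	uint64_t bad = (uint64_t)(rtime * utime);

	/* New pattern: widen first, multiply in 64 bits. */
	uint64_t good = rtime;
	good *= utime;

	printf("truncated: %" PRIu64 "\n", bad);	/* wrapped modulo 2^32 */
	printf("correct:   %" PRIu64 "\n", good);	/* 21,296,000,000,000 */
	return 0;
}
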
@@ -3578,7 +3914,7 @@ void scheduler_tick(void)
3578 curr->sched_class->task_tick(rq, curr, 0); 3914 curr->sched_class->task_tick(rq, curr, 0);
3579 raw_spin_unlock(&rq->lock); 3915 raw_spin_unlock(&rq->lock);
3580 3916
3581 perf_event_task_tick(curr); 3917 perf_event_task_tick();
3582 3918
3583#ifdef CONFIG_SMP 3919#ifdef CONFIG_SMP
3584 rq->idle_at_tick = idle_cpu(cpu); 3920 rq->idle_at_tick = idle_cpu(cpu);
@@ -3694,7 +4030,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
3694{ 4030{
3695 if (prev->se.on_rq) 4031 if (prev->se.on_rq)
3696 update_rq_clock(rq); 4032 update_rq_clock(rq);
3697 rq->skip_clock_update = 0;
3698 prev->sched_class->put_prev_task(rq, prev); 4033 prev->sched_class->put_prev_task(rq, prev);
3699} 4034}
3700 4035
@@ -3717,17 +4052,13 @@ pick_next_task(struct rq *rq)
3717 return p; 4052 return p;
3718 } 4053 }
3719 4054
3720 class = sched_class_highest; 4055 for_each_class(class) {
3721 for ( ; ; ) {
3722 p = class->pick_next_task(rq); 4056 p = class->pick_next_task(rq);
3723 if (p) 4057 if (p)
3724 return p; 4058 return p;
3725 /*
3726 * Will never be NULL as the idle class always
3727 * returns a non-NULL p:
3728 */
3729 class = class->next;
3730 } 4059 }
4060
4061 BUG(); /* the idle class will always have a runnable task */
3731} 4062}
3732 4063
3733/* 4064/*
@@ -3756,7 +4087,6 @@ need_resched_nonpreemptible:
3756 hrtick_clear(rq); 4087 hrtick_clear(rq);
3757 4088
3758 raw_spin_lock_irq(&rq->lock); 4089 raw_spin_lock_irq(&rq->lock);
3759 clear_tsk_need_resched(prev);
3760 4090
3761 switch_count = &prev->nivcsw; 4091 switch_count = &prev->nivcsw;
3762 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 4092 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -3788,6 +4118,8 @@ need_resched_nonpreemptible:
3788 4118
3789 put_prev_task(rq, prev); 4119 put_prev_task(rq, prev);
3790 next = pick_next_task(rq); 4120 next = pick_next_task(rq);
4121 clear_tsk_need_resched(prev);
4122 rq->skip_clock_update = 0;
3791 4123
3792 if (likely(prev != next)) { 4124 if (likely(prev != next)) {
3793 sched_info_switch(prev, next); 4125 sched_info_switch(prev, next);
@@ -3865,8 +4197,16 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
3865 /* 4197 /*
3866 * Owner changed, break to re-assess state. 4198 * Owner changed, break to re-assess state.
3867 */ 4199 */
3868 if (lock->owner != owner) 4200 if (lock->owner != owner) {
4201 /*
4202 * If the lock has switched to a different owner,
4203 * we likely have heavy contention. Return 0 to quit
4204 * optimistic spinning and not contend further:
4205 */
4206 if (lock->owner)
4207 return 0;
3869 break; 4208 break;
4209 }
3870 4210
3871 /* 4211 /*
3872 * Is that owner really running on that cpu? 4212 * Is that owner really running on that cpu?
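
The added branch refines the optimistic-spin heuristic: a NULL ->owner means the mutex was released, so breaking out and retrying the acquisition is worthwhile, whereas ownership passing straight to another task signals heavy contention, and spinning further mostly wastes CPU. A condensed stand-alone sketch of that decision (plain pointers, not the kernel's mutex type):

#include <stddef.h>
#include <stdio.h>

struct task { const char *comm; };

struct fake_mutex {
	struct task *owner;	/* NULL when the mutex is free */
};

/* Returns 1 to keep trying optimistically, 0 to give up and sleep. */
static int keep_spinning(struct fake_mutex *lock, struct task *owner_seen)
{
	if (lock->owner == owner_seen)
		return 1;	/* same owner still holds it: keep spinning */
	if (lock->owner == NULL)
		return 1;	/* released: worth retrying the fast path */
	return 0;		/* handed to somebody else: heavy contention */
}

int main(void)
{
	struct task a = { "A" }, b = { "B" };
	struct fake_mutex m = { &a };

	printf("%d\n", keep_spinning(&m, &a));	/* 1: A still owns it */
	m.owner = NULL;
	printf("%d\n", keep_spinning(&m, &a));	/* 1: freed, retry acquire */
	m.owner = &b;
	printf("%d\n", keep_spinning(&m, &a));	/* 0: moved to B, bail out */
	return 0;
}
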
@@ -4344,6 +4684,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
4344 4684
4345 rq = task_rq_lock(p, &flags); 4685 rq = task_rq_lock(p, &flags);
4346 4686
4687 trace_sched_pi_setprio(p, prio);
4347 oldprio = p->prio; 4688 oldprio = p->prio;
4348 prev_class = p->sched_class; 4689 prev_class = p->sched_class;
4349 on_rq = p->se.on_rq; 4690 on_rq = p->se.on_rq;
@@ -4631,7 +4972,7 @@ recheck:
4631 } 4972 }
4632 4973
4633 if (user) { 4974 if (user) {
4634 retval = security_task_setscheduler(p, policy, param); 4975 retval = security_task_setscheduler(p);
4635 if (retval) 4976 if (retval)
4636 return retval; 4977 return retval;
4637 } 4978 }
@@ -4647,6 +4988,15 @@ recheck:
4647 */ 4988 */
4648 rq = __task_rq_lock(p); 4989 rq = __task_rq_lock(p);
4649 4990
4991 /*
4992 * Changing the policy of the stop threads its a very bad idea
4993 */
4994 if (p == rq->stop) {
4995 __task_rq_unlock(rq);
4996 raw_spin_unlock_irqrestore(&p->pi_lock, flags);
4997 return -EINVAL;
4998 }
4999
4650#ifdef CONFIG_RT_GROUP_SCHED 5000#ifdef CONFIG_RT_GROUP_SCHED
4651 if (user) { 5001 if (user) {
4652 /* 5002 /*
@@ -4873,13 +5223,13 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
4873 if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) 5223 if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
4874 goto out_unlock; 5224 goto out_unlock;
4875 5225
4876 retval = security_task_setscheduler(p, 0, NULL); 5226 retval = security_task_setscheduler(p);
4877 if (retval) 5227 if (retval)
4878 goto out_unlock; 5228 goto out_unlock;
4879 5229
4880 cpuset_cpus_allowed(p, cpus_allowed); 5230 cpuset_cpus_allowed(p, cpus_allowed);
4881 cpumask_and(new_mask, in_mask, cpus_allowed); 5231 cpumask_and(new_mask, in_mask, cpus_allowed);
4882 again: 5232again:
4883 retval = set_cpus_allowed_ptr(p, new_mask); 5233 retval = set_cpus_allowed_ptr(p, new_mask);
4884 5234
4885 if (!retval) { 5235 if (!retval) {
@@ -5323,7 +5673,19 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5323 idle->se.exec_start = sched_clock(); 5673 idle->se.exec_start = sched_clock();
5324 5674
5325 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu)); 5675 cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
5676 /*
5677 * We're having a chicken and egg problem, even though we are
5678 * holding rq->lock, the cpu isn't yet set to this cpu so the
5679 * lockdep check in task_group() will fail.
5680 *
5681 * Similar case to sched_fork(). / Alternatively we could
5682 * use task_rq_lock() here and obtain the other rq->lock.
5683 *
5684 * Silence PROVE_RCU
5685 */
5686 rcu_read_lock();
5326 __set_task_cpu(idle, cpu); 5687 __set_task_cpu(idle, cpu);
5688 rcu_read_unlock();
5327 5689
5328 rq->curr = rq->idle = idle; 5690 rq->curr = rq->idle = idle;
5329#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) 5691#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
@@ -6500,6 +6862,7 @@ struct s_data {
6500 cpumask_var_t nodemask; 6862 cpumask_var_t nodemask;
6501 cpumask_var_t this_sibling_map; 6863 cpumask_var_t this_sibling_map;
6502 cpumask_var_t this_core_map; 6864 cpumask_var_t this_core_map;
6865 cpumask_var_t this_book_map;
6503 cpumask_var_t send_covered; 6866 cpumask_var_t send_covered;
6504 cpumask_var_t tmpmask; 6867 cpumask_var_t tmpmask;
6505 struct sched_group **sched_group_nodes; 6868 struct sched_group **sched_group_nodes;
@@ -6511,6 +6874,7 @@ enum s_alloc {
6511 sa_rootdomain, 6874 sa_rootdomain,
6512 sa_tmpmask, 6875 sa_tmpmask,
6513 sa_send_covered, 6876 sa_send_covered,
6877 sa_this_book_map,
6514 sa_this_core_map, 6878 sa_this_core_map,
6515 sa_this_sibling_map, 6879 sa_this_sibling_map,
6516 sa_nodemask, 6880 sa_nodemask,
@@ -6546,31 +6910,48 @@ cpu_to_cpu_group(int cpu, const struct cpumask *cpu_map,
6546#ifdef CONFIG_SCHED_MC 6910#ifdef CONFIG_SCHED_MC
6547static DEFINE_PER_CPU(struct static_sched_domain, core_domains); 6911static DEFINE_PER_CPU(struct static_sched_domain, core_domains);
6548static DEFINE_PER_CPU(struct static_sched_group, sched_group_core); 6912static DEFINE_PER_CPU(struct static_sched_group, sched_group_core);
6549#endif /* CONFIG_SCHED_MC */
6550 6913
6551#if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
6552static int 6914static int
6553cpu_to_core_group(int cpu, const struct cpumask *cpu_map, 6915cpu_to_core_group(int cpu, const struct cpumask *cpu_map,
6554 struct sched_group **sg, struct cpumask *mask) 6916 struct sched_group **sg, struct cpumask *mask)
6555{ 6917{
6556 int group; 6918 int group;
6557 6919#ifdef CONFIG_SCHED_SMT
6558 cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); 6920 cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
6559 group = cpumask_first(mask); 6921 group = cpumask_first(mask);
6922#else
6923 group = cpu;
6924#endif
6560 if (sg) 6925 if (sg)
6561 *sg = &per_cpu(sched_group_core, group).sg; 6926 *sg = &per_cpu(sched_group_core, group).sg;
6562 return group; 6927 return group;
6563} 6928}
6564#elif defined(CONFIG_SCHED_MC) 6929#endif /* CONFIG_SCHED_MC */
6930
6931/*
6932 * book sched-domains:
6933 */
6934#ifdef CONFIG_SCHED_BOOK
6935static DEFINE_PER_CPU(struct static_sched_domain, book_domains);
6936static DEFINE_PER_CPU(struct static_sched_group, sched_group_book);
6937
6565static int 6938static int
6566cpu_to_core_group(int cpu, const struct cpumask *cpu_map, 6939cpu_to_book_group(int cpu, const struct cpumask *cpu_map,
6567 struct sched_group **sg, struct cpumask *unused) 6940 struct sched_group **sg, struct cpumask *mask)
6568{ 6941{
6942 int group = cpu;
6943#ifdef CONFIG_SCHED_MC
6944 cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
6945 group = cpumask_first(mask);
6946#elif defined(CONFIG_SCHED_SMT)
6947 cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
6948 group = cpumask_first(mask);
6949#endif
6569 if (sg) 6950 if (sg)
6570 *sg = &per_cpu(sched_group_core, cpu).sg; 6951 *sg = &per_cpu(sched_group_book, group).sg;
6571 return cpu; 6952 return group;
6572} 6953}
6573#endif 6954#endif /* CONFIG_SCHED_BOOK */
6574 6955
6575static DEFINE_PER_CPU(struct static_sched_domain, phys_domains); 6956static DEFINE_PER_CPU(struct static_sched_domain, phys_domains);
6576static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys); 6957static DEFINE_PER_CPU(struct static_sched_group, sched_group_phys);
@@ -6580,7 +6961,10 @@ cpu_to_phys_group(int cpu, const struct cpumask *cpu_map,
6580 struct sched_group **sg, struct cpumask *mask) 6961 struct sched_group **sg, struct cpumask *mask)
6581{ 6962{
6582 int group; 6963 int group;
6583#ifdef CONFIG_SCHED_MC 6964#ifdef CONFIG_SCHED_BOOK
6965 cpumask_and(mask, cpu_book_mask(cpu), cpu_map);
6966 group = cpumask_first(mask);
6967#elif defined(CONFIG_SCHED_MC)
6584 cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); 6968 cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
6585 group = cpumask_first(mask); 6969 group = cpumask_first(mask);
6586#elif defined(CONFIG_SCHED_SMT) 6970#elif defined(CONFIG_SCHED_SMT)
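
The new BOOK level slots in between the core (MC) and the physical level, mirroring s390's book topology: each level's sched_group is represented by the first CPU of the next-lower level's span intersected with cpu_map, as cpu_to_book_group() and the updated cpu_to_phys_group() above show. A toy model of that representative selection (an invented 8-CPU, 2-book topology, not any real machine):

#include <stdio.h>

/* Toy topology: 8 CPUs, 2 books, each book = 2 cores x 2 SMT threads.
 * Masks are plain bitmasks; the lowest set bit plays cpumask_first(). */
static unsigned int book_mask(int cpu) { return cpu < 4 ? 0x0f : 0xf0; }
static unsigned int core_mask(int cpu) { return 0x3u << (cpu & ~1); }

static int first_cpu(unsigned int mask)
{
	int cpu = 0;

	while (!(mask & (1u << cpu)))
		cpu++;
	return cpu;
}

/* Like cpu_to_book_group() with CONFIG_SCHED_MC: the book-level group is
 * represented by the first CPU of this CPU's core group. */
static int book_group(int cpu)
{
	return first_cpu(core_mask(cpu));
}

/* Like cpu_to_phys_group() with CONFIG_SCHED_BOOK: the physical-level
 * group is represented by the first CPU of the book. */
static int phys_group(int cpu)
{
	return first_cpu(book_mask(cpu));
}

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < 8; cpu++)
		printf("cpu %d: book group led by cpu %d, phys group led by cpu %d\n",
		       cpu, book_group(cpu), phys_group(cpu));
	return 0;
}
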
@@ -6776,6 +7160,8 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
6776 if (cpu != group_first_cpu(sd->groups)) 7160 if (cpu != group_first_cpu(sd->groups))
6777 return; 7161 return;
6778 7162
7163 sd->groups->group_weight = cpumask_weight(sched_group_cpus(sd->groups));
7164
6779 child = sd->child; 7165 child = sd->child;
6780 7166
6781 sd->groups->cpu_power = 0; 7167 sd->groups->cpu_power = 0;
@@ -6841,6 +7227,9 @@ SD_INIT_FUNC(CPU)
6841#ifdef CONFIG_SCHED_MC 7227#ifdef CONFIG_SCHED_MC
6842 SD_INIT_FUNC(MC) 7228 SD_INIT_FUNC(MC)
6843#endif 7229#endif
7230#ifdef CONFIG_SCHED_BOOK
7231 SD_INIT_FUNC(BOOK)
7232#endif
6844 7233
6845static int default_relax_domain_level = -1; 7234static int default_relax_domain_level = -1;
6846 7235
@@ -6890,6 +7279,8 @@ static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
6890 free_cpumask_var(d->tmpmask); /* fall through */ 7279 free_cpumask_var(d->tmpmask); /* fall through */
6891 case sa_send_covered: 7280 case sa_send_covered:
6892 free_cpumask_var(d->send_covered); /* fall through */ 7281 free_cpumask_var(d->send_covered); /* fall through */
7282 case sa_this_book_map:
7283 free_cpumask_var(d->this_book_map); /* fall through */
6893 case sa_this_core_map: 7284 case sa_this_core_map:
6894 free_cpumask_var(d->this_core_map); /* fall through */ 7285 free_cpumask_var(d->this_core_map); /* fall through */
6895 case sa_this_sibling_map: 7286 case sa_this_sibling_map:
@@ -6936,8 +7327,10 @@ static enum s_alloc __visit_domain_allocation_hell(struct s_data *d,
6936 return sa_nodemask; 7327 return sa_nodemask;
6937 if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL)) 7328 if (!alloc_cpumask_var(&d->this_core_map, GFP_KERNEL))
6938 return sa_this_sibling_map; 7329 return sa_this_sibling_map;
6939 if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL)) 7330 if (!alloc_cpumask_var(&d->this_book_map, GFP_KERNEL))
6940 return sa_this_core_map; 7331 return sa_this_core_map;
7332 if (!alloc_cpumask_var(&d->send_covered, GFP_KERNEL))
7333 return sa_this_book_map;
6941 if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL)) 7334 if (!alloc_cpumask_var(&d->tmpmask, GFP_KERNEL))
6942 return sa_send_covered; 7335 return sa_send_covered;
6943 d->rd = alloc_rootdomain(); 7336 d->rd = alloc_rootdomain();
@@ -6995,6 +7388,23 @@ static struct sched_domain *__build_cpu_sched_domain(struct s_data *d,
6995 return sd; 7388 return sd;
6996} 7389}
6997 7390
7391static struct sched_domain *__build_book_sched_domain(struct s_data *d,
7392 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
7393 struct sched_domain *parent, int i)
7394{
7395 struct sched_domain *sd = parent;
7396#ifdef CONFIG_SCHED_BOOK
7397 sd = &per_cpu(book_domains, i).sd;
7398 SD_INIT(sd, BOOK);
7399 set_domain_attribute(sd, attr);
7400 cpumask_and(sched_domain_span(sd), cpu_map, cpu_book_mask(i));
7401 sd->parent = parent;
7402 parent->child = sd;
7403 cpu_to_book_group(i, cpu_map, &sd->groups, d->tmpmask);
7404#endif
7405 return sd;
7406}
7407
6998static struct sched_domain *__build_mc_sched_domain(struct s_data *d, 7408static struct sched_domain *__build_mc_sched_domain(struct s_data *d,
6999 const struct cpumask *cpu_map, struct sched_domain_attr *attr, 7409 const struct cpumask *cpu_map, struct sched_domain_attr *attr,
7000 struct sched_domain *parent, int i) 7410 struct sched_domain *parent, int i)
@@ -7052,6 +7462,15 @@ static void build_sched_groups(struct s_data *d, enum sched_domain_level l,
7052 d->send_covered, d->tmpmask); 7462 d->send_covered, d->tmpmask);
7053 break; 7463 break;
7054#endif 7464#endif
7465#ifdef CONFIG_SCHED_BOOK
7466 case SD_LV_BOOK: /* set up book groups */
7467 cpumask_and(d->this_book_map, cpu_map, cpu_book_mask(cpu));
7468 if (cpu == cpumask_first(d->this_book_map))
7469 init_sched_build_groups(d->this_book_map, cpu_map,
7470 &cpu_to_book_group,
7471 d->send_covered, d->tmpmask);
7472 break;
7473#endif
7055 case SD_LV_CPU: /* set up physical groups */ 7474 case SD_LV_CPU: /* set up physical groups */
7056 cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map); 7475 cpumask_and(d->nodemask, cpumask_of_node(cpu), cpu_map);
7057 if (!cpumask_empty(d->nodemask)) 7476 if (!cpumask_empty(d->nodemask))
@@ -7099,12 +7518,14 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7099 7518
7100 sd = __build_numa_sched_domains(&d, cpu_map, attr, i); 7519 sd = __build_numa_sched_domains(&d, cpu_map, attr, i);
7101 sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i); 7520 sd = __build_cpu_sched_domain(&d, cpu_map, attr, sd, i);
7521 sd = __build_book_sched_domain(&d, cpu_map, attr, sd, i);
7102 sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i); 7522 sd = __build_mc_sched_domain(&d, cpu_map, attr, sd, i);
7103 sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i); 7523 sd = __build_smt_sched_domain(&d, cpu_map, attr, sd, i);
7104 } 7524 }
7105 7525
7106 for_each_cpu(i, cpu_map) { 7526 for_each_cpu(i, cpu_map) {
7107 build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i); 7527 build_sched_groups(&d, SD_LV_SIBLING, cpu_map, i);
7528 build_sched_groups(&d, SD_LV_BOOK, cpu_map, i);
7108 build_sched_groups(&d, SD_LV_MC, cpu_map, i); 7529 build_sched_groups(&d, SD_LV_MC, cpu_map, i);
7109 } 7530 }
7110 7531
@@ -7135,6 +7556,12 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7135 init_sched_groups_power(i, sd); 7556 init_sched_groups_power(i, sd);
7136 } 7557 }
7137#endif 7558#endif
7559#ifdef CONFIG_SCHED_BOOK
7560 for_each_cpu(i, cpu_map) {
7561 sd = &per_cpu(book_domains, i).sd;
7562 init_sched_groups_power(i, sd);
7563 }
7564#endif
7138 7565
7139 for_each_cpu(i, cpu_map) { 7566 for_each_cpu(i, cpu_map) {
7140 sd = &per_cpu(phys_domains, i).sd; 7567 sd = &per_cpu(phys_domains, i).sd;
@@ -7160,6 +7587,8 @@ static int __build_sched_domains(const struct cpumask *cpu_map,
7160 sd = &per_cpu(cpu_domains, i).sd; 7587 sd = &per_cpu(cpu_domains, i).sd;
7161#elif defined(CONFIG_SCHED_MC) 7588#elif defined(CONFIG_SCHED_MC)
7162 sd = &per_cpu(core_domains, i).sd; 7589 sd = &per_cpu(core_domains, i).sd;
7590#elif defined(CONFIG_SCHED_BOOK)
7591 sd = &per_cpu(book_domains, i).sd;
7163#else 7592#else
7164 sd = &per_cpu(phys_domains, i).sd; 7593 sd = &per_cpu(phys_domains, i).sd;
7165#endif 7594#endif
@@ -8064,9 +8493,9 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
8064 8493
8065 return 1; 8494 return 1;
8066 8495
8067 err_free_rq: 8496err_free_rq:
8068 kfree(cfs_rq); 8497 kfree(cfs_rq);
8069 err: 8498err:
8070 return 0; 8499 return 0;
8071} 8500}
8072 8501
@@ -8154,9 +8583,9 @@ int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
8154 8583
8155 return 1; 8584 return 1;
8156 8585
8157 err_free_rq: 8586err_free_rq:
8158 kfree(rt_rq); 8587 kfree(rt_rq);
8159 err: 8588err:
8160 return 0; 8589 return 0;
8161} 8590}
8162 8591
@@ -8283,12 +8712,12 @@ void sched_move_task(struct task_struct *tsk)
8283 if (unlikely(running)) 8712 if (unlikely(running))
8284 tsk->sched_class->put_prev_task(rq, tsk); 8713 tsk->sched_class->put_prev_task(rq, tsk);
8285 8714
8286 set_task_rq(tsk, task_cpu(tsk));
8287
8288#ifdef CONFIG_FAIR_GROUP_SCHED 8715#ifdef CONFIG_FAIR_GROUP_SCHED
8289 if (tsk->sched_class->moved_group) 8716 if (tsk->sched_class->task_move_group)
8290 tsk->sched_class->moved_group(tsk, on_rq); 8717 tsk->sched_class->task_move_group(tsk, on_rq);
8718 else
8291#endif 8719#endif
8720 set_task_rq(tsk, task_cpu(tsk));
8292 8721
8293 if (unlikely(running)) 8722 if (unlikely(running))
8294 tsk->sched_class->set_curr_task(rq); 8723 tsk->sched_class->set_curr_task(rq);
@@ -8514,7 +8943,7 @@ static int tg_set_bandwidth(struct task_group *tg,
8514 raw_spin_unlock(&rt_rq->rt_runtime_lock); 8943 raw_spin_unlock(&rt_rq->rt_runtime_lock);
8515 } 8944 }
8516 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock); 8945 raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
8517 unlock: 8946unlock:
8518 read_unlock(&tasklist_lock); 8947 read_unlock(&tasklist_lock);
8519 mutex_unlock(&rt_constraints_mutex); 8948 mutex_unlock(&rt_constraints_mutex);
8520 8949