path: root/kernel/sched_rt.c
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-01-25 15:08:30 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-25 15:08:30 -0500
commit	6f505b16425a51270058e4a93441fe64de3dd435 (patch)
tree	be21e711d93bc4d088b97c4a4f585a5044dbaa7d /kernel/sched_rt.c
parent	fa85ae2418e6843953107cd6a06f645752829bc0 (diff)
sched: rt group scheduling
Extend group scheduling to also cover the realtime classes. It uses the time limiting introduced by the previous patch to allow multiple realtime groups. The hard time limit is required to keep behaviour deterministic.

The algorithms used make the realtime scheduler O(tg), i.e. linear in the number of task groups. This is the worst-case behaviour I can't seem to get rid of; the average case of the algorithms can be improved, but I focused on correctness and the worst case.

[ akpm@linux-foundation.org: move side-effects out of BUG_ON(). ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
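For orientation before reading the diff: the per-group throttling test this patch introduces boils down to comparing the runtime a group consumed in the current period against the fraction of that period the group is entitled to, and refunding the budget once per period boundary. The following stand-alone C sketch mirrors that arithmetic only; struct toy_rt_rq, ratio_exceeded(), period_tick() and the constant values are illustrative stand-ins for this note, not the kernel's actual definitions.

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's fixed-point ratio constants. */
#define SCHED_RT_FRAC_SHIFT	16
#define SCHED_RT_FRAC		(1 << SCHED_RT_FRAC_SHIFT)	/* ratio of 1.0 == unlimited */
#define NSEC_PER_MSEC		1000000ULL

/* Minimal per-group state; the real struct rt_rq carries much more. */
struct toy_rt_rq {
	uint64_t rt_time;	/* rt runtime consumed in the current period, in ns */
	int rt_throttled;
	unsigned int rt_ratio;	/* fraction of the period this group may use */
};

/* Mirrors the shape of sched_rt_ratio_exceeded() in the diff below. */
static int ratio_exceeded(struct toy_rt_rq *rt_rq, uint64_t period_msec)
{
	uint64_t period, limit;

	if (rt_rq->rt_ratio == SCHED_RT_FRAC)	/* 100%: never throttle */
		return 0;
	if (rt_rq->rt_throttled)
		return 1;

	period = period_msec * NSEC_PER_MSEC;
	limit = (period * rt_rq->rt_ratio) >> SCHED_RT_FRAC_SHIFT;

	if (rt_rq->rt_time > limit) {
		rt_rq->rt_throttled = 1;	/* the real code also dequeues the group */
		return 1;
	}
	return 0;
}

/* Mirrors __update_sched_rt_period(): refund one period's budget and unthrottle. */
static void period_tick(struct toy_rt_rq *rt_rq, uint64_t period_msec)
{
	uint64_t period = period_msec * NSEC_PER_MSEC;
	uint64_t limit = (period * rt_rq->rt_ratio) >> SCHED_RT_FRAC_SHIFT;

	rt_rq->rt_time -= rt_rq->rt_time < limit ? rt_rq->rt_time : limit;
	rt_rq->rt_throttled = 0;	/* the real code re-enqueues the group here */
}

int main(void)
{
	/* A group allowed 50% of a 1000 ms period. */
	struct toy_rt_rq grp = { .rt_ratio = SCHED_RT_FRAC / 2 };

	grp.rt_time = 600 * NSEC_PER_MSEC;	/* ran 600 ms of the period */
	printf("throttled: %d\n", ratio_exceeded(&grp, 1000));	/* 1: over the 500 ms budget */

	period_tick(&grp, 1000);		/* new period: budget refunded */
	printf("throttled: %d\n", ratio_exceeded(&grp, 1000));	/* 0 again */
	return 0;
}

The hard cap is what makes multiple realtime groups viable: a group that exhausts its fraction stays dequeued until the next period boundary, so one group cannot starve the others, at the cost of the O(tg) per-period walk mentioned above.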
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--	kernel/sched_rt.c | 455
1 file changed, 336 insertions(+), 119 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index fd10d965aa06..1178257613ad 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -45,47 +45,167 @@ static void update_rt_migration(struct rq *rq)
 }
 #endif /* CONFIG_SMP */
 
-static int sched_rt_ratio_exceeded(struct rq *rq, struct rt_rq *rt_rq)
+static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
 {
+	return container_of(rt_se, struct task_struct, rt);
+}
+
+static inline int on_rt_rq(struct sched_rt_entity *rt_se)
+{
+	return !list_empty(&rt_se->run_list);
+}
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+{
+	if (!rt_rq->tg)
+		return SCHED_RT_FRAC;
+
+	return rt_rq->tg->rt_ratio;
+}
+
+#define for_each_leaf_rt_rq(rt_rq, rq) \
+	list_for_each_entry(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
+
+static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
+{
+	return rt_rq->rq;
+}
+
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
+{
+	return rt_se->rt_rq;
+}
+
+#define for_each_sched_rt_entity(rt_se) \
+	for (; rt_se; rt_se = rt_se->parent)
+
+static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+{
+	return rt_se->my_q;
+}
+
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+static void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+{
+	struct sched_rt_entity *rt_se = rt_rq->rt_se;
+
+	if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) {
+		enqueue_rt_entity(rt_se);
+		resched_task(rq_of_rt_rq(rt_rq)->curr);
+	}
+}
+
+static void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+{
+	struct sched_rt_entity *rt_se = rt_rq->rt_se;
+
+	if (rt_se && on_rt_rq(rt_se))
+		dequeue_rt_entity(rt_se);
+}
+
+#else
+
+static inline unsigned int sched_rt_ratio(struct rt_rq *rt_rq)
+{
+	return sysctl_sched_rt_ratio;
+}
+
+#define for_each_leaf_rt_rq(rt_rq, rq) \
+	for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
+static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
+{
+	return container_of(rt_rq, struct rq, rt);
+}
+
+static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
+{
+	struct task_struct *p = rt_task_of(rt_se);
+	struct rq *rq = task_rq(p);
+
+	return &rq->rt;
+}
+
+#define for_each_sched_rt_entity(rt_se) \
+	for (; rt_se; rt_se = NULL)
+
+static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+{
+	return NULL;
+}
+
+static inline void sched_rt_ratio_enqueue(struct rt_rq *rt_rq)
+{
+}
+
+static inline void sched_rt_ratio_dequeue(struct rt_rq *rt_rq)
+{
+}
+
+#endif
+
+static inline int rt_se_prio(struct sched_rt_entity *rt_se)
+{
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+	if (rt_rq)
+		return rt_rq->highest_prio;
+#endif
+
+	return rt_task_of(rt_se)->prio;
+}
+
+static int sched_rt_ratio_exceeded(struct rt_rq *rt_rq)
+{
+	unsigned int rt_ratio = sched_rt_ratio(rt_rq);
 	u64 period, ratio;
 
-	if (sysctl_sched_rt_ratio == SCHED_RT_FRAC)
+	if (rt_ratio == SCHED_RT_FRAC)
 		return 0;
 
 	if (rt_rq->rt_throttled)
 		return 1;
 
 	period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-	ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+	ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
 
 	if (rt_rq->rt_time > ratio) {
-		rt_rq->rt_throttled = rq->clock + period - rt_rq->rt_time;
+		rt_rq->rt_throttled = 1;
+		sched_rt_ratio_dequeue(rt_rq);
 		return 1;
 	}
 
 	return 0;
 }
 
+static void __update_sched_rt_period(struct rt_rq *rt_rq, u64 period)
+{
+	unsigned long rt_ratio = sched_rt_ratio(rt_rq);
+	u64 ratio = (period * rt_ratio) >> SCHED_RT_FRAC_SHIFT;
+
+	rt_rq->rt_time -= min(rt_rq->rt_time, ratio);
+	if (rt_rq->rt_throttled) {
+		rt_rq->rt_throttled = 0;
+		sched_rt_ratio_enqueue(rt_rq);
+	}
+}
+
 static void update_sched_rt_period(struct rq *rq)
 {
-	while (rq->clock > rq->rt_period_expire) {
-		u64 period, ratio;
+	struct rt_rq *rt_rq;
+	u64 period;
 
+	while (rq->clock > rq->rt_period_expire) {
 		period = (u64)sysctl_sched_rt_period * NSEC_PER_MSEC;
-		ratio = (period * sysctl_sched_rt_ratio) >> SCHED_RT_FRAC_SHIFT;
-
-		rq->rt.rt_time -= min(rq->rt.rt_time, ratio);
 		rq->rt_period_expire += period;
-	}
 
-	/*
-	 * When the rt throttle is expired, let them rip.
-	 * (XXX: use hrtick when available)
-	 */
-	if (rq->rt.rt_throttled && rq->clock > rq->rt.rt_throttled) {
-		rq->rt.rt_throttled = 0;
-		if (!sched_rt_ratio_exceeded(rq, &rq->rt))
-			resched_task(rq->curr);
+		for_each_leaf_rt_rq(rt_rq, rq)
+			__update_sched_rt_period(rt_rq, period);
 	}
 }
 
@@ -96,6 +216,8 @@ static void update_sched_rt_period(struct rq *rq)
 static void update_curr_rt(struct rq *rq)
 {
 	struct task_struct *curr = rq->curr;
+	struct sched_rt_entity *rt_se = &curr->rt;
+	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	u64 delta_exec;
 
 	if (!task_has_rt_policy(curr))
@@ -111,95 +233,184 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
-	rq->rt.rt_time += delta_exec;
-	update_sched_rt_period(rq);
-	if (sched_rt_ratio_exceeded(rq, &rq->rt))
+	rt_rq->rt_time += delta_exec;
+	/*
+	 * might make it a tad more accurate:
+	 *
+	 * update_sched_rt_period(rq);
+	 */
+	if (sched_rt_ratio_exceeded(rt_rq))
 		resched_task(curr);
 }
 
-static inline void inc_rt_tasks(struct task_struct *p, struct rq *rq)
+static inline
+void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	WARN_ON(!rt_task(p));
-	rq->rt.rt_nr_running++;
+	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
+	rt_rq->rt_nr_running++;
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+		rt_rq->highest_prio = rt_se_prio(rt_se);
+#endif
 #ifdef CONFIG_SMP
-	if (p->prio < rq->rt.highest_prio)
-		rq->rt.highest_prio = p->prio;
-	if (p->nr_cpus_allowed > 1)
+	if (rt_se->nr_cpus_allowed > 1) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
 		rq->rt.rt_nr_migratory++;
+	}
 
-	update_rt_migration(rq);
-#endif /* CONFIG_SMP */
+	update_rt_migration(rq_of_rt_rq(rt_rq));
+#endif
 }
 
-static inline void dec_rt_tasks(struct task_struct *p, struct rq *rq)
+static inline
+void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
-	WARN_ON(!rt_task(p));
-	WARN_ON(!rq->rt.rt_nr_running);
-	rq->rt.rt_nr_running--;
-#ifdef CONFIG_SMP
-	if (rq->rt.rt_nr_running) {
+	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
+	WARN_ON(!rt_rq->rt_nr_running);
+	rt_rq->rt_nr_running--;
+#if defined CONFIG_SMP || defined CONFIG_FAIR_GROUP_SCHED
+	if (rt_rq->rt_nr_running) {
 		struct rt_prio_array *array;
 
-		WARN_ON(p->prio < rq->rt.highest_prio);
-		if (p->prio == rq->rt.highest_prio) {
+		WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio);
+		if (rt_se_prio(rt_se) == rt_rq->highest_prio) {
 			/* recalculate */
-			array = &rq->rt.active;
-			rq->rt.highest_prio =
+			array = &rt_rq->active;
+			rt_rq->highest_prio =
 				sched_find_first_bit(array->bitmap);
 		} /* otherwise leave rq->highest prio alone */
 	} else
-		rq->rt.highest_prio = MAX_RT_PRIO;
-	if (p->nr_cpus_allowed > 1)
+		rt_rq->highest_prio = MAX_RT_PRIO;
+#endif
+#ifdef CONFIG_SMP
+	if (rt_se->nr_cpus_allowed > 1) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
 		rq->rt.rt_nr_migratory--;
+	}
 
-	update_rt_migration(rq);
+	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 }
 
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
 {
-	struct rt_prio_array *array = &rq->rt.active;
+	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+	struct rt_prio_array *array = &rt_rq->active;
+	struct rt_rq *group_rq = group_rt_rq(rt_se);
 
-	list_add_tail(&p->rt.run_list, array->queue + p->prio);
-	__set_bit(p->prio, array->bitmap);
-	inc_cpu_load(rq, p->se.load.weight);
+	if (group_rq && group_rq->rt_throttled)
+		return;
 
-	inc_rt_tasks(p, rq);
+	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
-	if (wakeup)
-		p->rt.timeout = 0;
+	inc_rt_tasks(rt_se, rt_rq);
+}
+
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+{
+	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+	struct rt_prio_array *array = &rt_rq->active;
+
+	list_del_init(&rt_se->run_list);
+	if (list_empty(array->queue + rt_se_prio(rt_se)))
+		__clear_bit(rt_se_prio(rt_se), array->bitmap);
+
+	dec_rt_tasks(rt_se, rt_rq);
+}
+
+/*
+ * Because the prio of an upper entry depends on the lower
+ * entries, we must remove entries top - down.
+ *
+ * XXX: O(1/2 h^2) because we can only walk up, not down the chain.
+ * doesn't matter much for now, as h=2 for GROUP_SCHED.
+ */
+static void dequeue_rt_stack(struct task_struct *p)
+{
+	struct sched_rt_entity *rt_se, *top_se;
+
+	/*
+	 * dequeue all, top - down.
+	 */
+	do {
+		rt_se = &p->rt;
+		top_se = NULL;
+		for_each_sched_rt_entity(rt_se) {
+			if (on_rt_rq(rt_se))
+				top_se = rt_se;
+		}
+		if (top_se)
+			dequeue_rt_entity(top_se);
+	} while (top_se);
 }
 
 /*
  * Adding/removing a task to/from a priority array:
  */
+static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+{
+	struct sched_rt_entity *rt_se = &p->rt;
+
+	if (wakeup)
+		rt_se->timeout = 0;
+
+	dequeue_rt_stack(p);
+
+	/*
+	 * enqueue everybody, bottom - up.
+	 */
+	for_each_sched_rt_entity(rt_se)
+		enqueue_rt_entity(rt_se);
+
+	inc_cpu_load(rq, p->se.load.weight);
+}
+
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 {
-	struct rt_prio_array *array = &rq->rt.active;
+	struct sched_rt_entity *rt_se = &p->rt;
+	struct rt_rq *rt_rq;
 
 	update_curr_rt(rq);
 
-	list_del(&p->rt.run_list);
-	if (list_empty(array->queue + p->prio))
-		__clear_bit(p->prio, array->bitmap);
-	dec_cpu_load(rq, p->se.load.weight);
+	dequeue_rt_stack(p);
+
+	/*
+	 * re-enqueue all non-empty rt_rq entities.
+	 */
+	for_each_sched_rt_entity(rt_se) {
+		rt_rq = group_rt_rq(rt_se);
+		if (rt_rq && rt_rq->rt_nr_running)
+			enqueue_rt_entity(rt_se);
+	}
 
-	dec_rt_tasks(p, rq);
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
+static
+void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+{
+	struct rt_prio_array *array = &rt_rq->active;
+
+	list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+}
+
 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
 {
-	struct rt_prio_array *array = &rq->rt.active;
+	struct sched_rt_entity *rt_se = &p->rt;
+	struct rt_rq *rt_rq;
 
-	list_move_tail(&p->rt.run_list, array->queue + p->prio);
+	for_each_sched_rt_entity(rt_se) {
+		rt_rq = rt_rq_of_se(rt_se);
+		requeue_rt_entity(rt_rq, rt_se);
+	}
 }
 
-static void
-yield_task_rt(struct rq *rq)
+static void yield_task_rt(struct rq *rq)
 {
 	requeue_task_rt(rq, rq->curr);
 }
@@ -229,7 +440,7 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 	 * cold cache anyway.
 	 */
 	if (unlikely(rt_task(rq->curr)) &&
-	    (p->nr_cpus_allowed > 1)) {
+	    (p->rt.nr_cpus_allowed > 1)) {
 		int cpu = find_lowest_rq(p);
 
 		return (cpu == -1) ? task_cpu(p) : cpu;
@@ -252,27 +463,51 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 		resched_task(rq->curr);
 }
 
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
+						   struct rt_rq *rt_rq)
 {
-	struct rt_prio_array *array = &rq->rt.active;
-	struct task_struct *next;
+	struct rt_prio_array *array = &rt_rq->active;
+	struct sched_rt_entity *next = NULL;
 	struct list_head *queue;
-	struct rt_rq *rt_rq = &rq->rt;
 	int idx;
 
-	if (sched_rt_ratio_exceeded(rq, rt_rq))
-		return NULL;
+	if (sched_rt_ratio_exceeded(rt_rq))
+		goto out;
 
 	idx = sched_find_first_bit(array->bitmap);
-	if (idx >= MAX_RT_PRIO)
-		return NULL;
+	BUG_ON(idx >= MAX_RT_PRIO);
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, rt.run_list);
+	next = list_entry(queue->next, struct sched_rt_entity, run_list);
+ out:
+	return next;
+}
 
-	next->se.exec_start = rq->clock;
+static struct task_struct *pick_next_task_rt(struct rq *rq)
+{
+	struct sched_rt_entity *rt_se;
+	struct task_struct *p;
+	struct rt_rq *rt_rq;
 
-	return next;
+ retry:
+	rt_rq = &rq->rt;
+
+	if (unlikely(!rt_rq->rt_nr_running))
+		return NULL;
+
+	if (sched_rt_ratio_exceeded(rt_rq))
+		return NULL;
+
+	do {
+		rt_se = pick_next_rt_entity(rq, rt_rq);
+		if (unlikely(!rt_se))
+			goto retry;
+		rt_rq = group_rt_rq(rt_se);
+	} while (rt_rq);
+
+	p = rt_task_of(rt_se);
+	p->se.exec_start = rq->clock;
+	return p;
 }
 
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
@@ -282,6 +517,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 }
 
 #ifdef CONFIG_SMP
+
 /* Only try algorithms three times */
 #define RT_MAX_TRIES 3
 
@@ -292,7 +528,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
 	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)) &&
-	    (p->nr_cpus_allowed > 1))
+	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
 }
@@ -300,52 +536,33 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 /* Return the second highest RT task, NULL otherwise */
 static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 {
-	struct rt_prio_array *array = &rq->rt.active;
-	struct task_struct *next;
-	struct list_head *queue;
+	struct task_struct *next = NULL;
+	struct sched_rt_entity *rt_se;
+	struct rt_prio_array *array;
+	struct rt_rq *rt_rq;
 	int idx;
 
-	if (likely(rq->rt.rt_nr_running < 2))
-		return NULL;
-
-	idx = sched_find_first_bit(array->bitmap);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running is bad */
-		return NULL;
-	}
-
-	queue = array->queue + idx;
-	BUG_ON(list_empty(queue));
-
-	next = list_entry(queue->next, struct task_struct, rt.run_list);
-	if (unlikely(pick_rt_task(rq, next, cpu)))
-		goto out;
-
-	if (queue->next->next != queue) {
-		/* same prio task */
-		next = list_entry(queue->next->next, struct task_struct,
-				  rt.run_list);
-		if (pick_rt_task(rq, next, cpu))
-			goto out;
-	}
-
- retry:
-	/* slower, but more flexible */
-	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO))
-		return NULL;
-
-	queue = array->queue + idx;
-	BUG_ON(list_empty(queue));
-
-	list_for_each_entry(next, queue, rt.run_list) {
-		if (pick_rt_task(rq, next, cpu))
-			goto out;
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		array = &rt_rq->active;
+		idx = sched_find_first_bit(array->bitmap);
+ next_idx:
+		if (idx >= MAX_RT_PRIO)
+			continue;
+		if (next && next->prio < idx)
+			continue;
+		list_for_each_entry(rt_se, array->queue + idx, run_list) {
+			struct task_struct *p = rt_task_of(rt_se);
+			if (pick_rt_task(rq, p, cpu)) {
+				next = p;
+				break;
+			}
+		}
+		if (!next) {
+			idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
+			goto next_idx;
+		}
 	}
 
-	goto retry;
-
- out:
 	return next;
 }
 
@@ -774,12 +991,12 @@ static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->nr_cpus_allowed)) {
+	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
-		if ((p->nr_cpus_allowed <= 1) && (weight > 1)) {
+		if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) {
 			rq->rt.rt_nr_migratory++;
-		} else if ((p->nr_cpus_allowed > 1) && (weight <= 1)) {
+		} else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) {
 			BUG_ON(!rq->rt.rt_nr_migratory);
 			rq->rt.rt_nr_migratory--;
 		}
@@ -788,7 +1005,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask)
 	}
 
 	p->cpus_allowed = *new_mask;
-	p->nr_cpus_allowed = weight;
+	p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */