Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--  kernel/sched/core.c  216
1 file changed, 158 insertions(+), 58 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 257002c13bb0..7f12624a393c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -83,7 +83,7 @@
 #endif
 
 #include "sched.h"
-#include "../workqueue_sched.h"
+#include "../workqueue_internal.h"
 #include "../smpboot.h"
 
 #define CREATE_TRACE_POINTS
@@ -1132,18 +1132,28 @@ EXPORT_SYMBOL_GPL(kick_process);
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
-	const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+	int nid = cpu_to_node(cpu);
+	const struct cpumask *nodemask = NULL;
 	enum { cpuset, possible, fail } state = cpuset;
 	int dest_cpu;
 
-	/* Look for allowed, online CPU in same node. */
-	for_each_cpu(dest_cpu, nodemask) {
-		if (!cpu_online(dest_cpu))
-			continue;
-		if (!cpu_active(dest_cpu))
-			continue;
-		if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
-			return dest_cpu;
+	/*
+	 * If the node that the cpu is on has been offlined, cpu_to_node()
+	 * will return -1. There is no cpu on the node, and we should
+	 * select the cpu on the other node.
+	 */
+	if (nid != -1) {
+		nodemask = cpumask_of_node(nid);
+
+		/* Look for allowed, online CPU in same node. */
+		for_each_cpu(dest_cpu, nodemask) {
+			if (!cpu_online(dest_cpu))
+				continue;
+			if (!cpu_active(dest_cpu))
+				continue;
+			if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
+				return dest_cpu;
+		}
 	}
 
 	for (;;) {
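[Illustrative sketch, not part of the patch: the hunk above guards cpumask_of_node() against a cpu whose node has gone offline, in which case cpu_to_node() returns -1. The same guard pattern as a standalone helper; the helper name is hypothetical, the cpumask/topology calls are the real kernel APIs.]

#include <linux/cpumask.h>
#include <linux/topology.h>

/* Hypothetical helper mirroring the fixed lookup order. */
static int pick_allowed_cpu_on_local_node(int cpu, const struct cpumask *allowed)
{
	int nid = cpu_to_node(cpu);	/* -1 when the node has been offlined */
	int dest_cpu;

	if (nid == -1)
		return -1;		/* no local node: caller must fall back */

	for_each_cpu(dest_cpu, cpumask_of_node(nid)) {
		if (!cpu_online(dest_cpu) || !cpu_active(dest_cpu))
			continue;
		if (cpumask_test_cpu(dest_cpu, allowed))
			return dest_cpu;
	}
	return -1;
}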
@@ -1523,7 +1533,8 @@ out:
  */
 int wake_up_process(struct task_struct *p)
 {
-	return try_to_wake_up(p, TASK_ALL, 0);
+	WARN_ON(task_is_stopped_or_traced(p));
+	return try_to_wake_up(p, TASK_NORMAL, 0);
 }
 EXPORT_SYMBOL(wake_up_process);
 
@@ -1741,9 +1752,8 @@ EXPORT_SYMBOL_GPL(preempt_notifier_unregister);
 static void fire_sched_in_preempt_notifiers(struct task_struct *curr)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_in(notifier, raw_smp_processor_id());
 }
 
@@ -1752,9 +1762,8 @@ fire_sched_out_preempt_notifiers(struct task_struct *curr,
 				 struct task_struct *next)
 {
 	struct preempt_notifier *notifier;
-	struct hlist_node *node;
 
-	hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link)
+	hlist_for_each_entry(notifier, &curr->preempt_notifiers, link)
 		notifier->ops->sched_out(notifier, next);
 }
 
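[Illustrative sketch, not part of the patch: the two hunks above adapt to the hlist_for_each_entry() form that drops the separate struct hlist_node * cursor. A minimal, hypothetical user of the new signature.]

#include <linux/list.h>
#include <linux/printk.h>

struct my_notifier {			/* hypothetical list element */
	struct hlist_node link;
	int id;
};

static void walk_notifiers(struct hlist_head *head)
{
	struct my_notifier *n;

	/* old form was: hlist_for_each_entry(n, node, head, link) */
	hlist_for_each_entry(n, head, link)
		pr_info("notifier %d\n", n->id);
}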
@@ -1968,11 +1977,10 @@ context_switch(struct rq *rq, struct task_struct *prev,
 }
 
 /*
- * nr_running, nr_uninterruptible and nr_context_switches:
+ * nr_running and nr_context_switches:
  *
  * externally visible scheduler statistics: current number of runnable
- * threads, current number of uninterruptible-sleeping threads, total
- * number of context switches performed since bootup.
+ * threads, total number of context switches performed since bootup.
  */
 unsigned long nr_running(void)
 {
@@ -1984,23 +1992,6 @@ unsigned long nr_running(void)
 	return sum;
 }
 
-unsigned long nr_uninterruptible(void)
-{
-	unsigned long i, sum = 0;
-
-	for_each_possible_cpu(i)
-		sum += cpu_rq(i)->nr_uninterruptible;
-
-	/*
-	 * Since we read the counters lockless, it might be slightly
-	 * inaccurate. Do not allow it to go below zero though:
-	 */
-	if (unlikely((long)sum < 0))
-		sum = 0;
-
-	return sum;
-}
-
 unsigned long long nr_context_switches(void)
 {
 	int i;
@@ -2785,7 +2776,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 	if (irqs_disabled())
 		print_irqtrace_events(prev);
 	dump_stack();
-	add_taint(TAINT_WARN);
+	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
 }
 
 /*
@@ -3267,7 +3258,8 @@ void complete_all(struct completion *x)
 EXPORT_SYMBOL(complete_all);
 
 static inline long __sched
-do_wait_for_common(struct completion *x, long timeout, int state)
+do_wait_for_common(struct completion *x,
+		   long (*action)(long), long timeout, int state)
 {
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
@@ -3280,7 +3272,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 		}
 		__set_current_state(state);
 		spin_unlock_irq(&x->wait.lock);
-		timeout = schedule_timeout(timeout);
+		timeout = action(timeout);
 		spin_lock_irq(&x->wait.lock);
 	} while (!x->done && timeout);
 	__remove_wait_queue(&x->wait, &wait);
@@ -3291,17 +3283,30 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 	return timeout ?: 1;
 }
 
-static long __sched
-wait_for_common(struct completion *x, long timeout, int state)
+static inline long __sched
+__wait_for_common(struct completion *x,
+		  long (*action)(long), long timeout, int state)
 {
 	might_sleep();
 
 	spin_lock_irq(&x->wait.lock);
-	timeout = do_wait_for_common(x, timeout, state);
+	timeout = do_wait_for_common(x, action, timeout, state);
 	spin_unlock_irq(&x->wait.lock);
 	return timeout;
 }
 
+static long __sched
+wait_for_common(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, schedule_timeout, timeout, state);
+}
+
+static long __sched
+wait_for_common_io(struct completion *x, long timeout, int state)
+{
+	return __wait_for_common(x, io_schedule_timeout, timeout, state);
+}
+
 /**
  * wait_for_completion: - waits for completion of a task
  * @x: holds the state of this particular completion
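[Illustrative sketch, not part of the patch: the refactor above threads the sleeping primitive through the common wait loop as a long (*action)(long) callback, so the same code can block via schedule_timeout() or io_schedule_timeout(). A stripped-down, hypothetical version of the pattern.]

#include <linux/sched.h>

/* Hypothetical: one sleep step, parameterized by how it sleeps. */
static long timed_sleep(long timeout, int state, long (*action)(long))
{
	__set_current_state(state);	/* e.g. TASK_UNINTERRUPTIBLE */
	timeout = action(timeout);	/* schedule_timeout() or io_schedule_timeout() */
	__set_current_state(TASK_RUNNING);
	return timeout;
}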
@@ -3338,6 +3343,39 @@ wait_for_completion_timeout(struct completion *x, unsigned long timeout)
 EXPORT_SYMBOL(wait_for_completion_timeout);
 
 /**
+ * wait_for_completion_io: - waits for completion of a task
+ * @x:  holds the state of this particular completion
+ *
+ * This waits to be signaled for completion of a specific task. It is NOT
+ * interruptible and there is no timeout. The caller is accounted as waiting
+ * for IO.
+ */
+void __sched wait_for_completion_io(struct completion *x)
+{
+	wait_for_common_io(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io);
+
+/**
+ * wait_for_completion_io_timeout: - waits for completion of a task (w/timeout)
+ * @x:  holds the state of this particular completion
+ * @timeout:  timeout value in jiffies
+ *
+ * This waits for either a completion of a specific task to be signaled or for a
+ * specified timeout to expire. The timeout is in jiffies. It is not
+ * interruptible. The caller is accounted as waiting for IO.
+ *
+ * The return value is 0 if timed out, and positive (at least 1, or number of
+ * jiffies left till timeout) if completed.
+ */
+unsigned long __sched
+wait_for_completion_io_timeout(struct completion *x, unsigned long timeout)
+{
+	return wait_for_common_io(x, timeout, TASK_UNINTERRUPTIBLE);
+}
+EXPORT_SYMBOL(wait_for_completion_io_timeout);
+
+/**
  * wait_for_completion_interruptible: - waits for completion of a task (w/intr)
  * @x:  holds the state of this particular completion
  *
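[Illustrative usage, not part of the patch: a caller that wants its sleep charged as iowait rather than plain uninterruptible sleep. The request structure and function names are hypothetical; the completion API is the real one.]

#include <linux/completion.h>

struct my_request {			/* hypothetical request wrapper */
	struct completion done;
};

static void my_request_finished(struct my_request *req)
{
	complete(&req->done);		/* e.g. from an IRQ or bio end_io path */
}

static void my_submit_and_wait(struct my_request *req)
{
	init_completion(&req->done);
	/* ... kick off the hardware / submit the request here ... */
	wait_for_completion_io(&req->done);	/* accounted as iowait */
}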
@@ -4363,20 +4401,32 @@ EXPORT_SYMBOL(yield);
  * It's the caller's job to ensure that the target task struct
  * can't go away on us before we can do any checks.
  *
- * Returns true if we indeed boosted the target task.
+ * Returns:
+ *	true (>0) if we indeed boosted the target task.
+ *	false (0) if we failed to boost the target.
+ *	-ESRCH if there's no task to yield to.
  */
 bool __sched yield_to(struct task_struct *p, bool preempt)
 {
 	struct task_struct *curr = current;
 	struct rq *rq, *p_rq;
 	unsigned long flags;
-	bool yielded = 0;
+	int yielded = 0;
 
 	local_irq_save(flags);
 	rq = this_rq();
 
 again:
 	p_rq = task_rq(p);
+	/*
+	 * If we're the only runnable task on the rq and target rq also
+	 * has only one task, there's absolutely no point in yielding.
+	 */
+	if (rq->nr_running == 1 && p_rq->nr_running == 1) {
+		yielded = -ESRCH;
+		goto out_irq;
+	}
+
 	double_rq_lock(rq, p_rq);
 	while (task_rq(p) != p_rq) {
 		double_rq_unlock(rq, p_rq);
@@ -4384,13 +4434,13 @@ again:
 	}
 
 	if (!curr->sched_class->yield_to_task)
-		goto out;
+		goto out_unlock;
 
 	if (curr->sched_class != p->sched_class)
-		goto out;
+		goto out_unlock;
 
 	if (task_running(p_rq, p) || p->state)
-		goto out;
+		goto out_unlock;
 
 	yielded = curr->sched_class->yield_to_task(rq, p, preempt);
 	if (yielded) {
@@ -4403,11 +4453,12 @@ again:
 		resched_task(p_rq->curr);
 	}
 
-out:
+out_unlock:
 	double_rq_unlock(rq, p_rq);
+out_irq:
 	local_irq_restore(flags);
 
-	if (yielded)
+	if (yielded > 0)
 		schedule();
 
 	return yielded;
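[Illustrative sketch, not part of the patch: the relabelled exit paths above follow the usual staged-unwind idiom, where each label undoes only what has been taken so far; that is what lets the new single-task bail-out skip the runqueue locks entirely. A generic, hypothetical example of the same shape, with stub predicates standing in for the real checks.]

#include <linux/errno.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static bool nothing_to_yield_to(void) { return false; }	/* hypothetical stub */
static bool do_the_boost(void)        { return true;  }	/* hypothetical stub */

static int try_boost(spinlock_t *lock)
{
	unsigned long flags;
	int ret;

	local_irq_save(flags);
	if (nothing_to_yield_to()) {
		ret = -ESRCH;
		goto out_irq;		/* nothing locked yet: skip the unlock */
	}

	spin_lock(lock);
	ret = do_the_boost() ? 1 : 0;
	spin_unlock(lock);
out_irq:
	local_irq_restore(flags);
	return ret;
}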
@@ -4666,6 +4717,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	 */
 	idle->sched_class = &idle_sched_class;
 	ftrace_graph_init_idle_task(idle, cpu);
+	vtime_init_idle(idle);
 #if defined(CONFIG_SMP)
 	sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu);
 #endif
@@ -7159,7 +7211,6 @@ static void free_sched_group(struct task_group *tg)
 struct task_group *sched_create_group(struct task_group *parent)
 {
 	struct task_group *tg;
-	unsigned long flags;
 
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
@@ -7171,6 +7222,17 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;
 
+	return tg;
+
+err:
+	free_sched_group(tg);
+	return ERR_PTR(-ENOMEM);
+}
+
+void sched_online_group(struct task_group *tg, struct task_group *parent)
+{
+	unsigned long flags;
+
 	spin_lock_irqsave(&task_group_lock, flags);
 	list_add_rcu(&tg->list, &task_groups);
 
@@ -7180,12 +7242,6 @@ struct task_group *sched_create_group(struct task_group *parent)
 	INIT_LIST_HEAD(&tg->children);
 	list_add_rcu(&tg->siblings, &parent->children);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	return tg;
-
-err:
-	free_sched_group(tg);
-	return ERR_PTR(-ENOMEM);
 }
 
 /* rcu callback to free various structures associated with a task group */
@@ -7198,6 +7254,12 @@ static void free_sched_group_rcu(struct rcu_head *rhp)
 /* Destroy runqueue etc associated with a task group */
 void sched_destroy_group(struct task_group *tg)
 {
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&tg->rcu, free_sched_group_rcu);
+}
+
+void sched_offline_group(struct task_group *tg)
+{
 	unsigned long flags;
 	int i;
 
@@ -7209,9 +7271,6 @@ void sched_destroy_group(struct task_group *tg)
 	list_del_rcu(&tg->list);
 	list_del_rcu(&tg->siblings);
 	spin_unlock_irqrestore(&task_group_lock, flags);
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	call_rcu(&tg->rcu, free_sched_group_rcu);
 }
 
 /* change task's runqueue when it moves between groups.
@@ -7507,6 +7566,25 @@ static int sched_rt_global_constraints(void)
 }
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+int sched_rr_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *lenp,
+		loff_t *ppos)
+{
+	int ret;
+	static DEFINE_MUTEX(mutex);
+
+	mutex_lock(&mutex);
+	ret = proc_dointvec(table, write, buffer, lenp, ppos);
+	/* make sure that internally we keep jiffies */
+	/* also, writing zero resets timeslice to default */
+	if (!ret && write) {
+		sched_rr_timeslice = sched_rr_timeslice <= 0 ?
+			RR_TIMESLICE : msecs_to_jiffies(sched_rr_timeslice);
+	}
+	mutex_unlock(&mutex);
+	return ret;
+}
+
 int sched_rt_handler(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp,
 		loff_t *ppos)
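[Illustrative sketch, not part of the patch: the normalization the new handler applies after a successful write. The value arrives in milliseconds from userspace, is kept internally in jiffies, and zero or a negative write falls back to the RR_TIMESLICE default. The helper name and header location are assumptions for this sketch.]

#include <linux/jiffies.h>
#include <linux/sched/rt.h>	/* RR_TIMESLICE; header location assumed */

static int normalize_rr_timeslice(int written_ms)
{
	/* mirrors the conversion in sched_rr_handler() above */
	return written_ms <= 0 ? RR_TIMESLICE : msecs_to_jiffies(written_ms);
}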
@@ -7563,6 +7641,19 @@ static struct cgroup_subsys_state *cpu_cgroup_css_alloc(struct cgroup *cgrp)
 	return &tg->css;
 }
 
+static int cpu_cgroup_css_online(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+	struct task_group *parent;
+
+	if (!cgrp->parent)
+		return 0;
+
+	parent = cgroup_tg(cgrp->parent);
+	sched_online_group(tg, parent);
+	return 0;
+}
+
 static void cpu_cgroup_css_free(struct cgroup *cgrp)
 {
 	struct task_group *tg = cgroup_tg(cgrp);
@@ -7570,6 +7661,13 @@ static void cpu_cgroup_css_free(struct cgroup *cgrp)
 	sched_destroy_group(tg);
 }
 
+static void cpu_cgroup_css_offline(struct cgroup *cgrp)
+{
+	struct task_group *tg = cgroup_tg(cgrp);
+
+	sched_offline_group(tg);
+}
+
 static int cpu_cgroup_can_attach(struct cgroup *cgrp,
 				 struct cgroup_taskset *tset)
 {
@@ -7925,6 +8023,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.css_alloc	= cpu_cgroup_css_alloc,
 	.css_free	= cpu_cgroup_css_free,
+	.css_online	= cpu_cgroup_css_online,
+	.css_offline	= cpu_cgroup_css_offline,
 	.can_attach	= cpu_cgroup_can_attach,
 	.attach		= cpu_cgroup_attach,
 	.exit		= cpu_cgroup_exit,
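[Illustrative sketch, not part of the patch: the split lifecycle these callbacks implement. A task group is now allocated and made visible in two steps, and torn down in two steps, matching the cgroup core's alloc/online/offline/free states. The driver function below is hypothetical; the four sched calls are the ones introduced above, with prototypes assumed to live in kernel/sched/sched.h.]

#include <linux/err.h>
#include "sched.h"		/* struct task_group and the prototypes (assumed) */

static int example_group_lifecycle(struct task_group *parent)
{
	struct task_group *tg;

	tg = sched_create_group(parent);	/* css_alloc:   allocate only         */
	if (IS_ERR(tg))
		return PTR_ERR(tg);

	sched_online_group(tg, parent);		/* css_online:  link into the tree    */
	/* ... the group is live; tasks may be attached to it ... */
	sched_offline_group(tg);		/* css_offline: unlink from the tree  */
	sched_destroy_group(tg);		/* css_free:    free via RCU callback */
	return 0;
}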