 include/linux/sched.h |  12
 init/Kconfig          |  11
 kernel/sched.c        | 172
 kernel/sched_fair.c   |   5
 4 files changed, 83 insertions(+), 117 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 66169005f008..03c13b663e4b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
+struct task_grp;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
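
The hunk above is the whole in-kernel interface this patch exposes for group CPU scheduling. As a rough illustration only (not part of the patch), a hypothetical caller could drive it along these lines; my_setup_group() and the shares value 512 are made up for the example, and the caller is assumed to have some mechanism of its own for associating the task with the new group before calling sched_move_task():

/* Hypothetical sketch of a caller of the new interface -- not part of
 * this patch. Error handling follows the ERR_PTR convention that
 * sched_create_group() uses. */
#include <linux/err.h>
#include <linux/sched.h>

static int my_setup_group(struct task_struct *tsk)
{
        struct task_grp *tg;
        int ret;

        tg = sched_create_group();
        if (IS_ERR(tg))
                return PTR_ERR(tg);

        /* 512 is an arbitrary example: half the nice-0 weight of 1024 */
        ret = sched_group_set_shares(tg, 512);
        if (ret) {
                sched_destroy_group(tg);
                return ret;
        }

        /* tsk is assumed to already be associated with tg by the caller */
        sched_move_task(tsk);
        return 0;
}
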
diff --git a/init/Kconfig b/init/Kconfig
index 11c6762a6529..ef90a154dd90 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -282,13 +282,12 @@ config CPUSETS
           Say N if unsure.
 
 config FAIR_GROUP_SCHED
-        bool "Fair group scheduler"
-        depends on EXPERIMENTAL && CONTAINERS
+        bool "Fair group cpu scheduler"
+        default n
+        depends on EXPERIMENTAL
         help
-          This option enables you to group tasks and control CPU resource
-          allocation to such groups.
-
-          Say N if unsure.
+          This feature lets cpu scheduler recognize task groups and control cpu
+          bandwidth allocation to such task groups.
 
 config SYSFS_DEPRECATED
         bool "Create deprecated sysfs files"
diff --git a/kernel/sched.c b/kernel/sched.c
index ee7ac71b12f8..e10c403b1213 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#include <linux/container.h>
-
 struct cfs_rq;
 
 /* task group related information */
 struct task_grp {
-        struct container_subsys_state css;
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
 /* Default task group.
  * Every task in system belong to this group at bootup.
  */
-static struct task_grp init_task_grp = {
+struct task_grp init_task_grp = {
         .se     = init_sched_entity_p,
         .cfs_rq = init_cfs_rq_p,
 };
+
+#define INIT_TASK_GRP_LOAD      NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
 static inline struct task_grp *task_grp(struct task_struct *p)
 {
-        return container_of(task_subsys_state(p, cpu_subsys_id),
-                                struct task_grp, css);
+        struct task_grp *tg;
+
+        tg = &init_task_grp;
+
+        return tg;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -250,6 +253,7 @@ struct cfs_rq {
          */
         struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
         struct task_grp *tg;    /* group that "owns" this runqueue */
+        struct rcu_head rcu;
 #endif
 };
 
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
                 init_sched_entity_p[i] = se;
                 se->cfs_rq = &rq->cfs;
                 se->my_q = cfs_rq;
-                se->load.weight = NICE_0_LOAD;
-                se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+                se->load.weight = init_task_grp_load;
+                se->load.inv_weight =
+                        div64_64(1ULL<<32, init_task_grp_load);
                 se->parent = NULL;
         }
-        init_task_grp.shares = NICE_0_LOAD;
+        init_task_grp.shares = init_task_grp_load;
 #endif
 
         for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
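
A side note on the arithmetic above (illustration, not part of the patch): load.inv_weight caches 2^32 / weight, which is what the div64_64(1ULL<<32, ...) call computes, so the fair-share code can later approximate a division by the weight with a multiply and a 32-bit right shift; the real scaling lives in the kernel/sched.c delta helpers. A minimal sketch of that fixed-point trick, assuming a u64 numerator:

/* Sketch of the fixed-point trick behind load.inv_weight (illustration
 * only). With inv_weight = 2^32 / weight, x / weight can be
 * approximated without a division: */
#include <linux/types.h>

static inline unsigned long div_by_weight(u64 x, u32 inv_weight)
{
        return (unsigned long)((x * inv_weight) >> 32);
}
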
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-        return container_of(container_subsys_state(cont, cpu_subsys_id),
-                                        struct task_grp, css);
-}
-
 /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
 {
         struct task_grp *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
+        struct rq *rq;
         int i;
 
-        if (!cont->parent) {
-                /* This is early initialization for the top container */
-                init_task_grp.css.container = cont;
-                return &init_task_grp.css;
-        }
-
-        /* we support only 1-level deep hierarchical scheduler atm */
-        if (cont->parent->parent)
-                return ERR_PTR(-EINVAL);
-
         tg = kzalloc(sizeof(*tg), GFP_KERNEL);
         if (!tg)
                 return ERR_PTR(-ENOMEM);
 
-        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
-        tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+        tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;
 
         for_each_possible_cpu(i) {
-                struct rq *rq = cpu_rq(i);
+                rq = cpu_rq(i);
 
                 cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                                                         cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 tg->cfs_rq[i] = cfs_rq;
                 init_cfs_rq(cfs_rq, rq);
                 cfs_rq->tg = tg;
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
                 tg->se[i] = se;
                 se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 se->parent = NULL;
         }
 
-        tg->shares = NICE_0_LOAD;
+        for_each_possible_cpu(i) {
+                rq = cpu_rq(i);
+                cfs_rq = tg->cfs_rq[i];
+                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+        }
 
-        /* Bind the container to task_grp object we just created */
-        tg->css.container = cont;
+        tg->shares = NICE_0_LOAD;
 
-        return &tg->css;
+        return tg;
 
 err:
         for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ err:
         return ERR_PTR(-ENOMEM);
 }
 
-
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-                                        struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
 {
-        struct task_grp *tg = container_tg(cont);
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+        struct task_grp *tg = cfs_rq->tg;
         struct sched_entity *se;
         int i;
 
-        for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-        }
-
-        /* wait for possible concurrent references to cfs_rqs complete */
-        synchronize_sched();
-
         /* now it should be safe to free those cfs_rqs */
         for_each_possible_cpu(i) {
                 cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
         kfree(tg);
 }
 
-static int sched_can_attach(struct container_subsys *ss,
-                        struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
 {
-        /* We don't support RT-tasks being in separate groups */
-        if (tsk->sched_class != &fair_sched_class)
-                return -EINVAL;
+        struct cfs_rq *cfs_rq;
+        int i;
 
-        return 0;
+        for_each_possible_cpu(i) {
+                cfs_rq = tg->cfs_rq[i];
+                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+        }
+
+        cfs_rq = tg->cfs_rq[0];
+
+        /* wait for possible concurrent references to cfs_rqs complete */
+        call_rcu(&cfs_rq->rcu, free_sched_group);
 }
 
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-                        struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ * The caller of this function should have put the task in its new group
+ * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ * reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
 {
         int on_rq, running;
         unsigned long flags;
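
The teardown above replaces the old list_del_rcu() + synchronize_sched() sequence with a non-blocking call_rcu(): the rcu_head embedded in the first cfs_rq carries the callback, and free_sched_group() runs only after a grace period, so sched_destroy_group() no longer blocks. A generic, self-contained sketch of the same unlink-then-defer-free pattern (illustration only, not scheduler code; my_node is a made-up type):

/* Generic sketch of the unlink-then-defer-free pattern used above
 * (illustration only): remove the object from an RCU-protected list,
 * then reclaim it from an RCU callback once readers are done. */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {
        struct list_head link;  /* lives on an RCU-protected list */
        struct rcu_head rcu;    /* used for deferred reclamation */
};

static void my_node_free(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct my_node, rcu));
}

static void my_node_destroy(struct my_node *n)
{
        list_del_rcu(&n->link);          /* readers may still hold n */
        call_rcu(&n->rcu, my_node_free); /* freed after a grace period */
}
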
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         spin_unlock_irq(&rq->lock);
 }
 
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-                                struct file *file, const char __user *userbuf,
-                                size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
 {
         int i;
-        unsigned long shareval;
-        struct task_grp *tg = container_tg(cont);
-        char buffer[2*sizeof(unsigned long) + 1];
-
-        if (nbytes > 2*sizeof(unsigned long))   /* safety check */
-                return -E2BIG;
 
-        if (copy_from_user(buffer, userbuf, nbytes))
-                return -EFAULT;
+        if (tg->shares == shares)
+                return 0;
 
-        buffer[nbytes] = 0;     /* nul-terminate */
-        shareval = simple_strtoul(buffer, NULL, 10);
+        /* return -EINVAL if the new value is not sane */
 
-        tg->shares = shareval;
+        tg->shares = shares;
         for_each_possible_cpu(i)
-                set_se_shares(tg->se[i], shareval);
-
-        return nbytes;
-}
-
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-        struct task_grp *tg = container_tg(cont);
-
-        return (u64) tg->shares;
-}
+                set_se_shares(tg->se[i], shares);
 
-struct cftype cpuctl_share = {
-        .name = "shares",
-        .read_uint = cpu_shares_read_uint,
-        .write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-        return container_add_file(cont, ss, &cpuctl_share);
+        return 0;
 }
 
-struct container_subsys cpu_subsys = {
-        .name = "cpu",
-        .create = sched_create_group,
-        .destroy = sched_destroy_group,
-        .can_attach = sched_can_attach,
-        .attach = sched_move_task,
-        .populate = sched_populate,
-        .subsys_id = cpu_subsys_id,
-        .early_init = 1,
-};
-
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 556942cf2606..abd65ed9f2a5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
         if (!cfs_rq->nr_running)
                 return MAX_PRIO;
 
-        curr = __pick_next_entity(cfs_rq);
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);
+
         p = task_of(curr);
 
         return p->prio;