 include/linux/sched.h |  12
 init/Kconfig          |  11
 kernel/sched.c        | 172
 kernel/sched_fair.c   |   5
 4 files changed, 83 insertions(+), 117 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 66169005f008..03c13b663e4b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu);
 
 struct seq_file;
 struct cfs_rq;
+struct task_grp;
 #ifdef CONFIG_SCHED_DEBUG
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings;
 
 extern void normalize_rt_tasks(void);
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+
+extern struct task_grp init_task_grp;
+
+extern struct task_grp *sched_create_group(void);
+extern void sched_destroy_group(struct task_grp *tg);
+extern void sched_move_task(struct task_struct *tsk);
+extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares);
+
+#endif
+
 #ifdef CONFIG_TASK_XACCT
 static inline void add_rchar(struct task_struct *tsk, ssize_t amt)
 {
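
The hunk above is the whole in-kernel interface this patch exposes for group CPU scheduling. As a rough illustration only (not part of the patch), a hypothetical caller could drive it along these lines; my_setup_group() and the shares value 512 are made up for the example, and the caller is assumed to have some mechanism of its own for associating the task with the new group before calling sched_move_task():

/* Hypothetical sketch of a caller of the new interface -- not part of
 * this patch. Error handling follows the ERR_PTR convention that
 * sched_create_group() uses. */
#include <linux/err.h>
#include <linux/sched.h>

static int my_setup_group(struct task_struct *tsk)
{
        struct task_grp *tg;
        int ret;

        tg = sched_create_group();
        if (IS_ERR(tg))
                return PTR_ERR(tg);

        /* 512 is an arbitrary example: half the nice-0 weight of 1024 */
        ret = sched_group_set_shares(tg, 512);
        if (ret) {
                sched_destroy_group(tg);
                return ret;
        }

        /* tsk is assumed to already be associated with tg by the caller */
        sched_move_task(tsk);
        return 0;
}
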
diff --git a/init/Kconfig b/init/Kconfig
index 11c6762a6529..ef90a154dd90 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -282,13 +282,12 @@ config CPUSETS
           Say N if unsure.
 
 config FAIR_GROUP_SCHED
-        bool "Fair group scheduler"
-        depends on EXPERIMENTAL && CONTAINERS
+        bool "Fair group cpu scheduler"
+        default n
+        depends on EXPERIMENTAL
         help
-          This option enables you to group tasks and control CPU resource
-          allocation to such groups.
-
-          Say N if unsure.
+          This feature lets cpu scheduler recognize task groups and control cpu
+          bandwidth allocation to such task groups.
 
 config SYSFS_DEPRECATED
         bool "Create deprecated sysfs files"
diff --git a/kernel/sched.c b/kernel/sched.c
index ee7ac71b12f8..e10c403b1213 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#include <linux/container.h>
-
 struct cfs_rq;
 
 /* task group related information */
 struct task_grp {
-        struct container_subsys_state css;
         /* schedulable entities of this group on each cpu */
         struct sched_entity **se;
         /* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
 /* Default task group.
  * Every task in system belong to this group at bootup.
  */
-static struct task_grp init_task_grp = {
+struct task_grp init_task_grp = {
         .se     = init_sched_entity_p,
         .cfs_rq = init_cfs_rq_p,
 };
+
+#define INIT_TASK_GRP_LOAD      NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
 static inline struct task_grp *task_grp(struct task_struct *p)
 {
-        return container_of(task_subsys_state(p, cpu_subsys_id),
-                                struct task_grp, css);
+        struct task_grp *tg;
+
+        tg = &init_task_grp;
+
+        return tg;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -250,6 +253,7 @@ struct cfs_rq {
          */
         struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
         struct task_grp *tg;    /* group that "owns" this runqueue */
+        struct rcu_head rcu;
 #endif
 };
 
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
                 init_sched_entity_p[i] = se;
                 se->cfs_rq = &rq->cfs;
                 se->my_q = cfs_rq;
-                se->load.weight = NICE_0_LOAD;
-                se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+                se->load.weight = init_task_grp_load;
+                se->load.inv_weight =
+                        div64_64(1ULL<<32, init_task_grp_load);
                 se->parent = NULL;
         }
-        init_task_grp.shares = NICE_0_LOAD;
+        init_task_grp.shares = init_task_grp_load;
 #endif
 
         for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
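
A side note on the arithmetic above (illustration, not part of the patch): load.inv_weight caches 2^32 / weight, which is what the div64_64(1ULL<<32, ...) call computes, so the fair-share code can later approximate a division by the weight with a multiply and a 32-bit right shift; the real scaling lives in the kernel/sched.c delta helpers. A minimal sketch of that fixed-point trick, assuming a u64 numerator:

/* Sketch of the fixed-point trick behind load.inv_weight (illustration
 * only). With inv_weight = 2^32 / weight, x / weight can be
 * approximated without a division: */
#include <linux/types.h>

static inline unsigned long div_by_weight(u64 x, u32 inv_weight)
{
        return (unsigned long)((x * inv_weight) >> 32);
}
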
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-        return container_of(container_subsys_state(cont, cpu_subsys_id),
-                                        struct task_grp, css);
-}
-
 /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
 {
         struct task_grp *tg;
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
+        struct rq *rq;
         int i;
 
-        if (!cont->parent) {
-                /* This is early initialization for the top container */
-                init_task_grp.css.container = cont;
-                return &init_task_grp.css;
-        }
-
-        /* we support only 1-level deep hierarchical scheduler atm */
-        if (cont->parent->parent)
-                return ERR_PTR(-EINVAL);
-
         tg = kzalloc(sizeof(*tg), GFP_KERNEL);
         if (!tg)
                 return ERR_PTR(-ENOMEM);
 
-        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
-        tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+        tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
         if (!tg->se)
                 goto err;
 
         for_each_possible_cpu(i) {
-                struct rq *rq = cpu_rq(i);
+                rq = cpu_rq(i);
 
                 cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
                                                         cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 tg->cfs_rq[i] = cfs_rq;
                 init_cfs_rq(cfs_rq, rq);
                 cfs_rq->tg = tg;
-                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
                 tg->se[i] = se;
                 se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
                 se->parent = NULL;
         }
 
-        tg->shares = NICE_0_LOAD;
+        for_each_possible_cpu(i) {
+                rq = cpu_rq(i);
+                cfs_rq = tg->cfs_rq[i];
+                list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+        }
 
-        /* Bind the container to task_grp object we just created */
-        tg->css.container = cont;
+        tg->shares = NICE_0_LOAD;
 
-        return &tg->css;
+        return tg;
 
 err:
         for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ err:
         return ERR_PTR(-ENOMEM);
 }
 
-
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-                                        struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
 {
-        struct task_grp *tg = container_tg(cont);
-        struct cfs_rq *cfs_rq;
+        struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+        struct task_grp *tg = cfs_rq->tg;
         struct sched_entity *se;
         int i;
 
-        for_each_possible_cpu(i) {
-                cfs_rq = tg->cfs_rq[i];
-                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-        }
-
-        /* wait for possible concurrent references to cfs_rqs complete */
-        synchronize_sched();
-
         /* now it should be safe to free those cfs_rqs */
         for_each_possible_cpu(i) {
                 cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
         kfree(tg);
 }
 
-static int sched_can_attach(struct container_subsys *ss,
-                        struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
 {
-        /* We don't support RT-tasks being in separate groups */
-        if (tsk->sched_class != &fair_sched_class)
-                return -EINVAL;
+        struct cfs_rq *cfs_rq;
+        int i;
 
-        return 0;
+        for_each_possible_cpu(i) {
+                cfs_rq = tg->cfs_rq[i];
+                list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+        }
+
+        cfs_rq = tg->cfs_rq[0];
+
+        /* wait for possible concurrent references to cfs_rqs complete */
+        call_rcu(&cfs_rq->rcu, free_sched_group);
 }
 
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-                        struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ * The caller of this function should have put the task in its new group
+ * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ * reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
 {
         int on_rq, running;
         unsigned long flags;
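
The teardown above replaces the old list_del_rcu() + synchronize_sched() sequence with a non-blocking call_rcu(): the rcu_head embedded in the first cfs_rq carries the callback, and free_sched_group() runs only after a grace period, so sched_destroy_group() no longer blocks. A generic, self-contained sketch of the same unlink-then-defer-free pattern (illustration only, not scheduler code; my_node is a made-up type):

/* Generic sketch of the unlink-then-defer-free pattern used above
 * (illustration only): remove the object from an RCU-protected list,
 * then reclaim it from an RCU callback once readers are done. */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_node {
        struct list_head link;  /* lives on an RCU-protected list */
        struct rcu_head rcu;    /* used for deferred reclamation */
};

static void my_node_free(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct my_node, rcu));
}

static void my_node_destroy(struct my_node *n)
{
        list_del_rcu(&n->link);          /* readers may still hold n */
        call_rcu(&n->rcu, my_node_free); /* freed after a grace period */
}
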
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
         spin_unlock_irq(&rq->lock);
 }
 
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-                                struct file *file, const char __user *userbuf,
-                                size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
 {
         int i;
-        unsigned long shareval;
-        struct task_grp *tg = container_tg(cont);
-        char buffer[2*sizeof(unsigned long) + 1];
-
-        if (nbytes > 2*sizeof(unsigned long))   /* safety check */
-                return -E2BIG;
 
-        if (copy_from_user(buffer, userbuf, nbytes))
-                return -EFAULT;
+        if (tg->shares == shares)
+                return 0;
 
-        buffer[nbytes] = 0;     /* nul-terminate */
-        shareval = simple_strtoul(buffer, NULL, 10);
+        /* return -EINVAL if the new value is not sane */
 
-        tg->shares = shareval;
+        tg->shares = shares;
         for_each_possible_cpu(i)
-                set_se_shares(tg->se[i], shareval);
-
-        return nbytes;
-}
-
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-        struct task_grp *tg = container_tg(cont);
-
-        return (u64) tg->shares;
-}
+                set_se_shares(tg->se[i], shares);
 
-struct cftype cpuctl_share = {
-        .name = "shares",
-        .read_uint = cpu_shares_read_uint,
-        .write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-        return container_add_file(cont, ss, &cpuctl_share);
+        return 0;
 }
 
-struct container_subsys cpu_subsys = {
-        .name = "cpu",
-        .create = sched_create_group,
-        .destroy = sched_destroy_group,
-        .can_attach = sched_can_attach,
-        .attach = sched_move_task,
-        .populate = sched_populate,
-        .subsys_id = cpu_subsys_id,
-        .early_init = 1,
-};
-
-#endif /* CONFIG_FAIR_GROUP_SCHED */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 556942cf2606..abd65ed9f2a5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
         if (!cfs_rq->nr_running)
                 return MAX_PRIO;
 
-        curr = __pick_next_entity(cfs_rq);
+        curr = cfs_rq->curr;
+        if (!curr)
+                curr = __pick_next_entity(cfs_rq);
+
         p = task_of(curr);
 
         return p->prio;