author		Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>	2007-10-15 11:00:09 -0400
committer	Ingo Molnar <mingo@elte.hu>			2007-10-15 11:00:09 -0400
commit		9b5b77512dce239fa168183fa71896712232e95a
tree		2a95cf9151b4b6bd745236858f5b73b639dbc0d9 /kernel
parent		75c28ace9f2b2f403674e045939424a77c95b47c
sched: clean up code under CONFIG_FAIR_GROUP_SCHED

With the view of supporting user-id based fair scheduling (and not just
container-based fair scheduling), this patch renames several functions and
makes them independent of whether they are being used for container-based
or user-id based fair scheduling.

Also fix a problem reported by KAMEZAWA Hiroyuki (wrt allocating an
undersized array for tg->cfs_rq[] and tg->se[]).

Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
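For orientation, the container-independent interface this patch exposes is
sched_create_group(), sched_group_set_shares(), sched_move_task() and
sched_destroy_group(). Below is a minimal usage sketch only, not code from
the patch; the example_* helpers and the my_tg pointer are invented for
illustration:

/* Illustrative sketch: driving the container-independent group API. */
static struct task_grp *my_tg;	/* hypothetical group pointer */

static int example_setup(void)
{
	my_tg = sched_create_group();
	if (IS_ERR(my_tg))
		return PTR_ERR(my_tg);

	/* give this group twice the default weight */
	return sched_group_set_shares(my_tg, 2 * NICE_0_LOAD);
}

static void example_attach(struct task_struct *tsk)
{
	/* the caller is expected to have re-parented tsk already */
	sched_move_task(tsk);
}

static void example_teardown(void)
{
	sched_destroy_group(my_tg);
}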
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c		172
-rw-r--r--	kernel/sched_fair.c	  5
2 files changed, 66 insertions(+), 111 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index ee7ac71b12f8..e10c403b1213 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -173,13 +173,10 @@ struct rt_prio_array {
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-#include <linux/container.h>
-
 struct cfs_rq;
 
 /* task group related information */
 struct task_grp {
-	struct container_subsys_state css;
 	/* schedulable entities of this group on each cpu */
 	struct sched_entity **se;
 	/* runqueue "owned" by this group on each cpu */
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity);
 /* Default task group's cfs_rq on each cpu */
 static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp;
 
-static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS];
-static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS];
+static struct sched_entity *init_sched_entity_p[NR_CPUS];
+static struct cfs_rq *init_cfs_rq_p[NR_CPUS];
 
 /* Default task group.
  * Every task in system belong to this group at bootup.
  */
-static struct task_grp init_task_grp = {
+struct task_grp init_task_grp = {
 	.se = init_sched_entity_p,
 	.cfs_rq = init_cfs_rq_p,
 	};
+
+#define INIT_TASK_GRP_LOAD	NICE_0_LOAD
+static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
 static inline struct task_grp *task_grp(struct task_struct *p)
 {
-	return container_of(task_subsys_state(p, cpu_subsys_id),
-			    struct task_grp, css);
+	struct task_grp *tg;
+
+	tg = &init_task_grp;
+
+	return tg;
 }
 
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
@@ -250,6 +253,7 @@ struct cfs_rq {
 	 */
 	struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */
 	struct task_grp *tg;	/* group that "owns" this runqueue */
+	struct rcu_head rcu;
 #endif
 };
 
@@ -6513,11 +6517,12 @@ void __init sched_init(void)
 		init_sched_entity_p[i] = se;
 		se->cfs_rq = &rq->cfs;
 		se->my_q = cfs_rq;
-		se->load.weight = NICE_0_LOAD;
-		se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD);
+		se->load.weight = init_task_grp_load;
+		se->load.inv_weight =
+			div64_64(1ULL<<32, init_task_grp_load);
 		se->parent = NULL;
 	}
-	init_task_grp.shares = NICE_0_LOAD;
+	init_task_grp.shares = init_task_grp_load;
 #endif
 
 	for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 
-/* return corresponding task_grp object of a container */
-static inline struct task_grp *container_tg(struct container *cont)
-{
-	return container_of(container_subsys_state(cont, cpu_subsys_id),
-			    struct task_grp, css);
-}
-
 /* allocate runqueue etc for a new task group */
-static struct container_subsys_state *
-sched_create_group(struct container_subsys *ss, struct container *cont)
+struct task_grp *sched_create_group(void)
 {
 	struct task_grp *tg;
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se;
+	struct rq *rq;
 	int i;
 
-	if (!cont->parent) {
-		/* This is early initialization for the top container */
-		init_task_grp.css.container = cont;
-		return &init_task_grp.css;
-	}
-
-	/* we support only 1-level deep hierarchical scheduler atm */
-	if (cont->parent->parent)
-		return ERR_PTR(-EINVAL);
-
 	tg = kzalloc(sizeof(*tg), GFP_KERNEL);
 	if (!tg)
 		return ERR_PTR(-ENOMEM);
 
-	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL);
+	tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
 	if (!tg->cfs_rq)
 		goto err;
-	tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL);
+	tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL);
 	if (!tg->se)
 		goto err;
 
 	for_each_possible_cpu(i) {
-		struct rq *rq = cpu_rq(i);
+		rq = cpu_rq(i);
 
 		cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL,
 							cpu_to_node(i));
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
 		tg->cfs_rq[i] = cfs_rq;
 		init_cfs_rq(cfs_rq, rq);
 		cfs_rq->tg = tg;
-		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
 
 		tg->se[i] = se;
 		se->cfs_rq = &rq->cfs;
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont)
 		se->parent = NULL;
 	}
 
-	tg->shares = NICE_0_LOAD;
+	for_each_possible_cpu(i) {
+		rq = cpu_rq(i);
+		cfs_rq = tg->cfs_rq[i];
+		list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
+	}
 
-	/* Bind the container to task_grp object we just created */
-	tg->css.container = cont;
+	tg->shares = NICE_0_LOAD;
 
-	return &tg->css;
+	return tg;
 
 err:
 	for_each_possible_cpu(i) {
@@ -6797,24 +6787,14 @@ err:
 	return ERR_PTR(-ENOMEM);
 }
 
-
-/* destroy runqueue etc associated with a task group */
-static void sched_destroy_group(struct container_subsys *ss,
-					struct container *cont)
+/* rcu callback to free various structures associated with a task group */
+static void free_sched_group(struct rcu_head *rhp)
 {
-	struct task_grp *tg = container_tg(cont);
-	struct cfs_rq *cfs_rq;
+	struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu);
+	struct task_grp *tg = cfs_rq->tg;
 	struct sched_entity *se;
 	int i;
 
-	for_each_possible_cpu(i) {
-		cfs_rq = tg->cfs_rq[i];
-		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-	}
-
-	/* wait for possible concurrent references to cfs_rqs complete */
-	synchronize_sched();
-
 	/* now it should be safe to free those cfs_rqs */
 	for_each_possible_cpu(i) {
 		cfs_rq = tg->cfs_rq[i];
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss,
 	kfree(tg);
 }
 
-static int sched_can_attach(struct container_subsys *ss,
-			     struct container *cont, struct task_struct *tsk)
+/* Destroy runqueue etc associated with a task group */
+void sched_destroy_group(struct task_grp *tg)
 {
-	/* We don't support RT-tasks being in separate groups */
-	if (tsk->sched_class != &fair_sched_class)
-		return -EINVAL;
+	struct cfs_rq *cfs_rq;
+	int i;
 
-	return 0;
+	for_each_possible_cpu(i) {
+		cfs_rq = tg->cfs_rq[i];
+		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
+	}
+
+	cfs_rq = tg->cfs_rq[0];
+
+	/* wait for possible concurrent references to cfs_rqs complete */
+	call_rcu(&cfs_rq->rcu, free_sched_group);
 }
 
-/* change task's runqueue when it moves between groups */
-static void sched_move_task(struct container_subsys *ss, struct container *cont,
-			struct container *old_cont, struct task_struct *tsk)
+/* change task's runqueue when it moves between groups.
+ * The caller of this function should have put the task in its new group
+ * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to
+ * reflect its new group.
+ */
+void sched_move_task(struct task_struct *tsk)
 {
 	int on_rq, running;
 	unsigned long flags;
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares)
 	spin_unlock_irq(&rq->lock);
 }
 
-static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype,
-				struct file *file, const char __user *userbuf,
-				size_t nbytes, loff_t *ppos)
+int sched_group_set_shares(struct task_grp *tg, unsigned long shares)
 {
 	int i;
-	unsigned long shareval;
-	struct task_grp *tg = container_tg(cont);
-	char buffer[2*sizeof(unsigned long) + 1];
-
-	if (nbytes > 2*sizeof(unsigned long))	/* safety check */
-		return -E2BIG;
 
-	if (copy_from_user(buffer, userbuf, nbytes))
-		return -EFAULT;
+	if (tg->shares == shares)
+		return 0;
 
-	buffer[nbytes] = 0;	/* nul-terminate */
-	shareval = simple_strtoul(buffer, NULL, 10);
+	/* return -EINVAL if the new value is not sane */
 
-	tg->shares = shareval;
+	tg->shares = shares;
 	for_each_possible_cpu(i)
-		set_se_shares(tg->se[i], shareval);
-
-	return nbytes;
-}
-
-static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft)
-{
-	struct task_grp *tg = container_tg(cont);
-
-	return (u64) tg->shares;
-}
+		set_se_shares(tg->se[i], shares);
 
-struct cftype cpuctl_share = {
-	.name = "shares",
-	.read_uint = cpu_shares_read_uint,
-	.write = cpu_shares_write,
-};
-
-static int sched_populate(struct container_subsys *ss, struct container *cont)
-{
-	return container_add_file(cont, ss, &cpuctl_share);
+	return 0;
 }
 
-struct container_subsys cpu_subsys = {
-	.name = "cpu",
-	.create = sched_create_group,
-	.destroy = sched_destroy_group,
-	.can_attach = sched_can_attach,
-	.attach = sched_move_task,
-	.populate = sched_populate,
-	.subsys_id = cpu_subsys_id,
-	.early_init = 1,
-};
-
-#endif	/* CONFIG_FAIR_GROUP_SCHED */
+#endif	/* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 556942cf2606..abd65ed9f2a5 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 	if (!cfs_rq->nr_running)
 		return MAX_PRIO;
 
-	curr = __pick_next_entity(cfs_rq);
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
 	p = task_of(curr);
 
 	return p->prio;
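A note on the teardown path in sched.c above: sched_destroy_group() unlinks
each per-cpu cfs_rq with list_del_rcu() and then defers the actual freeing
via call_rcu() on the rcu head embedded in tg->cfs_rq[0], so readers walking
rq->leaf_cfs_rq_list under rcu_read_lock() never touch freed memory. A
generic, standalone sketch of that pattern follows (illustration only, not
the patch's code; struct foo, foo_free() and foo_destroy() are invented):

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct list_head list;	/* linked on an RCU-protected list */
	struct rcu_head rcu;	/* embedded callback head */
};

/* runs after a grace period: no reader can still hold a reference */
static void foo_free(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct foo, rcu));
}

static void foo_destroy(struct foo *f)
{
	list_del_rcu(&f->list);		/* unlink; readers may still see it */
	call_rcu(&f->rcu, foo_free);	/* free once all readers are done */
}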