diff options
author | Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com> | 2007-10-15 11:00:09 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2007-10-15 11:00:09 -0400 |
commit | 9b5b77512dce239fa168183fa71896712232e95a (patch) | |
tree | 2a95cf9151b4b6bd745236858f5b73b639dbc0d9 | |
parent | 75c28ace9f2b2f403674e045939424a77c95b47c (diff) |
sched: clean up code under CONFIG_FAIR_GROUP_SCHED
With a view to supporting user-id based fair scheduling (and not just
container-based fair scheduling), this patch renames several functions
and makes them independent of whether they are being used for container
or user-id based fair scheduling.
Also fix a problem reported by KAMEZAWA Hiroyuki (wrt allocating
undersized arrays for tg->cfs_rq[] and tg->se[]).
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r-- | include/linux/sched.h | 12 | ||||
-rw-r--r-- | init/Kconfig | 11 | ||||
-rw-r--r-- | kernel/sched.c | 172 | ||||
-rw-r--r-- | kernel/sched_fair.c | 5 |
4 files changed, 83 insertions, 117 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index 66169005f008..03c13b663e4b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -136,6 +136,7 @@ extern unsigned long weighted_cpuload(const int cpu); | |||
136 | 136 | ||
137 | struct seq_file; | 137 | struct seq_file; |
138 | struct cfs_rq; | 138 | struct cfs_rq; |
139 | struct task_grp; | ||
139 | #ifdef CONFIG_SCHED_DEBUG | 140 | #ifdef CONFIG_SCHED_DEBUG |
140 | extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); | 141 | extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); |
141 | extern void proc_sched_set_task(struct task_struct *p); | 142 | extern void proc_sched_set_task(struct task_struct *p); |
@@ -1834,6 +1835,17 @@ extern int sched_mc_power_savings, sched_smt_power_savings; | |||
1834 | 1835 | ||
1835 | extern void normalize_rt_tasks(void); | 1836 | extern void normalize_rt_tasks(void); |
1836 | 1837 | ||
1838 | #ifdef CONFIG_FAIR_GROUP_SCHED | ||
1839 | |||
1840 | extern struct task_grp init_task_grp; | ||
1841 | |||
1842 | extern struct task_grp *sched_create_group(void); | ||
1843 | extern void sched_destroy_group(struct task_grp *tg); | ||
1844 | extern void sched_move_task(struct task_struct *tsk); | ||
1845 | extern int sched_group_set_shares(struct task_grp *tg, unsigned long shares); | ||
1846 | |||
1847 | #endif | ||
1848 | |||
1837 | #ifdef CONFIG_TASK_XACCT | 1849 | #ifdef CONFIG_TASK_XACCT |
1838 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) | 1850 | static inline void add_rchar(struct task_struct *tsk, ssize_t amt) |
1839 | { | 1851 | { |
diff --git a/init/Kconfig b/init/Kconfig index 11c6762a6529..ef90a154dd90 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -282,13 +282,12 @@ config CPUSETS | |||
282 | Say N if unsure. | 282 | Say N if unsure. |
283 | 283 | ||
284 | config FAIR_GROUP_SCHED | 284 | config FAIR_GROUP_SCHED |
285 | bool "Fair group scheduler" | 285 | bool "Fair group cpu scheduler" |
286 | depends on EXPERIMENTAL && CONTAINERS | 286 | default n |
287 | depends on EXPERIMENTAL | ||
287 | help | 288 | help |
288 | This option enables you to group tasks and control CPU resource | 289 | This feature lets cpu scheduler recognize task groups and control cpu |
289 | allocation to such groups. | 290 | bandwidth allocation to such task groups. |
290 | |||
291 | Say N if unsure. | ||
292 | 291 | ||
293 | config SYSFS_DEPRECATED | 292 | config SYSFS_DEPRECATED |
294 | bool "Create deprecated sysfs files" | 293 | bool "Create deprecated sysfs files" |
diff --git a/kernel/sched.c b/kernel/sched.c index ee7ac71b12f8..e10c403b1213 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -173,13 +173,10 @@ struct rt_prio_array { | |||
173 | 173 | ||
174 | #ifdef CONFIG_FAIR_GROUP_SCHED | 174 | #ifdef CONFIG_FAIR_GROUP_SCHED |
175 | 175 | ||
176 | #include <linux/container.h> | ||
177 | |||
178 | struct cfs_rq; | 176 | struct cfs_rq; |
179 | 177 | ||
180 | /* task group related information */ | 178 | /* task group related information */ |
181 | struct task_grp { | 179 | struct task_grp { |
182 | struct container_subsys_state css; | ||
183 | /* schedulable entities of this group on each cpu */ | 180 | /* schedulable entities of this group on each cpu */ |
184 | struct sched_entity **se; | 181 | struct sched_entity **se; |
185 | /* runqueue "owned" by this group on each cpu */ | 182 | /* runqueue "owned" by this group on each cpu */ |
@@ -192,22 +189,28 @@ static DEFINE_PER_CPU(struct sched_entity, init_sched_entity); | |||
192 | /* Default task group's cfs_rq on each cpu */ | 189 | /* Default task group's cfs_rq on each cpu */ |
193 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; | 190 | static DEFINE_PER_CPU(struct cfs_rq, init_cfs_rq) ____cacheline_aligned_in_smp; |
194 | 191 | ||
195 | static struct sched_entity *init_sched_entity_p[CONFIG_NR_CPUS]; | 192 | static struct sched_entity *init_sched_entity_p[NR_CPUS]; |
196 | static struct cfs_rq *init_cfs_rq_p[CONFIG_NR_CPUS]; | 193 | static struct cfs_rq *init_cfs_rq_p[NR_CPUS]; |
197 | 194 | ||
198 | /* Default task group. | 195 | /* Default task group. |
199 | * Every task in system belong to this group at bootup. | 196 | * Every task in system belong to this group at bootup. |
200 | */ | 197 | */ |
201 | static struct task_grp init_task_grp = { | 198 | struct task_grp init_task_grp = { |
202 | .se = init_sched_entity_p, | 199 | .se = init_sched_entity_p, |
203 | .cfs_rq = init_cfs_rq_p, | 200 | .cfs_rq = init_cfs_rq_p, |
204 | }; | 201 | }; |
202 | |||
203 | #define INIT_TASK_GRP_LOAD NICE_0_LOAD | ||
204 | static int init_task_grp_load = INIT_TASK_GRP_LOAD; | ||
205 | 205 | ||
206 | /* return group to which a task belongs */ | 206 | /* return group to which a task belongs */ |
207 | static inline struct task_grp *task_grp(struct task_struct *p) | 207 | static inline struct task_grp *task_grp(struct task_struct *p) |
208 | { | 208 | { |
209 | return container_of(task_subsys_state(p, cpu_subsys_id), | 209 | struct task_grp *tg; |
210 | struct task_grp, css); | 210 | |
211 | tg = &init_task_grp; | ||
212 | |||
213 | return tg; | ||
211 | } | 214 | } |
212 | 215 | ||
213 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 216 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
@@ -250,6 +253,7 @@ struct cfs_rq { | |||
250 | */ | 253 | */ |
251 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ | 254 | struct list_head leaf_cfs_rq_list; /* Better name : task_cfs_rq_list? */ |
252 | struct task_grp *tg; /* group that "owns" this runqueue */ | 255 | struct task_grp *tg; /* group that "owns" this runqueue */ |
256 | struct rcu_head rcu; | ||
253 | #endif | 257 | #endif |
254 | }; | 258 | }; |
255 | 259 | ||
@@ -6513,11 +6517,12 @@ void __init sched_init(void) | |||
6513 | init_sched_entity_p[i] = se; | 6517 | init_sched_entity_p[i] = se; |
6514 | se->cfs_rq = &rq->cfs; | 6518 | se->cfs_rq = &rq->cfs; |
6515 | se->my_q = cfs_rq; | 6519 | se->my_q = cfs_rq; |
6516 | se->load.weight = NICE_0_LOAD; | 6520 | se->load.weight = init_task_grp_load; |
6517 | se->load.inv_weight = div64_64(1ULL<<32, NICE_0_LOAD); | 6521 | se->load.inv_weight = |
6522 | div64_64(1ULL<<32, init_task_grp_load); | ||
6518 | se->parent = NULL; | 6523 | se->parent = NULL; |
6519 | } | 6524 | } |
6520 | init_task_grp.shares = NICE_0_LOAD; | 6525 | init_task_grp.shares = init_task_grp_load; |
6521 | #endif | 6526 | #endif |
6522 | 6527 | ||
6523 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) | 6528 | for (j = 0; j < CPU_LOAD_IDX_MAX; j++) |
@@ -6707,45 +6712,28 @@ void set_curr_task(int cpu, struct task_struct *p) | |||
6707 | 6712 | ||
6708 | #ifdef CONFIG_FAIR_GROUP_SCHED | 6713 | #ifdef CONFIG_FAIR_GROUP_SCHED |
6709 | 6714 | ||
6710 | /* return corresponding task_grp object of a container */ | ||
6711 | static inline struct task_grp *container_tg(struct container *cont) | ||
6712 | { | ||
6713 | return container_of(container_subsys_state(cont, cpu_subsys_id), | ||
6714 | struct task_grp, css); | ||
6715 | } | ||
6716 | |||
6717 | /* allocate runqueue etc for a new task group */ | 6715 | /* allocate runqueue etc for a new task group */ |
6718 | static struct container_subsys_state * | 6716 | struct task_grp *sched_create_group(void) |
6719 | sched_create_group(struct container_subsys *ss, struct container *cont) | ||
6720 | { | 6717 | { |
6721 | struct task_grp *tg; | 6718 | struct task_grp *tg; |
6722 | struct cfs_rq *cfs_rq; | 6719 | struct cfs_rq *cfs_rq; |
6723 | struct sched_entity *se; | 6720 | struct sched_entity *se; |
6721 | struct rq *rq; | ||
6724 | int i; | 6722 | int i; |
6725 | 6723 | ||
6726 | if (!cont->parent) { | ||
6727 | /* This is early initialization for the top container */ | ||
6728 | init_task_grp.css.container = cont; | ||
6729 | return &init_task_grp.css; | ||
6730 | } | ||
6731 | |||
6732 | /* we support only 1-level deep hierarchical scheduler atm */ | ||
6733 | if (cont->parent->parent) | ||
6734 | return ERR_PTR(-EINVAL); | ||
6735 | |||
6736 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); | 6724 | tg = kzalloc(sizeof(*tg), GFP_KERNEL); |
6737 | if (!tg) | 6725 | if (!tg) |
6738 | return ERR_PTR(-ENOMEM); | 6726 | return ERR_PTR(-ENOMEM); |
6739 | 6727 | ||
6740 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * num_possible_cpus(), GFP_KERNEL); | 6728 | tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL); |
6741 | if (!tg->cfs_rq) | 6729 | if (!tg->cfs_rq) |
6742 | goto err; | 6730 | goto err; |
6743 | tg->se = kzalloc(sizeof(se) * num_possible_cpus(), GFP_KERNEL); | 6731 | tg->se = kzalloc(sizeof(se) * NR_CPUS, GFP_KERNEL); |
6744 | if (!tg->se) | 6732 | if (!tg->se) |
6745 | goto err; | 6733 | goto err; |
6746 | 6734 | ||
6747 | for_each_possible_cpu(i) { | 6735 | for_each_possible_cpu(i) { |
6748 | struct rq *rq = cpu_rq(i); | 6736 | rq = cpu_rq(i); |
6749 | 6737 | ||
6750 | cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, | 6738 | cfs_rq = kmalloc_node(sizeof(struct cfs_rq), GFP_KERNEL, |
6751 | cpu_to_node(i)); | 6739 | cpu_to_node(i)); |
@@ -6763,7 +6751,6 @@ sched_create_group(struct container_subsys *ss, struct container *cont) | |||
6763 | tg->cfs_rq[i] = cfs_rq; | 6751 | tg->cfs_rq[i] = cfs_rq; |
6764 | init_cfs_rq(cfs_rq, rq); | 6752 | init_cfs_rq(cfs_rq, rq); |
6765 | cfs_rq->tg = tg; | 6753 | cfs_rq->tg = tg; |
6766 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
6767 | 6754 | ||
6768 | tg->se[i] = se; | 6755 | tg->se[i] = se; |
6769 | se->cfs_rq = &rq->cfs; | 6756 | se->cfs_rq = &rq->cfs; |
@@ -6773,12 +6760,15 @@ sched_create_group(struct container_subsys *ss, struct container *cont) | |||
6773 | se->parent = NULL; | 6760 | se->parent = NULL; |
6774 | } | 6761 | } |
6775 | 6762 | ||
6776 | tg->shares = NICE_0_LOAD; | 6763 | for_each_possible_cpu(i) { |
6764 | rq = cpu_rq(i); | ||
6765 | cfs_rq = tg->cfs_rq[i]; | ||
6766 | list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list); | ||
6767 | } | ||
6777 | 6768 | ||
6778 | /* Bind the container to task_grp object we just created */ | 6769 | tg->shares = NICE_0_LOAD; |
6779 | tg->css.container = cont; | ||
6780 | 6770 | ||
6781 | return &tg->css; | 6771 | return tg; |
6782 | 6772 | ||
6783 | err: | 6773 | err: |
6784 | for_each_possible_cpu(i) { | 6774 | for_each_possible_cpu(i) { |
@@ -6797,24 +6787,14 @@ err: | |||
6797 | return ERR_PTR(-ENOMEM); | 6787 | return ERR_PTR(-ENOMEM); |
6798 | } | 6788 | } |
6799 | 6789 | ||
6800 | 6790 | /* rcu callback to free various structures associated with a task group */ | |
6801 | /* destroy runqueue etc associated with a task group */ | 6791 | static void free_sched_group(struct rcu_head *rhp) |
6802 | static void sched_destroy_group(struct container_subsys *ss, | ||
6803 | struct container *cont) | ||
6804 | { | 6792 | { |
6805 | struct task_grp *tg = container_tg(cont); | 6793 | struct cfs_rq *cfs_rq = container_of(rhp, struct cfs_rq, rcu); |
6806 | struct cfs_rq *cfs_rq; | 6794 | struct task_grp *tg = cfs_rq->tg; |
6807 | struct sched_entity *se; | 6795 | struct sched_entity *se; |
6808 | int i; | 6796 | int i; |
6809 | 6797 | ||
6810 | for_each_possible_cpu(i) { | ||
6811 | cfs_rq = tg->cfs_rq[i]; | ||
6812 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | ||
6813 | } | ||
6814 | |||
6815 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
6816 | synchronize_sched(); | ||
6817 | |||
6818 | /* now it should be safe to free those cfs_rqs */ | 6798 | /* now it should be safe to free those cfs_rqs */ |
6819 | for_each_possible_cpu(i) { | 6799 | for_each_possible_cpu(i) { |
6820 | cfs_rq = tg->cfs_rq[i]; | 6800 | cfs_rq = tg->cfs_rq[i]; |
@@ -6829,19 +6809,29 @@ static void sched_destroy_group(struct container_subsys *ss, | |||
6829 | kfree(tg); | 6809 | kfree(tg); |
6830 | } | 6810 | } |
6831 | 6811 | ||
6832 | static int sched_can_attach(struct container_subsys *ss, | 6812 | /* Destroy runqueue etc associated with a task group */ |
6833 | struct container *cont, struct task_struct *tsk) | 6813 | void sched_destroy_group(struct task_grp *tg) |
6834 | { | 6814 | { |
6835 | /* We don't support RT-tasks being in separate groups */ | 6815 | struct cfs_rq *cfs_rq; |
6836 | if (tsk->sched_class != &fair_sched_class) | 6816 | int i; |
6837 | return -EINVAL; | ||
6838 | 6817 | ||
6839 | return 0; | 6818 | for_each_possible_cpu(i) { |
6819 | cfs_rq = tg->cfs_rq[i]; | ||
6820 | list_del_rcu(&cfs_rq->leaf_cfs_rq_list); | ||
6821 | } | ||
6822 | |||
6823 | cfs_rq = tg->cfs_rq[0]; | ||
6824 | |||
6825 | /* wait for possible concurrent references to cfs_rqs complete */ | ||
6826 | call_rcu(&cfs_rq->rcu, free_sched_group); | ||
6840 | } | 6827 | } |
6841 | 6828 | ||
6842 | /* change task's runqueue when it moves between groups */ | 6829 | /* change task's runqueue when it moves between groups. |
6843 | static void sched_move_task(struct container_subsys *ss, struct container *cont, | 6830 | * The caller of this function should have put the task in its new group |
6844 | struct container *old_cont, struct task_struct *tsk) | 6831 | * by now. This function just updates tsk->se.cfs_rq and tsk->se.parent to |
6832 | * reflect its new group. | ||
6833 | */ | ||
6834 | void sched_move_task(struct task_struct *tsk) | ||
6845 | { | 6835 | { |
6846 | int on_rq, running; | 6836 | int on_rq, running; |
6847 | unsigned long flags; | 6837 | unsigned long flags; |
@@ -6896,58 +6886,20 @@ static void set_se_shares(struct sched_entity *se, unsigned long shares) | |||
6896 | spin_unlock_irq(&rq->lock); | 6886 | spin_unlock_irq(&rq->lock); |
6897 | } | 6887 | } |
6898 | 6888 | ||
6899 | static ssize_t cpu_shares_write(struct container *cont, struct cftype *cftype, | 6889 | int sched_group_set_shares(struct task_grp *tg, unsigned long shares) |
6900 | struct file *file, const char __user *userbuf, | ||
6901 | size_t nbytes, loff_t *ppos) | ||
6902 | { | 6890 | { |
6903 | int i; | 6891 | int i; |
6904 | unsigned long shareval; | ||
6905 | struct task_grp *tg = container_tg(cont); | ||
6906 | char buffer[2*sizeof(unsigned long) + 1]; | ||
6907 | |||
6908 | if (nbytes > 2*sizeof(unsigned long)) /* safety check */ | ||
6909 | return -E2BIG; | ||
6910 | 6892 | ||
6911 | if (copy_from_user(buffer, userbuf, nbytes)) | 6893 | if (tg->shares == shares) |
6912 | return -EFAULT; | 6894 | return 0; |
6913 | 6895 | ||
6914 | buffer[nbytes] = 0; /* nul-terminate */ | 6896 | /* return -EINVAL if the new value is not sane */ |
6915 | shareval = simple_strtoul(buffer, NULL, 10); | ||
6916 | 6897 | ||
6917 | tg->shares = shareval; | 6898 | tg->shares = shares; |
6918 | for_each_possible_cpu(i) | 6899 | for_each_possible_cpu(i) |
6919 | set_se_shares(tg->se[i], shareval); | 6900 | set_se_shares(tg->se[i], shares); |
6920 | |||
6921 | return nbytes; | ||
6922 | } | ||
6923 | |||
6924 | static u64 cpu_shares_read_uint(struct container *cont, struct cftype *cft) | ||
6925 | { | ||
6926 | struct task_grp *tg = container_tg(cont); | ||
6927 | |||
6928 | return (u64) tg->shares; | ||
6929 | } | ||
6930 | 6901 | ||
6931 | struct cftype cpuctl_share = { | 6902 | return 0; |
6932 | .name = "shares", | ||
6933 | .read_uint = cpu_shares_read_uint, | ||
6934 | .write = cpu_shares_write, | ||
6935 | }; | ||
6936 | |||
6937 | static int sched_populate(struct container_subsys *ss, struct container *cont) | ||
6938 | { | ||
6939 | return container_add_file(cont, ss, &cpuctl_share); | ||
6940 | } | 6903 | } |
6941 | 6904 | ||
6942 | struct container_subsys cpu_subsys = { | 6905 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
6943 | .name = "cpu", | ||
6944 | .create = sched_create_group, | ||
6945 | .destroy = sched_destroy_group, | ||
6946 | .can_attach = sched_can_attach, | ||
6947 | .attach = sched_move_task, | ||
6948 | .populate = sched_populate, | ||
6949 | .subsys_id = cpu_subsys_id, | ||
6950 | .early_init = 1, | ||
6951 | }; | ||
6952 | |||
6953 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | ||
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 556942cf2606..abd65ed9f2a5 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -877,7 +877,10 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | |||
877 | if (!cfs_rq->nr_running) | 877 | if (!cfs_rq->nr_running) |
878 | return MAX_PRIO; | 878 | return MAX_PRIO; |
879 | 879 | ||
880 | curr = __pick_next_entity(cfs_rq); | 880 | curr = cfs_rq->curr; |
881 | if (!curr) | ||
882 | curr = __pick_next_entity(cfs_rq); | ||
883 | |||
881 | p = task_of(curr); | 884 | p = task_of(curr); |
882 | 885 | ||
883 | return p->prio; | 886 | return p->prio; |