-rw-r--r--  Documentation/sched-design-CFS.txt   67
-rw-r--r--  include/linux/sched.h                11
-rw-r--r--  kernel/ksysfs.c                       8
-rw-r--r--  kernel/sched.c                       14
-rw-r--r--  kernel/sched_debug.c                 48
-rw-r--r--  kernel/user.c                       240
6 files changed, 309 insertions, 79 deletions
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
index 84901e7c0508..88bcb8767335 100644
--- a/Documentation/sched-design-CFS.txt
+++ b/Documentation/sched-design-CFS.txt
@@ -117,3 +117,70 @@ Some implementation details:
 iterators of the scheduling modules are used. The balancing code got
 quite a bit simpler as a result.
 
+
+Group scheduler extension to CFS
+================================
+
+Normally the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task. Sometimes, it may be desirable to group tasks
+and provide fair CPU time to each such task group. For example, it may
+be desirable to first provide fair CPU time to each user on the system
+and then to each task belonging to a user.
+
+CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
+SCHED_NORMAL/BATCH tasks be grouped and divides CPU time fairly among such
+groups. At present, there are two (mutually exclusive) mechanisms to group
+tasks for CPU bandwidth control purposes:
+
+	- Based on user id (CONFIG_FAIR_USER_SCHED)
+	  In this option, tasks are grouped according to their user id.
+	- Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
+	  This option lets the administrator create arbitrary groups
+	  of tasks, using the "cgroup" pseudo filesystem. See
+	  Documentation/cgroups.txt for more information about this
+	  filesystem.
+
+Only one of these options can be chosen to group tasks; they cannot be combined.
+
+Group scheduler tunables:
+
+When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
+each new user and a "cpu_share" file is added in that directory.
+
+	# cd /sys/kernel/uids
+	# cat 512/cpu_share		# Display user 512's CPU share
+	1024
+	# echo 2048 > 512/cpu_share	# Modify user 512's CPU share
+	# cat 512/cpu_share		# Display user 512's CPU share
+	2048
+	#
+
+CPU bandwidth between two users is divided in the ratio of their CPU shares.
+For example, if you would like user "root" to get twice the bandwidth of user
+"guest", set the cpu_share for both users such that "root"'s cpu_share is
+twice "guest"'s cpu_share.
+
+
+When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
+for each group created using the pseudo filesystem. See the example steps
+below to create task groups and modify their CPU shares using the "cgroups"
+pseudo filesystem.
+
+	# mkdir /dev/cpuctl
+	# mount -t cgroup -ocpu none /dev/cpuctl
+	# cd /dev/cpuctl
+
+	# mkdir multimedia	# create "multimedia" group of tasks
+	# mkdir browser		# create "browser" group of tasks
+
+	# #Configure the multimedia group to receive twice the CPU bandwidth
+	# #that of browser group
+
+	# echo 2048 > multimedia/cpu.shares
+	# echo 1024 > browser/cpu.shares
+
+	# firefox &	# Launch firefox and move it to "browser" group
+	# echo <firefox_pid> > browser/tasks
+
+	# #Launch gmplayer (or your favourite movie player)
+	# echo <movie_player_pid> > multimedia/tasks
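
To make the ratio arithmetic above concrete, here is a small illustrative userspace sketch (not part of the patch) that computes the CPU fraction each group can expect from its cpu_share value, assuming every group stays fully runnable. The group names and share values are taken from the documentation example above; the program itself is hypothetical.

	#include <stdio.h>

	/*
	 * With CONFIG_FAIR_GROUP_SCHED, a group that is always runnable
	 * receives roughly shares_i / sum(shares) of the CPU.  For root with
	 * cpu_share 2048 and guest with cpu_share 1024 this is ~66% vs ~33%.
	 */
	int main(void)
	{
		const char *group[] = { "root", "guest" };
		unsigned long shares[] = { 2048, 1024 };	/* cpu_share values */
		unsigned long total = 0;
		int i, n = sizeof(shares) / sizeof(shares[0]);

		for (i = 0; i < n; i++)
			total += shares[i];

		for (i = 0; i < n; i++)
			printf("%-8s shares=%-6lu expected CPU %.1f%%\n",
			       group[i], shares[i], 100.0 * shares[i] / total);

		return 0;
	}
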
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cddbfc0c91d..04233c8974d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -87,6 +87,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/kobject.h>
 
 #include <asm/processor.h>
 
@@ -599,9 +600,18 @@ struct user_struct {
 
 #ifdef CONFIG_FAIR_USER_SCHED
 	struct task_group *tg;
+	struct kset kset;
+	struct subsys_attribute user_attr;
+	struct work_struct work;
 #endif
 };
 
+#ifdef CONFIG_FAIR_USER_SCHED
+extern int uids_kobject_init(void);
+#else
+static inline int uids_kobject_init(void) { return 0; }
+#endif
+
 extern struct user_struct *find_user(uid_t);
 
 extern struct user_struct root_user;
@@ -1848,6 +1858,7 @@ extern struct task_group *sched_create_group(void);
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern unsigned long sched_group_shares(struct task_group *tg);
 
 #endif
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index d0e5c48e18c7..6046939d0804 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/sched.h>
 
 #define KERNEL_ATTR_RO(_name) \
 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -116,6 +117,13 @@ static int __init ksysfs_init(void)
 					      &notes_attr);
 	}
 
+	/*
+	 * Create "/sys/kernel/uids" directory and corresponding root user's
+	 * directory under it.
+	 */
+	if (!error)
+		error = uids_kobject_init();
+
 	return error;
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index a3c3ec825f42..9ac99896db8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -162,6 +162,8 @@ struct task_group {
 	/* runqueue "owned" by this group on each cpu */
 	struct cfs_rq **cfs_rq;
 	unsigned long shares;
+	/* spinlock to serialize modification to shares */
+	spinlock_t lock;
 };
 
 /* Default task group's sched entity on each cpu */
@@ -6533,6 +6535,7 @@ void __init sched_init(void)
 		se->parent = NULL;
 	}
 	init_task_group.shares = init_task_group_load;
+	spin_lock_init(&init_task_group.lock);
 #endif
 
 	for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6777,6 +6780,7 @@ struct task_group *sched_create_group(void)
 	}
 
 	tg->shares = NICE_0_LOAD;
+	spin_lock_init(&tg->lock);
 
 	return tg;
 
@@ -6897,8 +6901,9 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
 	int i;
 
+	spin_lock(&tg->lock);
 	if (tg->shares == shares)
-		return 0;
+		goto done;
 
 	/* return -EINVAL if the new value is not sane */
 
@@ -6906,7 +6911,14 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 	for_each_possible_cpu(i)
 		set_se_shares(tg->se[i], shares);
 
+done:
+	spin_unlock(&tg->lock);
 	return 0;
 }
 
+unsigned long sched_group_shares(struct task_group *tg)
+{
+	return tg->shares;
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
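
The hunk above changes the early exit in sched_group_set_shares() from a direct return into a goto so that the newly introduced per-group lock is always released on the way out. A minimal userspace analogue of that pattern, with hypothetical names and a pthread mutex standing in for the kernel spinlock, might look like this:

	#include <pthread.h>
	#include <stdio.h>

	/*
	 * Userspace sketch (not kernel code): take the per-group lock, leave
	 * through a single unlock path even when there is nothing to do,
	 * otherwise update the value under the lock.
	 */
	struct fake_group {
		unsigned long shares;
		pthread_mutex_t lock;
	};

	static struct fake_group grp = { 1024, PTHREAD_MUTEX_INITIALIZER };

	static int fake_set_shares(struct fake_group *g, unsigned long shares)
	{
		pthread_mutex_lock(&g->lock);
		if (g->shares == shares)
			goto done;	/* early exit must still unlock */

		g->shares = shares;	/* stands in for the set_se_shares() loop */
	done:
		pthread_mutex_unlock(&g->lock);
		return 0;
	}

	int main(void)
	{
		fake_set_shares(&grp, 2048);
		printf("shares now %lu\n", grp.shares);
		return 0;
	}
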
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6f87b31d233c..0aab455a7b41 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -231,45 +231,6 @@ static void sysrq_sched_debug_show(void)
 	sched_debug_show(NULL, NULL);
 }
 
-#ifdef CONFIG_FAIR_USER_SCHED
-
-static DEFINE_MUTEX(root_user_share_mutex);
-
-static int
-root_user_share_read_proc(char *page, char **start, off_t off, int count,
-				int *eof, void *data)
-{
-	return sprintf(page, "%d\n", init_task_group_load);
-}
-
-static int
-root_user_share_write_proc(struct file *file, const char __user *buffer,
-				unsigned long count, void *data)
-{
-	unsigned long shares;
-	char kbuf[sizeof(unsigned long)+1];
-	int rc = 0;
-
-	if (copy_from_user(kbuf, buffer, sizeof(kbuf)))
-		return -EFAULT;
-
-	shares = simple_strtoul(kbuf, NULL, 0);
-
-	if (!shares)
-		shares = NICE_0_LOAD;
-
-	mutex_lock(&root_user_share_mutex);
-
-	init_task_group_load = shares;
-	rc = sched_group_set_shares(&init_task_group, shares);
-
-	mutex_unlock(&root_user_share_mutex);
-
-	return (rc < 0 ? rc : count);
-}
-
-#endif	/* CONFIG_FAIR_USER_SCHED */
-
 static int sched_debug_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, sched_debug_show, NULL);
@@ -292,15 +253,6 @@ static int __init init_sched_debug_procfs(void)
 
 	pe->proc_fops = &sched_debug_fops;
 
-#ifdef CONFIG_FAIR_USER_SCHED
-	pe = create_proc_entry("root_user_cpu_share", 0644, NULL);
-	if (!pe)
-		return -ENOMEM;
-
-	pe->read_proc = root_user_share_read_proc;
-	pe->write_proc = root_user_share_write_proc;
-#endif
-
 	return 0;
 }
 
diff --git a/kernel/user.c b/kernel/user.c
index 0c9a7870d08f..74cadea8466f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,7 +55,41 @@ struct user_struct root_user = {
 #endif
 };
 
+/*
+ * These routines must be called with the uidhash spinlock held!
+ */
+static inline void uid_hash_insert(struct user_struct *up,
+		struct hlist_head *hashent)
+{
+	hlist_add_head(&up->uidhash_node, hashent);
+}
+
+static inline void uid_hash_remove(struct user_struct *up)
+{
+	hlist_del_init(&up->uidhash_node);
+}
+
+static inline struct user_struct *uid_hash_find(uid_t uid,
+		struct hlist_head *hashent)
+{
+	struct user_struct *user;
+	struct hlist_node *h;
+
+	hlist_for_each_entry(user, h, hashent, uidhash_node) {
+		if (user->uid == uid) {
+			atomic_inc(&user->__count);
+			return user;
+		}
+	}
+
+	return NULL;
+}
+
 #ifdef CONFIG_FAIR_USER_SCHED
+
+static struct kobject uids_kobject; /* represents /sys/kernel/uids directory */
+static DEFINE_MUTEX(uids_mutex);
+
 static void sched_destroy_user(struct user_struct *up)
 {
 	sched_destroy_group(up->tg);
@@ -77,42 +111,173 @@ static void sched_switch_user(struct task_struct *p)
 	sched_move_task(p);
 }
 
-#else	/* CONFIG_FAIR_USER_SCHED */
+static inline void uids_mutex_lock(void)
+{
+	mutex_lock(&uids_mutex);
+}
 
-static void sched_destroy_user(struct user_struct *up) { }
-static int sched_create_user(struct user_struct *up) { return 0; }
-static void sched_switch_user(struct task_struct *p) { }
+static inline void uids_mutex_unlock(void)
+{
+	mutex_unlock(&uids_mutex);
+}
 
-#endif	/* CONFIG_FAIR_USER_SCHED */
+/* return cpu shares held by the user */
+ssize_t cpu_shares_show(struct kset *kset, char *buffer)
+{
+	struct user_struct *up = container_of(kset, struct user_struct, kset);
 
-/*
- * These routines must be called with the uidhash spinlock held!
+	return sprintf(buffer, "%lu\n", sched_group_shares(up->tg));
+}
+
+/* modify cpu shares held by the user */
+ssize_t cpu_shares_store(struct kset *kset, const char *buffer, size_t size)
+{
+	struct user_struct *up = container_of(kset, struct user_struct, kset);
+	unsigned long shares;
+	int rc;
+
+	sscanf(buffer, "%lu", &shares);
+
+	rc = sched_group_set_shares(up->tg, shares);
+
+	return (rc ? rc : size);
+}
+
+static void user_attr_init(struct subsys_attribute *sa, char *name, int mode)
+{
+	sa->attr.name = name;
+	sa->attr.mode = mode;
+	sa->show = cpu_shares_show;
+	sa->store = cpu_shares_store;
+}
+
+/* Create "/sys/kernel/uids/<uid>" directory and
+ * "/sys/kernel/uids/<uid>/cpu_share" file for this user.
  */
-static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
+static int user_kobject_create(struct user_struct *up)
 {
-	hlist_add_head(&up->uidhash_node, hashent);
+	struct kset *kset = &up->kset;
+	struct kobject *kobj = &kset->kobj;
+	int error;
+
+	memset(kset, 0, sizeof(struct kset));
+	kobj->parent = &uids_kobject; /* create under /sys/kernel/uids dir */
+	kobject_set_name(kobj, "%d", up->uid);
+	kset_init(kset);
+	user_attr_init(&up->user_attr, "cpu_share", 0644);
+
+	error = kobject_add(kobj);
+	if (error)
+		goto done;
+
+	error = sysfs_create_file(kobj, &up->user_attr.attr);
+	if (error)
+		kobject_del(kobj);
+
+done:
+	return error;
 }
 
-static inline void uid_hash_remove(struct user_struct *up)
+/* create these in sysfs filesystem:
+ *	"/sys/kernel/uids" directory
+ *	"/sys/kernel/uids/0" directory (for root user)
+ *	"/sys/kernel/uids/0/cpu_share" file (for root user)
+ */
+int __init uids_kobject_init(void)
 {
-	hlist_del_init(&up->uidhash_node);
+	int error;
+
+	/* create under /sys/kernel dir */
+	uids_kobject.parent = &kernel_subsys.kobj;
+	kobject_set_name(&uids_kobject, "uids");
+	kobject_init(&uids_kobject);
+
+	error = kobject_add(&uids_kobject);
+	if (!error)
+		error = user_kobject_create(&root_user);
+
+	return error;
 }
 
-static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+/* work function to remove sysfs directory for a user and free up
+ * corresponding structures.
+ */
+static void remove_user_sysfs_dir(struct work_struct *w)
 {
-	struct user_struct *user;
-	struct hlist_node *h;
+	struct user_struct *up = container_of(w, struct user_struct, work);
+	struct kobject *kobj = &up->kset.kobj;
+	unsigned long flags;
+	int remove_user = 0;
 
-	hlist_for_each_entry(user, h, hashent, uidhash_node) {
-		if(user->uid == uid) {
-			atomic_inc(&user->__count);
-			return user;
-		}
+	/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
+	 * atomic.
+	 */
+	uids_mutex_lock();
+
+	local_irq_save(flags);
+
+	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+		uid_hash_remove(up);
+		remove_user = 1;
+		spin_unlock_irqrestore(&uidhash_lock, flags);
+	} else {
+		local_irq_restore(flags);
 	}
 
-	return NULL;
+	if (!remove_user)
+		goto done;
+
+	sysfs_remove_file(kobj, &up->user_attr.attr);
+	kobject_del(kobj);
+
+	sched_destroy_user(up);
+	key_put(up->uid_keyring);
+	key_put(up->session_keyring);
+	kmem_cache_free(uid_cachep, up);
+
+done:
+	uids_mutex_unlock();
+}
+
+/* IRQs are disabled and uidhash_lock is held upon function entry.
+ * IRQ state (as stored in flags) is restored and uidhash_lock released
+ * upon function exit.
+ */
+static inline void free_user(struct user_struct *up, unsigned long flags)
+{
+	/* restore back the count */
+	atomic_inc(&up->__count);
+	spin_unlock_irqrestore(&uidhash_lock, flags);
+
+	INIT_WORK(&up->work, remove_user_sysfs_dir);
+	schedule_work(&up->work);
 }
 
+#else	/* CONFIG_FAIR_USER_SCHED */
+
+static void sched_destroy_user(struct user_struct *up) { }
+static int sched_create_user(struct user_struct *up) { return 0; }
+static void sched_switch_user(struct task_struct *p) { }
+static inline int user_kobject_create(struct user_struct *up) { return 0; }
+static inline void uids_mutex_lock(void) { }
+static inline void uids_mutex_unlock(void) { }
+
+/* IRQs are disabled and uidhash_lock is held upon function entry.
+ * IRQ state (as stored in flags) is restored and uidhash_lock released
+ * upon function exit.
+ */
+static inline void free_user(struct user_struct *up, unsigned long flags)
+{
+	uid_hash_remove(up);
+	spin_unlock_irqrestore(&uidhash_lock, flags);
+	sched_destroy_user(up);
+	key_put(up->uid_keyring);
+	key_put(up->session_keyring);
+	kmem_cache_free(uid_cachep, up);
+}
+
+#endif /* CONFIG_FAIR_USER_SCHED */
+
 /*
  * Locate the user_struct for the passed UID. If found, take a ref on it. The
  * caller must undo that ref with free_uid().
@@ -139,16 +304,10 @@ void free_uid(struct user_struct *up)
 		return;
 
 	local_irq_save(flags);
-	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
-		uid_hash_remove(up);
-		spin_unlock_irqrestore(&uidhash_lock, flags);
-		sched_destroy_user(up);
-		key_put(up->uid_keyring);
-		key_put(up->session_keyring);
-		kmem_cache_free(uid_cachep, up);
-	} else {
+	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+		free_user(up, flags);
+	else
 		local_irq_restore(flags);
-	}
 }
 
 struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
@@ -156,6 +315,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 	struct hlist_head *hashent = uidhashentry(ns, uid);
 	struct user_struct *up;
 
+	/* Make uid_hash_find() + user_kobject_create() + uid_hash_insert()
+	 * atomic.
+	 */
+	uids_mutex_lock();
+
 	spin_lock_irq(&uidhash_lock);
 	up = uid_hash_find(uid, hashent);
 	spin_unlock_irq(&uidhash_lock);
@@ -191,6 +355,15 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		return NULL;
 	}
 
+	if (user_kobject_create(new)) {
+		sched_destroy_user(new);
+		key_put(new->uid_keyring);
+		key_put(new->session_keyring);
+		kmem_cache_free(uid_cachep, new);
+		uids_mutex_unlock();
+		return NULL;
+	}
+
 	/*
 	 * Before adding this, check whether we raced
 	 * on adding the same user already..
@@ -198,7 +371,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 	spin_lock_irq(&uidhash_lock);
 	up = uid_hash_find(uid, hashent);
 	if (up) {
-		sched_destroy_user(new);
+		/* This case is not possible when CONFIG_FAIR_USER_SCHED
+		 * is defined, since we serialize alloc_uid() using
+		 * uids_mutex. Hence no need to call
+		 * sched_destroy_user() or remove_user_sysfs_dir().
+		 */
 		key_put(new->uid_keyring);
 		key_put(new->session_keyring);
 		kmem_cache_free(uid_cachep, new);
@@ -209,6 +386,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		spin_unlock_irq(&uidhash_lock);
 
 	}
+
+	uids_mutex_unlock();
+
 	return up;
 }
 
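
With CONFIG_FAIR_USER_SCHED enabled, the per-uid directories created by this patch can also be driven programmatically rather than from a shell. The following illustrative userspace sketch (not part of the patch) reads, and optionally updates, a user's cpu_share file; it assumes the kernel has the option enabled and that the uid's directory already exists under /sys/kernel/uids.

	#include <stdio.h>
	#include <stdlib.h>

	/* Read the current cpu_share value from the given sysfs path. */
	static unsigned long read_share(const char *path)
	{
		FILE *f = fopen(path, "r");
		unsigned long share = 0;

		if (!f) {
			perror(path);
			exit(1);
		}
		if (fscanf(f, "%lu", &share) != 1)
			share = 0;
		fclose(f);
		return share;
	}

	int main(int argc, char **argv)
	{
		char path[128];
		unsigned long uid = argc > 1 ? strtoul(argv[1], NULL, 10) : 0;
		unsigned long new_share = argc > 2 ? strtoul(argv[2], NULL, 10) : 0;

		snprintf(path, sizeof(path), "/sys/kernel/uids/%lu/cpu_share", uid);
		printf("uid %lu cpu_share: %lu\n", uid, read_share(path));

		if (new_share) {	/* optionally write a new value */
			FILE *f = fopen(path, "w");

			if (!f) {
				perror(path);
				exit(1);
			}
			fprintf(f, "%lu\n", new_share);
			fclose(f);
			printf("uid %lu cpu_share set to %lu\n", uid, read_share(path));
		}
		return 0;
	}
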