author     Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>  2007-10-15 11:00:09 -0400
committer  Ingo Molnar <mingo@elte.hu>                    2007-10-15 11:00:09 -0400
commit     24e377a83220ef05c9b5bec7e01d65eed6609aa6 (patch)
tree       9303b3d9f91ee39517d379aaac06c0432be8a9b8
parent     9b5b77512dce239fa168183fa71896712232e95a (diff)
sched: add fair-user scheduler
Enable user-id based fair group scheduling. This is useful for anyone
who wants to test the group scheduler w/o having to enable
CONFIG_CGROUPS.
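With the patch applied, the grouping basis becomes a configure-time choice; a .config fragment such as the following (a sketch of the expected selection, given the Kconfig choice added below) enables per-user grouping:

    CONFIG_FAIR_GROUP_SCHED=y
    CONFIG_FAIR_USER_SCHED=y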
A separate scheduling group (i.e., a struct task_grp) is created automatically
for every new user added to the system. When a task's uid changes, the task is
moved to the corresponding scheduling group.
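As an illustration only (not part of the patch): a process that changes uid via setuid() passes through switch_uid(), which this patch hooks to move the task into the new user's group. A minimal userspace sketch, assuming a root shell and an existing unprivileged uid 1000:

    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main(void)
    {
            uid_t target = 1000;    /* hypothetical unprivileged uid */

            /* Must start as root for setuid() to succeed. */
            if (setuid(target) < 0) {
                    perror("setuid");
                    return EXIT_FAILURE;
            }

            /*
             * After the uid change, switch_uid() has called
             * sched_switch_user(), so this busy loop now consumes
             * bandwidth from uid 1000's task_grp, not root's.
             */
            for (;;)
                    ;
    }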
A /proc tunable (/proc/root_user_share) is also provided to tune the root
user's quota of cpu bandwidth.
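The file reports the current share as a decimal and accepts a new share count on write (a value of 0 is replaced with NICE_0_LOAD, as the handler below shows). A minimal userspace sketch of tuning it, not part of the patch:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
            FILE *fp;
            int shares = 0;

            /* Read the current share value of root's group. */
            fp = fopen("/proc/root_user_share", "r");
            if (!fp) {
                    perror("open /proc/root_user_share");
                    return EXIT_FAILURE;
            }
            if (fscanf(fp, "%d", &shares) != 1)
                    shares = 0;
            fclose(fp);
            printf("root_user_share = %d\n", shares);

            /* Double root's quota; writing requires root privileges. */
            fp = fopen("/proc/root_user_share", "w");
            if (!fp) {
                    perror("reopen /proc/root_user_share for write");
                    return EXIT_FAILURE;
            }
            fprintf(fp, "%d\n", shares * 2);
            fclose(fp);
            return 0;
    }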
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
-rw-r--r--  include/linux/sched.h |  4
-rw-r--r--  init/Kconfig          | 13
-rw-r--r--  kernel/sched.c        |  9
-rw-r--r--  kernel/sched_debug.c  | 52
-rw-r--r--  kernel/user.c         | 43
5 files changed, 121 insertions(+), 0 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 03c13b663e4b..d0cc58311b13 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -597,6 +597,10 @@ struct user_struct {
 	/* Hash table maintenance information */
 	struct hlist_node uidhash_node;
 	uid_t uid;
+
+#ifdef CONFIG_FAIR_USER_SCHED
+	struct task_grp *tg;
+#endif
 };
 
 extern struct user_struct *find_user(uid_t);
diff --git a/init/Kconfig b/init/Kconfig
index ef90a154dd90..37711fe3c01c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -289,6 +289,19 @@ config FAIR_GROUP_SCHED
 	  This feature lets cpu scheduler recognize task groups and control cpu
 	  bandwidth allocation to such task groups.
 
+choice
+	depends on FAIR_GROUP_SCHED
+	prompt "Basis for grouping tasks"
+	default FAIR_USER_SCHED
+
+config FAIR_USER_SCHED
+	bool "user id"
+	help
+	  This option will choose userid as the basis for grouping
+	  tasks, thus providing equal cpu bandwidth to each user.
+
+endchoice
+
 config SYSFS_DEPRECATED
 	bool "Create deprecated sysfs files"
 	default y
diff --git a/kernel/sched.c b/kernel/sched.c
index e10c403b1213..f33608e9e1a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -200,7 +200,12 @@ struct task_grp init_task_grp = {
 	.cfs_rq = init_cfs_rq_p,
 };
 
+#ifdef CONFIG_FAIR_USER_SCHED
+#define INIT_TASK_GRP_LOAD	2*NICE_0_LOAD
+#else
 #define INIT_TASK_GRP_LOAD	NICE_0_LOAD
+#endif
+
 static int init_task_grp_load = INIT_TASK_GRP_LOAD;
 
 /* return group to which a task belongs */
@@ -208,7 +213,11 @@ static inline struct task_grp *task_grp(struct task_struct *p)
 {
 	struct task_grp *tg;
 
+#ifdef CONFIG_FAIR_USER_SCHED
+	tg = p->user->tg;
+#else
 	tg = &init_task_grp;
+#endif
 
 	return tg;
 }
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 3e47e870b043..57ee9d5630a8 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -212,6 +212,49 @@ static void sysrq_sched_debug_show(void)
 	sched_debug_show(NULL, NULL);
 }
 
+#ifdef CONFIG_FAIR_USER_SCHED
+
+static DEFINE_MUTEX(root_user_share_mutex);
+
+static int
+root_user_share_read_proc(char *page, char **start, off_t off, int count,
+				 int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%d\n", init_task_grp_load);
+
+	return len;
+}
+
+static int
+root_user_share_write_proc(struct file *file, const char __user *buffer,
+				 unsigned long count, void *data)
+{
+	unsigned long shares;
+	char kbuf[sizeof(unsigned long)+1];
+	int rc = 0;
+
+	if (copy_from_user(kbuf, buffer, sizeof(kbuf)))
+		return -EFAULT;
+
+	shares = simple_strtoul(kbuf, NULL, 0);
+
+	if (!shares)
+		shares = NICE_0_LOAD;
+
+	mutex_lock(&root_user_share_mutex);
+
+	init_task_grp_load = shares;
+	rc = sched_group_set_shares(&init_task_grp, shares);
+
+	mutex_unlock(&root_user_share_mutex);
+
+	return (rc < 0 ? rc : count);
+}
+
+#endif	/* CONFIG_FAIR_USER_SCHED */
+
 static int sched_debug_open(struct inode *inode, struct file *filp)
 {
 	return single_open(filp, sched_debug_show, NULL);
@@ -234,6 +277,15 @@ static int __init init_sched_debug_procfs(void)
 
 	pe->proc_fops = &sched_debug_fops;
 
+#ifdef CONFIG_FAIR_USER_SCHED
+	pe = create_proc_entry("root_user_share", 0644, NULL);
+	if (!pe)
+		return -ENOMEM;
+
+	pe->read_proc = root_user_share_read_proc;
+	pe->write_proc = root_user_share_write_proc;
+#endif
+
 	return 0;
 }
 
diff --git a/kernel/user.c b/kernel/user.c
index 9ca2848fc356..c6387fac932d 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -50,8 +50,41 @@ struct user_struct root_user = {
 	.uid_keyring	= &root_user_keyring,
 	.session_keyring = &root_session_keyring,
 #endif
+#ifdef CONFIG_FAIR_USER_SCHED
+	.tg		= &init_task_grp,
+#endif
 };
 
+#ifdef CONFIG_FAIR_USER_SCHED
+static void sched_destroy_user(struct user_struct *up)
+{
+	sched_destroy_group(up->tg);
+}
+
+static int sched_create_user(struct user_struct *up)
+{
+	int rc = 0;
+
+	up->tg = sched_create_group();
+	if (IS_ERR(up->tg))
+		rc = -ENOMEM;
+
+	return rc;
+}
+
+static void sched_switch_user(struct task_struct *p)
+{
+	sched_move_task(p);
+}
+
+#else	/* CONFIG_FAIR_USER_SCHED */
+
+static void sched_destroy_user(struct user_struct *up) { }
+static int sched_create_user(struct user_struct *up) { return 0; }
+static void sched_switch_user(struct task_struct *p) { }
+
+#endif	/* CONFIG_FAIR_USER_SCHED */
+
 /*
  * These routines must be called with the uidhash spinlock held!
  */
@@ -109,6 +142,7 @@ void free_uid(struct user_struct *up)
 	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
 		uid_hash_remove(up);
 		spin_unlock_irqrestore(&uidhash_lock, flags);
+		sched_destroy_user(up);
 		key_put(up->uid_keyring);
 		key_put(up->session_keyring);
 		kmem_cache_free(uid_cachep, up);
@@ -150,6 +184,13 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 			return NULL;
 		}
 
+		if (sched_create_user(new) < 0) {
+			key_put(new->uid_keyring);
+			key_put(new->session_keyring);
+			kmem_cache_free(uid_cachep, new);
+			return NULL;
+		}
+
 		/*
 		 * Before adding this, check whether we raced
 		 * on adding the same user already..
@@ -157,6 +198,7 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
 		spin_lock_irq(&uidhash_lock);
 		up = uid_hash_find(uid, hashent);
 		if (up) {
+			sched_destroy_user(new);
 			key_put(new->uid_keyring);
 			key_put(new->session_keyring);
 			kmem_cache_free(uid_cachep, new);
@@ -184,6 +226,7 @@ void switch_uid(struct user_struct *new_user)
 	atomic_dec(&old_user->processes);
 	switch_uid_keyring(new_user);
 	current->user = new_user;
+	sched_switch_user(current);
 
 	/*
	 * We need to synchronize with __sigqueue_alloc()