author     Dhaval Giani <dhaval@linux.vnet.ibm.com>    2007-10-15 11:00:14 -0400
committer  Ingo Molnar <mingo@elte.hu>                 2007-10-15 11:00:14 -0400
commit     5cb350baf580017da38199625b7365b1763d7180
tree       3830339798b1c6f19f1580700ea6ba240fb56ef2
parent     8ca0e14ffb12c257de591571a9e96102acdb1c64
sched: group scheduling, sysfs tunables
Add tunables in sysfs to modify a user's cpu share.
A directory is created in sysfs for each new user in the system.
/sys/kernel/uids/<uid>/cpu_share
Reading this file returns the cpu shares granted to the user.
Writing into this file modifies the cpu share for the user. Only an
administrator is allowed to modify a user's cpu share.
Ex:
# cd /sys/kernel/uids/
# cat 512/cpu_share
1024
# echo 2048 > 512/cpu_share
# cat 512/cpu_share
2048
#
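
Since the "cpu_share" file is created with mode 0644, a write attempt by a
non-root user should fail; an illustrative session (exact error text may
vary by shell):

$ echo 4096 > /sys/kernel/uids/512/cpu_share
-bash: /sys/kernel/uids/512/cpu_share: Permission denied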
Signed-off-by: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
Signed-off-by: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--  Documentation/sched-design-CFS.txt |  67
-rw-r--r--  include/linux/sched.h              |  11
-rw-r--r--  kernel/ksysfs.c                    |   8
-rw-r--r--  kernel/sched.c                     |  14
-rw-r--r--  kernel/sched_debug.c               |  48
-rw-r--r--  kernel/user.c                      | 240
6 files changed, 309 insertions(+), 79 deletions(-)
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
index 84901e7c0508..88bcb8767335 100644
--- a/Documentation/sched-design-CFS.txt
+++ b/Documentation/sched-design-CFS.txt
@@ -117,3 +117,70 @@ Some implementation details:
 iterators of the scheduling modules are used. The balancing code got
 quite a bit simpler as a result.
 
+
+Group scheduler extension to CFS
+================================
+
+Normally the scheduler operates on individual tasks and strives to provide
+fair CPU time to each task. Sometimes, it may be desirable to group tasks
+and provide fair CPU time to each such task group. For example, it may
+be desirable to first provide fair CPU time to each user on the system
+and then to each task belonging to a user.
+
+CONFIG_FAIR_GROUP_SCHED strives to achieve exactly that. It lets
+SCHED_NORMAL/BATCH tasks be grouped and divides CPU time fairly among such
+groups. At present, there are two (mutually exclusive) mechanisms to group
+tasks for CPU bandwidth control purposes:
+
+        - Based on user id (CONFIG_FAIR_USER_SCHED)
+                In this option, tasks are grouped according to their user id.
+        - Based on "cgroup" pseudo filesystem (CONFIG_FAIR_CGROUP_SCHED)
+                This option lets the administrator create arbitrary groups
+                of tasks, using the "cgroup" pseudo filesystem. See
+                Documentation/cgroups.txt for more information about this
+                filesystem.
+
+Only one of these options to group tasks can be chosen, not both.
+
+Group scheduler tunables:
+
+When CONFIG_FAIR_USER_SCHED is defined, a directory is created in sysfs for
+each new user and a "cpu_share" file is added in that directory.
+
+        # cd /sys/kernel/uids
+        # cat 512/cpu_share             # Display user 512's CPU share
+        1024
+        # echo 2048 > 512/cpu_share     # Modify user 512's CPU share
+        # cat 512/cpu_share             # Display user 512's CPU share
+        2048
+        #
+
+CPU bandwidth between two users is divided in the ratio of their CPU shares.
+For example, if you would like user "root" to get twice the bandwidth of
+user "guest", then set the cpu_share for both users such that "root"'s
+cpu_share is twice "guest"'s cpu_share.
+
+
+When CONFIG_FAIR_CGROUP_SCHED is defined, a "cpu.shares" file is created
+for each group created using the pseudo filesystem. See the example steps
+below to create task groups and modify their CPU shares using the "cgroup"
+pseudo filesystem.
+
+        # mkdir /dev/cpuctl
+        # mount -t cgroup -ocpu none /dev/cpuctl
+        # cd /dev/cpuctl
+
+        # mkdir multimedia      # create "multimedia" group of tasks
+        # mkdir browser         # create "browser" group of tasks
+
+        # #Configure the multimedia group to receive twice the CPU
+        # #bandwidth of the browser group
+
+        # echo 2048 > multimedia/cpu.shares
+        # echo 1024 > browser/cpu.shares
+
+        # firefox &             # Launch firefox and move it to "browser" group
+        # echo <firefox_pid> > browser/tasks
+
+        # #Launch gmplayer (or your favourite movie player)
+        # echo <movie_player_pid> > multimedia/tasks
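
For illustration, the "root"/"guest" example from the documentation above
could look like this in a shell session (the guest uid of 1001 is assumed
here for the example):

        # id -u guest                   # suppose "guest" maps to uid 1001
        1001
        # echo 2048 > /sys/kernel/uids/0/cpu_share
        # echo 1024 > /sys/kernel/uids/1001/cpu_share

With these values, root's tasks collectively receive twice the CPU
bandwidth of guest's tasks.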
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3cddbfc0c91d..04233c8974d9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -87,6 +87,7 @@ struct sched_param {
 #include <linux/timer.h>
 #include <linux/hrtimer.h>
 #include <linux/task_io_accounting.h>
+#include <linux/kobject.h>
 
 #include <asm/processor.h>
 
@@ -599,9 +600,18 @@ struct user_struct {
 
 #ifdef CONFIG_FAIR_USER_SCHED
         struct task_group *tg;
+        struct kset kset;
+        struct subsys_attribute user_attr;
+        struct work_struct work;
 #endif
 };
 
+#ifdef CONFIG_FAIR_USER_SCHED
+extern int uids_kobject_init(void);
+#else
+static inline int uids_kobject_init(void) { return 0; }
+#endif
+
 extern struct user_struct *find_user(uid_t);
 
 extern struct user_struct root_user;
@@ -1848,6 +1858,7 @@ extern struct task_group *sched_create_group(void);
 extern void sched_destroy_group(struct task_group *tg);
 extern void sched_move_task(struct task_struct *tsk);
 extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
+extern unsigned long sched_group_shares(struct task_group *tg);
 
 #endif
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index d0e5c48e18c7..6046939d0804 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/sched.h>
 
 #define KERNEL_ATTR_RO(_name) \
 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
@@ -116,6 +117,13 @@ static int __init ksysfs_init(void)
                                    &notes_attr);
         }
 
+        /*
+         * Create "/sys/kernel/uids" directory and corresponding root user's
+         * directory under it.
+         */
+        if (!error)
+                error = uids_kobject_init();
+
         return error;
 }
 
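
With this hook in place, the "uids" directory and the root user's entry
should be visible as soon as the kernel has booted; a quick check from a
shell might look like this (the share value shown is only indicative;
root's default comes from init_task_group_load):

        # ls /sys/kernel/uids
        0
        # cat /sys/kernel/uids/0/cpu_share
        2048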
diff --git a/kernel/sched.c b/kernel/sched.c
index a3c3ec825f42..9ac99896db8f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -162,6 +162,8 @@ struct task_group {
         /* runqueue "owned" by this group on each cpu */
         struct cfs_rq **cfs_rq;
         unsigned long shares;
+        /* spinlock to serialize modification to shares */
+        spinlock_t lock;
 };
 
 /* Default task group's sched entity on each cpu */
@@ -6533,6 +6535,7 @@ void __init sched_init(void)
                         se->parent = NULL;
                 }
                 init_task_group.shares = init_task_group_load;
+                spin_lock_init(&init_task_group.lock);
 #endif
 
                 for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -6777,6 +6780,7 @@ struct task_group *sched_create_group(void)
         }
 
         tg->shares = NICE_0_LOAD;
+        spin_lock_init(&tg->lock);
 
         return tg;
 
@@ -6897,8 +6901,9 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
         int i;
 
+        spin_lock(&tg->lock);
         if (tg->shares == shares)
-                return 0;
+                goto done;
 
         /* return -EINVAL if the new value is not sane */
 
@@ -6906,7 +6911,14 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
         for_each_possible_cpu(i)
                 set_se_shares(tg->se[i], shares);
 
+done:
+        spin_unlock(&tg->lock);
         return 0;
 }
 
+unsigned long sched_group_shares(struct task_group *tg)
+{
+        return tg->shares;
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 6f87b31d233c..0aab455a7b41 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -231,45 +231,6 @@ static void sysrq_sched_debug_show(void)
         sched_debug_show(NULL, NULL);
 }
 
-#ifdef CONFIG_FAIR_USER_SCHED
-
-static DEFINE_MUTEX(root_user_share_mutex);
-
-static int
-root_user_share_read_proc(char *page, char **start, off_t off, int count,
-                          int *eof, void *data)
-{
-        return sprintf(page, "%d\n", init_task_group_load);
-}
-
-static int
-root_user_share_write_proc(struct file *file, const char __user *buffer,
-                           unsigned long count, void *data)
-{
-        unsigned long shares;
-        char kbuf[sizeof(unsigned long)+1];
-        int rc = 0;
-
-        if (copy_from_user(kbuf, buffer, sizeof(kbuf)))
-                return -EFAULT;
-
-        shares = simple_strtoul(kbuf, NULL, 0);
-
-        if (!shares)
-                shares = NICE_0_LOAD;
-
-        mutex_lock(&root_user_share_mutex);
-
-        init_task_group_load = shares;
-        rc = sched_group_set_shares(&init_task_group, shares);
-
-        mutex_unlock(&root_user_share_mutex);
-
-        return (rc < 0 ? rc : count);
-}
-
-#endif  /* CONFIG_FAIR_USER_SCHED */
-
 static int sched_debug_open(struct inode *inode, struct file *filp)
 {
         return single_open(filp, sched_debug_show, NULL);
@@ -292,15 +253,6 @@ static int __init init_sched_debug_procfs(void)
 
         pe->proc_fops = &sched_debug_fops;
 
-#ifdef CONFIG_FAIR_USER_SCHED
-        pe = create_proc_entry("root_user_cpu_share", 0644, NULL);
-        if (!pe)
-                return -ENOMEM;
-
-        pe->read_proc = root_user_share_read_proc;
-        pe->write_proc = root_user_share_write_proc;
-#endif
-
         return 0;
 }
 
diff --git a/kernel/user.c b/kernel/user.c
index 0c9a7870d08f..74cadea8466f 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -55,7 +55,41 @@ struct user_struct root_user = {
 #endif
 };
 
+/*
+ * These routines must be called with the uidhash spinlock held!
+ */
+static inline void uid_hash_insert(struct user_struct *up,
+                                   struct hlist_head *hashent)
+{
+        hlist_add_head(&up->uidhash_node, hashent);
+}
+
+static inline void uid_hash_remove(struct user_struct *up)
+{
+        hlist_del_init(&up->uidhash_node);
+}
+
+static inline struct user_struct *uid_hash_find(uid_t uid,
+                                                struct hlist_head *hashent)
+{
+        struct user_struct *user;
+        struct hlist_node *h;
+
+        hlist_for_each_entry(user, h, hashent, uidhash_node) {
+                if (user->uid == uid) {
+                        atomic_inc(&user->__count);
+                        return user;
+                }
+        }
+
+        return NULL;
+}
+
 #ifdef CONFIG_FAIR_USER_SCHED
+
+static struct kobject uids_kobject;     /* represents /sys/kernel/uids directory */
+static DEFINE_MUTEX(uids_mutex);
+
 static void sched_destroy_user(struct user_struct *up)
 {
         sched_destroy_group(up->tg);
@@ -77,42 +77,111 @@ static void sched_switch_user(struct task_struct *p)
         sched_move_task(p);
 }
 
-#else   /* CONFIG_FAIR_USER_SCHED */
+static inline void uids_mutex_lock(void)
+{
+        mutex_lock(&uids_mutex);
+}
 
-static void sched_destroy_user(struct user_struct *up) { }
-static int sched_create_user(struct user_struct *up) { return 0; }
-static void sched_switch_user(struct task_struct *p) { }
+static inline void uids_mutex_unlock(void)
+{
+        mutex_unlock(&uids_mutex);
+}
 
-#endif  /* CONFIG_FAIR_USER_SCHED */
+/* return cpu shares held by the user */
+ssize_t cpu_shares_show(struct kset *kset, char *buffer)
+{
+        struct user_struct *up = container_of(kset, struct user_struct, kset);
 
-/*
- * These routines must be called with the uidhash spinlock held!
+        return sprintf(buffer, "%lu\n", sched_group_shares(up->tg));
+}
+
+/* modify cpu shares held by the user */
+ssize_t cpu_shares_store(struct kset *kset, const char *buffer, size_t size)
+{
+        struct user_struct *up = container_of(kset, struct user_struct, kset);
+        unsigned long shares;
+        int rc;
+
+        sscanf(buffer, "%lu", &shares);
+
+        rc = sched_group_set_shares(up->tg, shares);
+
+        return (rc ? rc : size);
+}
+
+static void user_attr_init(struct subsys_attribute *sa, char *name, int mode)
+{
+        sa->attr.name = name;
+        sa->attr.mode = mode;
+        sa->show = cpu_shares_show;
+        sa->store = cpu_shares_store;
+}
+
+/* Create "/sys/kernel/uids/<uid>" directory and
+ * "/sys/kernel/uids/<uid>/cpu_share" file for this user.
  */
-static inline void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
+static int user_kobject_create(struct user_struct *up)
 {
-        hlist_add_head(&up->uidhash_node, hashent);
+        struct kset *kset = &up->kset;
+        struct kobject *kobj = &kset->kobj;
+        int error;
+
+        memset(kset, 0, sizeof(struct kset));
+        kobj->parent = &uids_kobject;   /* create under /sys/kernel/uids dir */
+        kobject_set_name(kobj, "%d", up->uid);
+        kset_init(kset);
+        user_attr_init(&up->user_attr, "cpu_share", 0644);
+
+        error = kobject_add(kobj);
+        if (error)
+                goto done;
+
+        error = sysfs_create_file(kobj, &up->user_attr.attr);
+        if (error)
+                kobject_del(kobj);
+
+done:
+        return error;
 }
 
-static inline void uid_hash_remove(struct user_struct *up)
+/* create these in sysfs filesystem:
+ *      "/sys/kernel/uids" directory
+ *      "/sys/kernel/uids/0" directory (for root user)
+ *      "/sys/kernel/uids/0/cpu_share" file (for root user)
+ */
+int __init uids_kobject_init(void)
 {
-        hlist_del_init(&up->uidhash_node);
+        int error;
+
+        /* create under /sys/kernel dir */
+        uids_kobject.parent = &kernel_subsys.kobj;
+        kobject_set_name(&uids_kobject, "uids");
+        kobject_init(&uids_kobject);
+
+        error = kobject_add(&uids_kobject);
+        if (!error)
+                error = user_kobject_create(&root_user);
+
+        return error;
 }
 
-static inline struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
+/* work function to remove sysfs directory for a user and free up
+ * corresponding structures.
+ */
+static void remove_user_sysfs_dir(struct work_struct *w)
 {
-        struct user_struct *user;
-        struct hlist_node *h;
+        struct user_struct *up = container_of(w, struct user_struct, work);
+        struct kobject *kobj = &up->kset.kobj;
+        unsigned long flags;
+        int remove_user = 0;
 
-        hlist_for_each_entry(user, h, hashent, uidhash_node) {
-                if(user->uid == uid) {
-                        atomic_inc(&user->__count);
-                        return user;
-                }
+        /* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
+         * atomic.
+         */
+        uids_mutex_lock();
+
+        local_irq_save(flags);
+
+        if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
+                uid_hash_remove(up);
+                remove_user = 1;
+                spin_unlock_irqrestore(&uidhash_lock, flags);
+        } else {
+                local_irq_restore(flags);
         }
 
-        return NULL;
+        if (!remove_user)
+                goto done;
+
+        sysfs_remove_file(kobj, &up->user_attr.attr);
+        kobject_del(kobj);
+
+        sched_destroy_user(up);
+        key_put(up->uid_keyring);
+        key_put(up->session_keyring);
+        kmem_cache_free(uid_cachep, up);
+
+done:
+        uids_mutex_unlock();
+}
+
+/* IRQs are disabled and uidhash_lock is held upon function entry.
+ * IRQ state (as stored in flags) is restored and uidhash_lock released
+ * upon function exit.
+ */
+static inline void free_user(struct user_struct *up, unsigned long flags)
+{
+        /* restore back the count */
+        atomic_inc(&up->__count);
+        spin_unlock_irqrestore(&uidhash_lock, flags);
+
+        INIT_WORK(&up->work, remove_user_sysfs_dir);
+        schedule_work(&up->work);
 }
 
+#else   /* CONFIG_FAIR_USER_SCHED */
+
+static void sched_destroy_user(struct user_struct *up) { }
+static int sched_create_user(struct user_struct *up) { return 0; }
+static void sched_switch_user(struct task_struct *p) { }
+static inline int user_kobject_create(struct user_struct *up) { return 0; }
+static inline void uids_mutex_lock(void) { }
+static inline void uids_mutex_unlock(void) { }
+
+/* IRQs are disabled and uidhash_lock is held upon function entry.
+ * IRQ state (as stored in flags) is restored and uidhash_lock released
+ * upon function exit.
+ */
+static inline void free_user(struct user_struct *up, unsigned long flags)
+{
+        uid_hash_remove(up);
+        spin_unlock_irqrestore(&uidhash_lock, flags);
+        sched_destroy_user(up);
+        key_put(up->uid_keyring);
+        key_put(up->session_keyring);
+        kmem_cache_free(uid_cachep, up);
+}
+
+#endif  /* CONFIG_FAIR_USER_SCHED */
+
 /*
  * Locate the user_struct for the passed UID. If found, take a ref on it. The
  * caller must undo that ref with free_uid().
@@ -139,16 +304,10 @@ void free_uid(struct user_struct *up)
                 return;
 
         local_irq_save(flags);
-        if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
-                uid_hash_remove(up);
-                spin_unlock_irqrestore(&uidhash_lock, flags);
-                sched_destroy_user(up);
-                key_put(up->uid_keyring);
-                key_put(up->session_keyring);
-                kmem_cache_free(uid_cachep, up);
-        } else {
+        if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
+                free_user(up, flags);
+        else
                 local_irq_restore(flags);
-        }
 }
 
 struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
@@ -156,6 +315,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
         struct hlist_head *hashent = uidhashentry(ns, uid);
         struct user_struct *up;
 
+        /* Make uid_hash_find() + user_kobject_create() + uid_hash_insert()
+         * atomic.
+         */
+        uids_mutex_lock();
+
         spin_lock_irq(&uidhash_lock);
         up = uid_hash_find(uid, hashent);
         spin_unlock_irq(&uidhash_lock);
@@ -191,6 +355,15 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
                 return NULL;
         }
 
+        if (user_kobject_create(new)) {
+                sched_destroy_user(new);
+                key_put(new->uid_keyring);
+                key_put(new->session_keyring);
+                kmem_cache_free(uid_cachep, new);
+                uids_mutex_unlock();
+                return NULL;
+        }
+
         /*
          * Before adding this, check whether we raced
          * on adding the same user already..
@@ -198,7 +371,11 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
         spin_lock_irq(&uidhash_lock);
         up = uid_hash_find(uid, hashent);
         if (up) {
-                sched_destroy_user(new);
+                /* This case is not possible when CONFIG_FAIR_USER_SCHED
+                 * is defined, since we serialize alloc_uid() using
+                 * uids_mutex. Hence no need to call
+                 * sched_destroy_user() or remove_user_sysfs_dir().
+                 */
                 key_put(new->uid_keyring);
                 key_put(new->session_keyring);
                 kmem_cache_free(uid_cachep, new);
@@ -209,6 +386,9 @@ struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
                 spin_unlock_irq(&uidhash_lock);
 
         }
+
+        uids_mutex_unlock();
+
         return up;
 }
 
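
The net effect of the kernel/user.c changes can also be observed from a
shell: a uid's directory appears when the first task of that uid takes a
reference on its user_struct, and it is removed again once the last
reference is dropped (the teardown runs from the deferred work function,
so removal may lag the final put slightly). A hypothetical session, again
assuming a "guest" user with uid 1001 and no other active uids:

        # ls /sys/kernel/uids
        0
        # su guest -c 'sleep 60' &
        # ls /sys/kernel/uids
        0  1001
        # wait                          # guest's last task exits
        # ls /sys/kernel/uids
        0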