aboutsummaryrefslogtreecommitdiffstats
path: root/security/device_cgroup.c
diff options
context:
space:
mode:
author: Serge E. Hallyn <serue@us.ibm.com> 2008-04-29 04:00:10 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2008-04-29 11:06:09 -0400
commit: 08ce5f16ee466ffc5bf243800deeecd77d9eaf50 (patch)
tree: 8fb921137a677d463f11727dab7e683db426b810 /security/device_cgroup.c
parent: d447ea2f30ec60370ddb99a668e5ac12995f043d (diff)
cgroups: implement device whitelist
Implement a cgroup to track and enforce open and mknod restrictions on device files. A device cgroup associates a device access whitelist with each cgroup. A whitelist entry has 4 fields. 'type' is a (all), c (char), or b (block). 'all' means it applies to all types and all major and minor numbers. Major and minor are either an integer or * for all. Access is a composition of r (read), w (write), and m (mknod).

The root device cgroup starts with rwm to 'all'. A child devcg gets a copy of the parent. Admins can then remove devices from the whitelist or add new entries. A child cgroup can never receive a device access which is denied its parent. However when a device access is removed from a parent it will not also be removed from the child(ren).

An entry is added using devices.allow, and removed using devices.deny. For instance

    echo 'c 1:3 mr' > /cgroups/1/devices.allow

allows cgroup 1 to read and mknod the device usually known as /dev/null. Doing

    echo a > /cgroups/1/devices.deny

will remove the default 'a *:* mrw' entry.

CAP_SYS_ADMIN is needed to change permissions or move another task to a new cgroup. A cgroup may not be granted more permissions than the cgroup's parent has. Any task can move itself between cgroups. This won't be sufficient, but we can decide the best way to adequately restrict movement later.

[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: fix may-be-used-uninitialized warning]
Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Acked-by: James Morris <jmorris@namei.org>
Looks-good-to: Pavel Emelyanov <xemul@openvz.org>
Cc: Daniel Hokka Zakrisson <daniel@hozac.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'security/device_cgroup.c')
-rw-r--r-- security/device_cgroup.c 603
1 files changed, 603 insertions, 0 deletions
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
new file mode 100644
index 000000000000..4237b19e8fb3
--- /dev/null
+++ b/security/device_cgroup.c
@@ -0,0 +1,603 @@
1/*
2 * dev_cgroup.c - device cgroup subsystem
3 *
4 * Copyright 2007 IBM Corp
5 */
6
7#include <linux/device_cgroup.h>
8#include <linux/cgroup.h>
9#include <linux/ctype.h>
10#include <linux/list.h>
11#include <linux/uaccess.h>
12
/* Access bits kept in dev_whitelist_item.access. */
#define ACC_MKNOD	0x1
#define ACC_READ	0x2
#define ACC_WRITE	0x4
#define ACC_MASK	(ACC_MKNOD | ACC_READ | ACC_WRITE)

/* Device classes kept in dev_whitelist_item.type. */
#define DEV_BLOCK	0x1
#define DEV_CHAR	0x2
#define DEV_ALL		0x4	/* this represents all devices */
21
22/*
23 * whitelist locking rules:
24 * cgroup_lock() cannot be taken under dev_cgroup->lock.
25 * dev_cgroup->lock can be taken with or without cgroup_lock().
26 *
27 * modifications always require cgroup_lock
28 * modifications to a list which is visible require the
29 * dev_cgroup->lock *and* cgroup_lock()
30 * walking the list requires dev_cgroup->lock or cgroup_lock().
31 *
32 * reasoning: dev_whitelist_copy() needs to kmalloc, so needs
33 * a mutex, which the cgroup_lock() is. Since modifying
34 * a visible list requires both locks, either lock can be
35 * taken for walking the list.
36 */
37
38struct dev_whitelist_item {
39 u32 major, minor;
40 short type;
41 short access;
42 struct list_head list;
43};
44
45struct dev_cgroup {
46 struct cgroup_subsys_state css;
47 struct list_head whitelist;
48 spinlock_t lock;
49};
50
51static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
52{
53 return container_of(cgroup_subsys_state(cgroup, devices_subsys_id),
54 struct dev_cgroup, css);
55}
56
57struct cgroup_subsys devices_subsys;
58
59static int devcgroup_can_attach(struct cgroup_subsys *ss,
60 struct cgroup *new_cgroup, struct task_struct *task)
61{
62 if (current != task && !capable(CAP_SYS_ADMIN))
63 return -EPERM;
64
65 return 0;
66}
67
68/*
69 * called under cgroup_lock()
70 */
71static int dev_whitelist_copy(struct list_head *dest, struct list_head *orig)
72{
73 struct dev_whitelist_item *wh, *tmp, *new;
74
75 list_for_each_entry(wh, orig, list) {
76 new = kmalloc(sizeof(*wh), GFP_KERNEL);
77 if (!new)
78 goto free_and_exit;
79 new->major = wh->major;
80 new->minor = wh->minor;
81 new->type = wh->type;
82 new->access = wh->access;
83 list_add_tail(&new->list, dest);
84 }
85
86 return 0;
87
88free_and_exit:
89 list_for_each_entry_safe(wh, tmp, dest, list) {
90 list_del(&wh->list);
91 kfree(wh);
92 }
93 return -ENOMEM;
94}
95
96/* Stupid prototype - don't bother combining existing entries */
97/*
98 * called under cgroup_lock()
99 * since the list is visible to other tasks, we need the spinlock also
100 */
101static int dev_whitelist_add(struct dev_cgroup *dev_cgroup,
102 struct dev_whitelist_item *wh)
103{
104 struct dev_whitelist_item *whcopy;
105
106 whcopy = kmalloc(sizeof(*whcopy), GFP_KERNEL);
107 if (!whcopy)
108 return -ENOMEM;
109
110 memcpy(whcopy, wh, sizeof(*whcopy));
111 spin_lock(&dev_cgroup->lock);
112 list_add_tail(&whcopy->list, &dev_cgroup->whitelist);
113 spin_unlock(&dev_cgroup->lock);
114 return 0;
115}
116
117/*
118 * called under cgroup_lock()
119 * since the list is visible to other tasks, we need the spinlock also
120 */
121static void dev_whitelist_rm(struct dev_cgroup *dev_cgroup,
122 struct dev_whitelist_item *wh)
123{
124 struct dev_whitelist_item *walk, *tmp;
125
126 spin_lock(&dev_cgroup->lock);
127 list_for_each_entry_safe(walk, tmp, &dev_cgroup->whitelist, list) {
128 if (walk->type == DEV_ALL)
129 goto remove;
130 if (walk->type != wh->type)
131 continue;
132 if (walk->major != ~0 && walk->major != wh->major)
133 continue;
134 if (walk->minor != ~0 && walk->minor != wh->minor)
135 continue;
136
137remove:
138 walk->access &= ~wh->access;
139 if (!walk->access) {
140 list_del(&walk->list);
141 kfree(walk);
142 }
143 }
144 spin_unlock(&dev_cgroup->lock);
145}
146
147/*
148 * called from kernel/cgroup.c with cgroup_lock() held.
149 */
150static struct cgroup_subsys_state *devcgroup_create(struct cgroup_subsys *ss,
151 struct cgroup *cgroup)
152{
153 struct dev_cgroup *dev_cgroup, *parent_dev_cgroup;
154 struct cgroup *parent_cgroup;
155 int ret;
156
157 dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
158 if (!dev_cgroup)
159 return ERR_PTR(-ENOMEM);
160 INIT_LIST_HEAD(&dev_cgroup->whitelist);
161 parent_cgroup = cgroup->parent;
162
163 if (parent_cgroup == NULL) {
164 struct dev_whitelist_item *wh;
165 wh = kmalloc(sizeof(*wh), GFP_KERNEL);
166 if (!wh) {
167 kfree(dev_cgroup);
168 return ERR_PTR(-ENOMEM);
169 }
170 wh->minor = wh->major = ~0;
171 wh->type = DEV_ALL;
172 wh->access = ACC_MKNOD | ACC_READ | ACC_WRITE;
173 list_add(&wh->list, &dev_cgroup->whitelist);
174 } else {
175 parent_dev_cgroup = cgroup_to_devcgroup(parent_cgroup);
176 ret = dev_whitelist_copy(&dev_cgroup->whitelist,
177 &parent_dev_cgroup->whitelist);
178 if (ret) {
179 kfree(dev_cgroup);
180 return ERR_PTR(ret);
181 }
182 }
183
184 spin_lock_init(&dev_cgroup->lock);
185 return &dev_cgroup->css;
186}
187
188static void devcgroup_destroy(struct cgroup_subsys *ss,
189 struct cgroup *cgroup)
190{
191 struct dev_cgroup *dev_cgroup;
192 struct dev_whitelist_item *wh, *tmp;
193
194 dev_cgroup = cgroup_to_devcgroup(cgroup);
195 list_for_each_entry_safe(wh, tmp, &dev_cgroup->whitelist, list) {
196 list_del(&wh->list);
197 kfree(wh);
198 }
199 kfree(dev_cgroup);
200}
201
/* Values of cftype.private telling the two control files apart. */
#define DEVCG_ALLOW	1
#define DEVCG_DENY	2
204
205static void set_access(char *acc, short access)
206{
207 int idx = 0;
208 memset(acc, 0, 4);
209 if (access & ACC_READ)
210 acc[idx++] = 'r';
211 if (access & ACC_WRITE)
212 acc[idx++] = 'w';
213 if (access & ACC_MKNOD)
214 acc[idx++] = 'm';
215}
216
217static char type_to_char(short type)
218{
219 if (type == DEV_ALL)
220 return 'a';
221 if (type == DEV_CHAR)
222 return 'c';
223 if (type == DEV_BLOCK)
224 return 'b';
225 return 'X';
226}
227
/*
 * Render a major or minor number into @str, which holds @len bytes.
 * The wildcard value ~0 is written as "*"; any other value is written
 * in unsigned decimal.  Output that does not fit is truncated but is
 * always NUL-terminated.  Nothing is written when @len is not positive.
 */
static void set_majmin(char *str, int len, unsigned m)
{
	if (len <= 0)
		return;

	memset(str, 0, len);
	if (m == ~0U)
		snprintf(str, len, "*");
	else
		/* %u, not %d: m is unsigned and may exceed INT_MAX */
		snprintf(str, len, "%u", m);
}
236
237static char *print_whitelist(struct dev_cgroup *devcgroup, int *len)
238{
239 char *buf, *s, acc[4];
240 struct dev_whitelist_item *wh;
241 int ret;
242 int count = 0;
243 char maj[10], min[10];
244
245 buf = kmalloc(4096, GFP_KERNEL);
246 if (!buf)
247 return ERR_PTR(-ENOMEM);
248 s = buf;
249 *s = '\0';
250 *len = 0;
251
252 spin_lock(&devcgroup->lock);
253 list_for_each_entry(wh, &devcgroup->whitelist, list) {
254 set_access(acc, wh->access);
255 set_majmin(maj, 10, wh->major);
256 set_majmin(min, 10, wh->minor);
257 ret = snprintf(s, 4095-(s-buf), "%c %s:%s %s\n",
258 type_to_char(wh->type), maj, min, acc);
259 if (s+ret >= buf+4095) {
260 kfree(buf);
261 buf = ERR_PTR(-ENOMEM);
262 break;
263 }
264 s += ret;
265 *len += ret;
266 count++;
267 }
268 spin_unlock(&devcgroup->lock);
269
270 return buf;
271}
272
273static ssize_t devcgroup_access_read(struct cgroup *cgroup,
274 struct cftype *cft, struct file *file,
275 char __user *userbuf, size_t nbytes, loff_t *ppos)
276{
277 struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
278 int filetype = cft->private;
279 char *buffer;
280 int uninitialized_var(len);
281 int retval;
282
283 if (filetype != DEVCG_ALLOW)
284 return -EINVAL;
285 buffer = print_whitelist(devcgroup, &len);
286 if (IS_ERR(buffer))
287 return PTR_ERR(buffer);
288
289 retval = simple_read_from_buffer(userbuf, nbytes, ppos, buffer, len);
290 kfree(buffer);
291 return retval;
292}
293
294/*
295 * may_access_whitelist:
296 * does the access granted to dev_cgroup c contain the access
297 * requested in whitelist item refwh.
298 * return 1 if yes, 0 if no.
299 * call with c->lock held
300 */
301static int may_access_whitelist(struct dev_cgroup *c,
302 struct dev_whitelist_item *refwh)
303{
304 struct dev_whitelist_item *whitem;
305
306 list_for_each_entry(whitem, &c->whitelist, list) {
307 if (whitem->type & DEV_ALL)
308 return 1;
309 if ((refwh->type & DEV_BLOCK) && !(whitem->type & DEV_BLOCK))
310 continue;
311 if ((refwh->type & DEV_CHAR) && !(whitem->type & DEV_CHAR))
312 continue;
313 if (whitem->major != ~0 && whitem->major != refwh->major)
314 continue;
315 if (whitem->minor != ~0 && whitem->minor != refwh->minor)
316 continue;
317 if (refwh->access & (~(whitem->access | ACC_MASK)))
318 continue;
319 return 1;
320 }
321 return 0;
322}
323
324/*
325 * parent_has_perm:
326 * when adding a new allow rule to a device whitelist, the rule
327 * must be allowed in the parent device
328 */
329static int parent_has_perm(struct cgroup *childcg,
330 struct dev_whitelist_item *wh)
331{
332 struct cgroup *pcg = childcg->parent;
333 struct dev_cgroup *parent;
334 int ret;
335
336 if (!pcg)
337 return 1;
338 parent = cgroup_to_devcgroup(pcg);
339 spin_lock(&parent->lock);
340 ret = may_access_whitelist(parent, wh);
341 spin_unlock(&parent->lock);
342 return ret;
343}
344
345/*
346 * Modify the whitelist using allow/deny rules.
347 * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD
348 * so we can give a container CAP_MKNOD to let it create devices but not
349 * modify the whitelist.
350 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
351 * us to also grant CAP_SYS_ADMIN to containers without giving away the
352 * device whitelist controls, but for now we'll stick with CAP_SYS_ADMIN
353 *
354 * Taking rules away is always allowed (given CAP_SYS_ADMIN). Granting
355 * new access is only allowed if you're in the top-level cgroup, or your
356 * parent cgroup has the access you're asking for.
357 */
358static ssize_t devcgroup_access_write(struct cgroup *cgroup, struct cftype *cft,
359 struct file *file, const char __user *userbuf,
360 size_t nbytes, loff_t *ppos)
361{
362 struct cgroup *cur_cgroup;
363 struct dev_cgroup *devcgroup, *cur_devcgroup;
364 int filetype = cft->private;
365 char *buffer, *b;
366 int retval = 0, count;
367 struct dev_whitelist_item wh;
368
369 if (!capable(CAP_SYS_ADMIN))
370 return -EPERM;
371
372 devcgroup = cgroup_to_devcgroup(cgroup);
373 cur_cgroup = task_cgroup(current, devices_subsys.subsys_id);
374 cur_devcgroup = cgroup_to_devcgroup(cur_cgroup);
375
376 buffer = kmalloc(nbytes+1, GFP_KERNEL);
377 if (!buffer)
378 return -ENOMEM;
379
380 if (copy_from_user(buffer, userbuf, nbytes)) {
381 retval = -EFAULT;
382 goto out1;
383 }
384 buffer[nbytes] = 0; /* nul-terminate */
385
386 cgroup_lock();
387 if (cgroup_is_removed(cgroup)) {
388 retval = -ENODEV;
389 goto out2;
390 }
391
392 memset(&wh, 0, sizeof(wh));
393 b = buffer;
394
395 switch (*b) {
396 case 'a':
397 wh.type = DEV_ALL;
398 wh.access = ACC_MASK;
399 goto handle;
400 case 'b':
401 wh.type = DEV_BLOCK;
402 break;
403 case 'c':
404 wh.type = DEV_CHAR;
405 break;
406 default:
407 retval = -EINVAL;
408 goto out2;
409 }
410 b++;
411 if (!isspace(*b)) {
412 retval = -EINVAL;
413 goto out2;
414 }
415 b++;
416 if (*b == '*') {
417 wh.major = ~0;
418 b++;
419 } else if (isdigit(*b)) {
420 wh.major = 0;
421 while (isdigit(*b)) {
422 wh.major = wh.major*10+(*b-'0');
423 b++;
424 }
425 } else {
426 retval = -EINVAL;
427 goto out2;
428 }
429 if (*b != ':') {
430 retval = -EINVAL;
431 goto out2;
432 }
433 b++;
434
435 /* read minor */
436 if (*b == '*') {
437 wh.minor = ~0;
438 b++;
439 } else if (isdigit(*b)) {
440 wh.minor = 0;
441 while (isdigit(*b)) {
442 wh.minor = wh.minor*10+(*b-'0');
443 b++;
444 }
445 } else {
446 retval = -EINVAL;
447 goto out2;
448 }
449 if (!isspace(*b)) {
450 retval = -EINVAL;
451 goto out2;
452 }
453 for (b++, count = 0; count < 3; count++, b++) {
454 switch (*b) {
455 case 'r':
456 wh.access |= ACC_READ;
457 break;
458 case 'w':
459 wh.access |= ACC_WRITE;
460 break;
461 case 'm':
462 wh.access |= ACC_MKNOD;
463 break;
464 case '\n':
465 case '\0':
466 count = 3;
467 break;
468 default:
469 retval = -EINVAL;
470 goto out2;
471 }
472 }
473
474handle:
475 retval = 0;
476 switch (filetype) {
477 case DEVCG_ALLOW:
478 if (!parent_has_perm(cgroup, &wh))
479 retval = -EPERM;
480 else
481 retval = dev_whitelist_add(devcgroup, &wh);
482 break;
483 case DEVCG_DENY:
484 dev_whitelist_rm(devcgroup, &wh);
485 break;
486 default:
487 retval = -EINVAL;
488 goto out2;
489 }
490
491 if (retval == 0)
492 retval = nbytes;
493
494out2:
495 cgroup_unlock();
496out1:
497 kfree(buffer);
498 return retval;
499}
500
501static struct cftype dev_cgroup_files[] = {
502 {
503 .name = "allow",
504 .read = devcgroup_access_read,
505 .write = devcgroup_access_write,
506 .private = DEVCG_ALLOW,
507 },
508 {
509 .name = "deny",
510 .write = devcgroup_access_write,
511 .private = DEVCG_DENY,
512 },
513};
514
515static int devcgroup_populate(struct cgroup_subsys *ss,
516 struct cgroup *cgroup)
517{
518 return cgroup_add_files(cgroup, ss, dev_cgroup_files,
519 ARRAY_SIZE(dev_cgroup_files));
520}
521
522struct cgroup_subsys devices_subsys = {
523 .name = "devices",
524 .can_attach = devcgroup_can_attach,
525 .create = devcgroup_create,
526 .destroy = devcgroup_destroy,
527 .populate = devcgroup_populate,
528 .subsys_id = devices_subsys_id,
529};
530
531int devcgroup_inode_permission(struct inode *inode, int mask)
532{
533 struct cgroup *cgroup;
534 struct dev_cgroup *dev_cgroup;
535 struct dev_whitelist_item *wh;
536
537 dev_t device = inode->i_rdev;
538 if (!device)
539 return 0;
540 if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))
541 return 0;
542 cgroup = task_cgroup(current, devices_subsys.subsys_id);
543 dev_cgroup = cgroup_to_devcgroup(cgroup);
544 if (!dev_cgroup)
545 return 0;
546
547 spin_lock(&dev_cgroup->lock);
548 list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
549 if (wh->type & DEV_ALL)
550 goto acc_check;
551 if ((wh->type & DEV_BLOCK) && !S_ISBLK(inode->i_mode))
552 continue;
553 if ((wh->type & DEV_CHAR) && !S_ISCHR(inode->i_mode))
554 continue;
555 if (wh->major != ~0 && wh->major != imajor(inode))
556 continue;
557 if (wh->minor != ~0 && wh->minor != iminor(inode))
558 continue;
559acc_check:
560 if ((mask & MAY_WRITE) && !(wh->access & ACC_WRITE))
561 continue;
562 if ((mask & MAY_READ) && !(wh->access & ACC_READ))
563 continue;
564 spin_unlock(&dev_cgroup->lock);
565 return 0;
566 }
567 spin_unlock(&dev_cgroup->lock);
568
569 return -EPERM;
570}
571
572int devcgroup_inode_mknod(int mode, dev_t dev)
573{
574 struct cgroup *cgroup;
575 struct dev_cgroup *dev_cgroup;
576 struct dev_whitelist_item *wh;
577
578 cgroup = task_cgroup(current, devices_subsys.subsys_id);
579 dev_cgroup = cgroup_to_devcgroup(cgroup);
580 if (!dev_cgroup)
581 return 0;
582
583 spin_lock(&dev_cgroup->lock);
584 list_for_each_entry(wh, &dev_cgroup->whitelist, list) {
585 if (wh->type & DEV_ALL)
586 goto acc_check;
587 if ((wh->type & DEV_BLOCK) && !S_ISBLK(mode))
588 continue;
589 if ((wh->type & DEV_CHAR) && !S_ISCHR(mode))
590 continue;
591 if (wh->major != ~0 && wh->major != MAJOR(dev))
592 continue;
593 if (wh->minor != ~0 && wh->minor != MINOR(dev))
594 continue;
595acc_check:
596 if (!(wh->access & ACC_MKNOD))
597 continue;
598 spin_unlock(&dev_cgroup->lock);
599 return 0;
600 }
601 spin_unlock(&dev_cgroup->lock);
602 return -EPERM;
603}