diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 331 |
1 files changed, 213 insertions, 118 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 6d8de051382b..b9d467d83fc1 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -44,6 +44,7 @@ | |||
44 | #include <linux/kmod.h> | 44 | #include <linux/kmod.h> |
45 | #include <linux/delayacct.h> | 45 | #include <linux/delayacct.h> |
46 | #include <linux/cgroupstats.h> | 46 | #include <linux/cgroupstats.h> |
47 | #include <linux/hash.h> | ||
47 | 48 | ||
48 | #include <asm/atomic.h> | 49 | #include <asm/atomic.h> |
49 | 50 | ||
@@ -118,17 +119,7 @@ static int root_count; | |||
118 | * be called. | 119 | * be called. |
119 | */ | 120 | */ |
120 | static int need_forkexit_callback; | 121 | static int need_forkexit_callback; |
121 | 122 | static int need_mm_owner_callback __read_mostly; | |
122 | /* bits in struct cgroup flags field */ | ||
123 | enum { | ||
124 | /* Control Group is dead */ | ||
125 | CGRP_REMOVED, | ||
126 | /* Control Group has previously had a child cgroup or a task, | ||
127 | * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */ | ||
128 | CGRP_RELEASABLE, | ||
129 | /* Control Group requires release notifications to userspace */ | ||
130 | CGRP_NOTIFY_ON_RELEASE, | ||
131 | }; | ||
132 | 123 | ||
133 | /* convenient tests for these bits */ | 124 | /* convenient tests for these bits */ |
134 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 125 | inline int cgroup_is_removed(const struct cgroup *cgrp) |
@@ -204,6 +195,27 @@ static struct cg_cgroup_link init_css_set_link; | |||
204 | static DEFINE_RWLOCK(css_set_lock); | 195 | static DEFINE_RWLOCK(css_set_lock); |
205 | static int css_set_count; | 196 | static int css_set_count; |
206 | 197 | ||
198 | /* hash table for cgroup groups. This improves the performance to | ||
199 | * find an existing css_set */ | ||
200 | #define CSS_SET_HASH_BITS 7 | ||
201 | #define CSS_SET_TABLE_SIZE (1 << CSS_SET_HASH_BITS) | ||
202 | static struct hlist_head css_set_table[CSS_SET_TABLE_SIZE]; | ||
203 | |||
204 | static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[]) | ||
205 | { | ||
206 | int i; | ||
207 | int index; | ||
208 | unsigned long tmp = 0UL; | ||
209 | |||
210 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | ||
211 | tmp += (unsigned long)css[i]; | ||
212 | tmp = (tmp >> 16) ^ tmp; | ||
213 | |||
214 | index = hash_long(tmp, CSS_SET_HASH_BITS); | ||
215 | |||
216 | return &css_set_table[index]; | ||
217 | } | ||
218 | |||
207 | /* We don't maintain the lists running through each css_set to its | 219 | /* We don't maintain the lists running through each css_set to its |
208 | * task until after the first call to cgroup_iter_start(). This | 220 | * task until after the first call to cgroup_iter_start(). This |
209 | * reduces the fork()/exit() overhead for people who have cgroups | 221 | * reduces the fork()/exit() overhead for people who have cgroups |
@@ -230,7 +242,7 @@ static int use_task_css_set_links; | |||
230 | static void unlink_css_set(struct css_set *cg) | 242 | static void unlink_css_set(struct css_set *cg) |
231 | { | 243 | { |
232 | write_lock(&css_set_lock); | 244 | write_lock(&css_set_lock); |
233 | list_del(&cg->list); | 245 | hlist_del(&cg->hlist); |
234 | css_set_count--; | 246 | css_set_count--; |
235 | while (!list_empty(&cg->cg_links)) { | 247 | while (!list_empty(&cg->cg_links)) { |
236 | struct cg_cgroup_link *link; | 248 | struct cg_cgroup_link *link; |
@@ -295,9 +307,7 @@ static inline void put_css_set_taskexit(struct css_set *cg) | |||
295 | /* | 307 | /* |
296 | * find_existing_css_set() is a helper for | 308 | * find_existing_css_set() is a helper for |
297 | * find_css_set(), and checks to see whether an existing | 309 | * find_css_set(), and checks to see whether an existing |
298 | * css_set is suitable. This currently walks a linked-list for | 310 | * css_set is suitable. |
299 | * simplicity; a later patch will use a hash table for better | ||
300 | * performance | ||
301 | * | 311 | * |
302 | * oldcg: the cgroup group that we're using before the cgroup | 312 | * oldcg: the cgroup group that we're using before the cgroup |
303 | * transition | 313 | * transition |
@@ -314,7 +324,9 @@ static struct css_set *find_existing_css_set( | |||
314 | { | 324 | { |
315 | int i; | 325 | int i; |
316 | struct cgroupfs_root *root = cgrp->root; | 326 | struct cgroupfs_root *root = cgrp->root; |
317 | struct list_head *l = &init_css_set.list; | 327 | struct hlist_head *hhead; |
328 | struct hlist_node *node; | ||
329 | struct css_set *cg; | ||
318 | 330 | ||
319 | /* Built the set of subsystem state objects that we want to | 331 | /* Built the set of subsystem state objects that we want to |
320 | * see in the new css_set */ | 332 | * see in the new css_set */ |
@@ -331,18 +343,13 @@ static struct css_set *find_existing_css_set( | |||
331 | } | 343 | } |
332 | } | 344 | } |
333 | 345 | ||
334 | /* Look through existing cgroup groups to find one to reuse */ | 346 | hhead = css_set_hash(template); |
335 | do { | 347 | hlist_for_each_entry(cg, node, hhead, hlist) { |
336 | struct css_set *cg = | ||
337 | list_entry(l, struct css_set, list); | ||
338 | |||
339 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { | 348 | if (!memcmp(template, cg->subsys, sizeof(cg->subsys))) { |
340 | /* All subsystems matched */ | 349 | /* All subsystems matched */ |
341 | return cg; | 350 | return cg; |
342 | } | 351 | } |
343 | /* Try the next cgroup group */ | 352 | } |
344 | l = l->next; | ||
345 | } while (l != &init_css_set.list); | ||
346 | 353 | ||
347 | /* No existing cgroup group matched */ | 354 | /* No existing cgroup group matched */ |
348 | return NULL; | 355 | return NULL; |
@@ -404,6 +411,8 @@ static struct css_set *find_css_set( | |||
404 | struct list_head tmp_cg_links; | 411 | struct list_head tmp_cg_links; |
405 | struct cg_cgroup_link *link; | 412 | struct cg_cgroup_link *link; |
406 | 413 | ||
414 | struct hlist_head *hhead; | ||
415 | |||
407 | /* First see if we already have a cgroup group that matches | 416 | /* First see if we already have a cgroup group that matches |
408 | * the desired set */ | 417 | * the desired set */ |
409 | write_lock(&css_set_lock); | 418 | write_lock(&css_set_lock); |
@@ -428,6 +437,7 @@ static struct css_set *find_css_set( | |||
428 | kref_init(&res->ref); | 437 | kref_init(&res->ref); |
429 | INIT_LIST_HEAD(&res->cg_links); | 438 | INIT_LIST_HEAD(&res->cg_links); |
430 | INIT_LIST_HEAD(&res->tasks); | 439 | INIT_LIST_HEAD(&res->tasks); |
440 | INIT_HLIST_NODE(&res->hlist); | ||
431 | 441 | ||
432 | /* Copy the set of subsystem state objects generated in | 442 | /* Copy the set of subsystem state objects generated in |
433 | * find_existing_css_set() */ | 443 | * find_existing_css_set() */ |
@@ -467,9 +477,12 @@ static struct css_set *find_css_set( | |||
467 | 477 | ||
468 | BUG_ON(!list_empty(&tmp_cg_links)); | 478 | BUG_ON(!list_empty(&tmp_cg_links)); |
469 | 479 | ||
470 | /* Link this cgroup group into the list */ | ||
471 | list_add(&res->list, &init_css_set.list); | ||
472 | css_set_count++; | 480 | css_set_count++; |
481 | |||
482 | /* Add this cgroup group to the hash table */ | ||
483 | hhead = css_set_hash(res->subsys); | ||
484 | hlist_add_head(&res->hlist, hhead); | ||
485 | |||
473 | write_unlock(&css_set_lock); | 486 | write_unlock(&css_set_lock); |
474 | 487 | ||
475 | return res; | 488 | return res; |
@@ -948,7 +961,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
948 | int ret = 0; | 961 | int ret = 0; |
949 | struct super_block *sb; | 962 | struct super_block *sb; |
950 | struct cgroupfs_root *root; | 963 | struct cgroupfs_root *root; |
951 | struct list_head tmp_cg_links, *l; | 964 | struct list_head tmp_cg_links; |
952 | INIT_LIST_HEAD(&tmp_cg_links); | 965 | INIT_LIST_HEAD(&tmp_cg_links); |
953 | 966 | ||
954 | /* First find the desired set of subsystems */ | 967 | /* First find the desired set of subsystems */ |
@@ -990,6 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
990 | /* New superblock */ | 1003 | /* New superblock */ |
991 | struct cgroup *cgrp = &root->top_cgroup; | 1004 | struct cgroup *cgrp = &root->top_cgroup; |
992 | struct inode *inode; | 1005 | struct inode *inode; |
1006 | int i; | ||
993 | 1007 | ||
994 | BUG_ON(sb->s_root != NULL); | 1008 | BUG_ON(sb->s_root != NULL); |
995 | 1009 | ||
@@ -1034,22 +1048,25 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1034 | /* Link the top cgroup in this hierarchy into all | 1048 | /* Link the top cgroup in this hierarchy into all |
1035 | * the css_set objects */ | 1049 | * the css_set objects */ |
1036 | write_lock(&css_set_lock); | 1050 | write_lock(&css_set_lock); |
1037 | l = &init_css_set.list; | 1051 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) { |
1038 | do { | 1052 | struct hlist_head *hhead = &css_set_table[i]; |
1053 | struct hlist_node *node; | ||
1039 | struct css_set *cg; | 1054 | struct css_set *cg; |
1040 | struct cg_cgroup_link *link; | 1055 | |
1041 | cg = list_entry(l, struct css_set, list); | 1056 | hlist_for_each_entry(cg, node, hhead, hlist) { |
1042 | BUG_ON(list_empty(&tmp_cg_links)); | 1057 | struct cg_cgroup_link *link; |
1043 | link = list_entry(tmp_cg_links.next, | 1058 | |
1044 | struct cg_cgroup_link, | 1059 | BUG_ON(list_empty(&tmp_cg_links)); |
1045 | cgrp_link_list); | 1060 | link = list_entry(tmp_cg_links.next, |
1046 | list_del(&link->cgrp_link_list); | 1061 | struct cg_cgroup_link, |
1047 | link->cg = cg; | 1062 | cgrp_link_list); |
1048 | list_add(&link->cgrp_link_list, | 1063 | list_del(&link->cgrp_link_list); |
1049 | &root->top_cgroup.css_sets); | 1064 | link->cg = cg; |
1050 | list_add(&link->cg_link_list, &cg->cg_links); | 1065 | list_add(&link->cgrp_link_list, |
1051 | l = l->next; | 1066 | &root->top_cgroup.css_sets); |
1052 | } while (l != &init_css_set.list); | 1067 | list_add(&link->cg_link_list, &cg->cg_links); |
1068 | } | ||
1069 | } | ||
1053 | write_unlock(&css_set_lock); | 1070 | write_unlock(&css_set_lock); |
1054 | 1071 | ||
1055 | free_cg_links(&tmp_cg_links); | 1072 | free_cg_links(&tmp_cg_links); |
@@ -1307,18 +1324,16 @@ enum cgroup_filetype { | |||
1307 | FILE_DIR, | 1324 | FILE_DIR, |
1308 | FILE_TASKLIST, | 1325 | FILE_TASKLIST, |
1309 | FILE_NOTIFY_ON_RELEASE, | 1326 | FILE_NOTIFY_ON_RELEASE, |
1310 | FILE_RELEASABLE, | ||
1311 | FILE_RELEASE_AGENT, | 1327 | FILE_RELEASE_AGENT, |
1312 | }; | 1328 | }; |
1313 | 1329 | ||
1314 | static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, | 1330 | static ssize_t cgroup_write_X64(struct cgroup *cgrp, struct cftype *cft, |
1315 | struct file *file, | 1331 | struct file *file, |
1316 | const char __user *userbuf, | 1332 | const char __user *userbuf, |
1317 | size_t nbytes, loff_t *unused_ppos) | 1333 | size_t nbytes, loff_t *unused_ppos) |
1318 | { | 1334 | { |
1319 | char buffer[64]; | 1335 | char buffer[64]; |
1320 | int retval = 0; | 1336 | int retval = 0; |
1321 | u64 val; | ||
1322 | char *end; | 1337 | char *end; |
1323 | 1338 | ||
1324 | if (!nbytes) | 1339 | if (!nbytes) |
@@ -1329,16 +1344,18 @@ static ssize_t cgroup_write_uint(struct cgroup *cgrp, struct cftype *cft, | |||
1329 | return -EFAULT; | 1344 | return -EFAULT; |
1330 | 1345 | ||
1331 | buffer[nbytes] = 0; /* nul-terminate */ | 1346 | buffer[nbytes] = 0; /* nul-terminate */ |
1332 | 1347 | strstrip(buffer); | |
1333 | /* strip newline if necessary */ | 1348 | if (cft->write_u64) { |
1334 | if (nbytes && (buffer[nbytes-1] == '\n')) | 1349 | u64 val = simple_strtoull(buffer, &end, 0); |
1335 | buffer[nbytes-1] = 0; | 1350 | if (*end) |
1336 | val = simple_strtoull(buffer, &end, 0); | 1351 | return -EINVAL; |
1337 | if (*end) | 1352 | retval = cft->write_u64(cgrp, cft, val); |
1338 | return -EINVAL; | 1353 | } else { |
1339 | 1354 | s64 val = simple_strtoll(buffer, &end, 0); | |
1340 | /* Pass to subsystem */ | 1355 | if (*end) |
1341 | retval = cft->write_uint(cgrp, cft, val); | 1356 | return -EINVAL; |
1357 | retval = cft->write_s64(cgrp, cft, val); | ||
1358 | } | ||
1342 | if (!retval) | 1359 | if (!retval) |
1343 | retval = nbytes; | 1360 | retval = nbytes; |
1344 | return retval; | 1361 | return retval; |
@@ -1419,23 +1436,39 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
1419 | return -ENODEV; | 1436 | return -ENODEV; |
1420 | if (cft->write) | 1437 | if (cft->write) |
1421 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1438 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
1422 | if (cft->write_uint) | 1439 | if (cft->write_u64 || cft->write_s64) |
1423 | return cgroup_write_uint(cgrp, cft, file, buf, nbytes, ppos); | 1440 | return cgroup_write_X64(cgrp, cft, file, buf, nbytes, ppos); |
1441 | if (cft->trigger) { | ||
1442 | int ret = cft->trigger(cgrp, (unsigned int)cft->private); | ||
1443 | return ret ? ret : nbytes; | ||
1444 | } | ||
1424 | return -EINVAL; | 1445 | return -EINVAL; |
1425 | } | 1446 | } |
1426 | 1447 | ||
1427 | static ssize_t cgroup_read_uint(struct cgroup *cgrp, struct cftype *cft, | 1448 | static ssize_t cgroup_read_u64(struct cgroup *cgrp, struct cftype *cft, |
1428 | struct file *file, | 1449 | struct file *file, |
1429 | char __user *buf, size_t nbytes, | 1450 | char __user *buf, size_t nbytes, |
1430 | loff_t *ppos) | 1451 | loff_t *ppos) |
1431 | { | 1452 | { |
1432 | char tmp[64]; | 1453 | char tmp[64]; |
1433 | u64 val = cft->read_uint(cgrp, cft); | 1454 | u64 val = cft->read_u64(cgrp, cft); |
1434 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); | 1455 | int len = sprintf(tmp, "%llu\n", (unsigned long long) val); |
1435 | 1456 | ||
1436 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | 1457 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); |
1437 | } | 1458 | } |
1438 | 1459 | ||
1460 | static ssize_t cgroup_read_s64(struct cgroup *cgrp, struct cftype *cft, | ||
1461 | struct file *file, | ||
1462 | char __user *buf, size_t nbytes, | ||
1463 | loff_t *ppos) | ||
1464 | { | ||
1465 | char tmp[64]; | ||
1466 | s64 val = cft->read_s64(cgrp, cft); | ||
1467 | int len = sprintf(tmp, "%lld\n", (long long) val); | ||
1468 | |||
1469 | return simple_read_from_buffer(buf, nbytes, ppos, tmp, len); | ||
1470 | } | ||
1471 | |||
1439 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, | 1472 | static ssize_t cgroup_common_file_read(struct cgroup *cgrp, |
1440 | struct cftype *cft, | 1473 | struct cftype *cft, |
1441 | struct file *file, | 1474 | struct file *file, |
@@ -1490,11 +1523,56 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
1490 | 1523 | ||
1491 | if (cft->read) | 1524 | if (cft->read) |
1492 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); | 1525 | return cft->read(cgrp, cft, file, buf, nbytes, ppos); |
1493 | if (cft->read_uint) | 1526 | if (cft->read_u64) |
1494 | return cgroup_read_uint(cgrp, cft, file, buf, nbytes, ppos); | 1527 | return cgroup_read_u64(cgrp, cft, file, buf, nbytes, ppos); |
1528 | if (cft->read_s64) | ||
1529 | return cgroup_read_s64(cgrp, cft, file, buf, nbytes, ppos); | ||
1495 | return -EINVAL; | 1530 | return -EINVAL; |
1496 | } | 1531 | } |
1497 | 1532 | ||
1533 | /* | ||
1534 | * seqfile ops/methods for returning structured data. Currently just | ||
1535 | * supports string->u64 maps, but can be extended in future. | ||
1536 | */ | ||
1537 | |||
1538 | struct cgroup_seqfile_state { | ||
1539 | struct cftype *cft; | ||
1540 | struct cgroup *cgroup; | ||
1541 | }; | ||
1542 | |||
1543 | static int cgroup_map_add(struct cgroup_map_cb *cb, const char *key, u64 value) | ||
1544 | { | ||
1545 | struct seq_file *sf = cb->state; | ||
1546 | return seq_printf(sf, "%s %llu\n", key, (unsigned long long)value); | ||
1547 | } | ||
1548 | |||
1549 | static int cgroup_seqfile_show(struct seq_file *m, void *arg) | ||
1550 | { | ||
1551 | struct cgroup_seqfile_state *state = m->private; | ||
1552 | struct cftype *cft = state->cft; | ||
1553 | if (cft->read_map) { | ||
1554 | struct cgroup_map_cb cb = { | ||
1555 | .fill = cgroup_map_add, | ||
1556 | .state = m, | ||
1557 | }; | ||
1558 | return cft->read_map(state->cgroup, cft, &cb); | ||
1559 | } | ||
1560 | return cft->read_seq_string(state->cgroup, cft, m); | ||
1561 | } | ||
1562 | |||
1563 | int cgroup_seqfile_release(struct inode *inode, struct file *file) | ||
1564 | { | ||
1565 | struct seq_file *seq = file->private_data; | ||
1566 | kfree(seq->private); | ||
1567 | return single_release(inode, file); | ||
1568 | } | ||
1569 | |||
1570 | static struct file_operations cgroup_seqfile_operations = { | ||
1571 | .read = seq_read, | ||
1572 | .llseek = seq_lseek, | ||
1573 | .release = cgroup_seqfile_release, | ||
1574 | }; | ||
1575 | |||
1498 | static int cgroup_file_open(struct inode *inode, struct file *file) | 1576 | static int cgroup_file_open(struct inode *inode, struct file *file) |
1499 | { | 1577 | { |
1500 | int err; | 1578 | int err; |
@@ -1507,7 +1585,18 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
1507 | cft = __d_cft(file->f_dentry); | 1585 | cft = __d_cft(file->f_dentry); |
1508 | if (!cft) | 1586 | if (!cft) |
1509 | return -ENODEV; | 1587 | return -ENODEV; |
1510 | if (cft->open) | 1588 | if (cft->read_map || cft->read_seq_string) { |
1589 | struct cgroup_seqfile_state *state = | ||
1590 | kzalloc(sizeof(*state), GFP_USER); | ||
1591 | if (!state) | ||
1592 | return -ENOMEM; | ||
1593 | state->cft = cft; | ||
1594 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | ||
1595 | file->f_op = &cgroup_seqfile_operations; | ||
1596 | err = single_open(file, cgroup_seqfile_show, state); | ||
1597 | if (err < 0) | ||
1598 | kfree(state); | ||
1599 | } else if (cft->open) | ||
1511 | err = cft->open(inode, file); | 1600 | err = cft->open(inode, file); |
1512 | else | 1601 | else |
1513 | err = 0; | 1602 | err = 0; |
@@ -1715,7 +1804,7 @@ static void cgroup_advance_iter(struct cgroup *cgrp, | |||
1715 | * The tasklist_lock is not held here, as do_each_thread() and | 1804 | * The tasklist_lock is not held here, as do_each_thread() and |
1716 | * while_each_thread() are protected by RCU. | 1805 | * while_each_thread() are protected by RCU. |
1717 | */ | 1806 | */ |
1718 | void cgroup_enable_task_cg_lists(void) | 1807 | static void cgroup_enable_task_cg_lists(void) |
1719 | { | 1808 | { |
1720 | struct task_struct *p, *g; | 1809 | struct task_struct *p, *g; |
1721 | write_lock(&css_set_lock); | 1810 | write_lock(&css_set_lock); |
@@ -1913,14 +2002,14 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
1913 | 2002 | ||
1914 | if (heap->size) { | 2003 | if (heap->size) { |
1915 | for (i = 0; i < heap->size; i++) { | 2004 | for (i = 0; i < heap->size; i++) { |
1916 | struct task_struct *p = heap->ptrs[i]; | 2005 | struct task_struct *q = heap->ptrs[i]; |
1917 | if (i == 0) { | 2006 | if (i == 0) { |
1918 | latest_time = p->start_time; | 2007 | latest_time = q->start_time; |
1919 | latest_task = p; | 2008 | latest_task = q; |
1920 | } | 2009 | } |
1921 | /* Process the task per the caller's callback */ | 2010 | /* Process the task per the caller's callback */ |
1922 | scan->process_task(p, scan); | 2011 | scan->process_task(q, scan); |
1923 | put_task_struct(p); | 2012 | put_task_struct(q); |
1924 | } | 2013 | } |
1925 | /* | 2014 | /* |
1926 | * If we had to process any tasks at all, scan again | 2015 | * If we had to process any tasks at all, scan again |
@@ -2138,11 +2227,6 @@ static u64 cgroup_read_notify_on_release(struct cgroup *cgrp, | |||
2138 | return notify_on_release(cgrp); | 2227 | return notify_on_release(cgrp); |
2139 | } | 2228 | } |
2140 | 2229 | ||
2141 | static u64 cgroup_read_releasable(struct cgroup *cgrp, struct cftype *cft) | ||
2142 | { | ||
2143 | return test_bit(CGRP_RELEASABLE, &cgrp->flags); | ||
2144 | } | ||
2145 | |||
2146 | /* | 2230 | /* |
2147 | * for the common functions, 'private' gives the type of file | 2231 | * for the common functions, 'private' gives the type of file |
2148 | */ | 2232 | */ |
@@ -2158,16 +2242,10 @@ static struct cftype files[] = { | |||
2158 | 2242 | ||
2159 | { | 2243 | { |
2160 | .name = "notify_on_release", | 2244 | .name = "notify_on_release", |
2161 | .read_uint = cgroup_read_notify_on_release, | 2245 | .read_u64 = cgroup_read_notify_on_release, |
2162 | .write = cgroup_common_file_write, | 2246 | .write = cgroup_common_file_write, |
2163 | .private = FILE_NOTIFY_ON_RELEASE, | 2247 | .private = FILE_NOTIFY_ON_RELEASE, |
2164 | }, | 2248 | }, |
2165 | |||
2166 | { | ||
2167 | .name = "releasable", | ||
2168 | .read_uint = cgroup_read_releasable, | ||
2169 | .private = FILE_RELEASABLE, | ||
2170 | } | ||
2171 | }; | 2249 | }; |
2172 | 2250 | ||
2173 | static struct cftype cft_release_agent = { | 2251 | static struct cftype cft_release_agent = { |
@@ -2401,10 +2479,9 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2401 | return 0; | 2479 | return 0; |
2402 | } | 2480 | } |
2403 | 2481 | ||
2404 | static void cgroup_init_subsys(struct cgroup_subsys *ss) | 2482 | static void __init cgroup_init_subsys(struct cgroup_subsys *ss) |
2405 | { | 2483 | { |
2406 | struct cgroup_subsys_state *css; | 2484 | struct cgroup_subsys_state *css; |
2407 | struct list_head *l; | ||
2408 | 2485 | ||
2409 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2486 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2410 | 2487 | ||
@@ -2415,34 +2492,19 @@ static void cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2415 | BUG_ON(IS_ERR(css)); | 2492 | BUG_ON(IS_ERR(css)); |
2416 | init_cgroup_css(css, ss, dummytop); | 2493 | init_cgroup_css(css, ss, dummytop); |
2417 | 2494 | ||
2418 | /* Update all cgroup groups to contain a subsys | 2495 | /* Update the init_css_set to contain a subsys |
2419 | * pointer to this state - since the subsystem is | 2496 | * pointer to this state - since the subsystem is |
2420 | * newly registered, all tasks and hence all cgroup | 2497 | * newly registered, all tasks and hence the |
2421 | * groups are in the subsystem's top cgroup. */ | 2498 | * init_css_set is in the subsystem's top cgroup. */ |
2422 | write_lock(&css_set_lock); | 2499 | init_css_set.subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; |
2423 | l = &init_css_set.list; | ||
2424 | do { | ||
2425 | struct css_set *cg = | ||
2426 | list_entry(l, struct css_set, list); | ||
2427 | cg->subsys[ss->subsys_id] = dummytop->subsys[ss->subsys_id]; | ||
2428 | l = l->next; | ||
2429 | } while (l != &init_css_set.list); | ||
2430 | write_unlock(&css_set_lock); | ||
2431 | |||
2432 | /* If this subsystem requested that it be notified with fork | ||
2433 | * events, we should send it one now for every process in the | ||
2434 | * system */ | ||
2435 | if (ss->fork) { | ||
2436 | struct task_struct *g, *p; | ||
2437 | |||
2438 | read_lock(&tasklist_lock); | ||
2439 | do_each_thread(g, p) { | ||
2440 | ss->fork(ss, p); | ||
2441 | } while_each_thread(g, p); | ||
2442 | read_unlock(&tasklist_lock); | ||
2443 | } | ||
2444 | 2500 | ||
2445 | need_forkexit_callback |= ss->fork || ss->exit; | 2501 | need_forkexit_callback |= ss->fork || ss->exit; |
2502 | need_mm_owner_callback |= !!ss->mm_owner_changed; | ||
2503 | |||
2504 | /* At system boot, before all subsystems have been | ||
2505 | * registered, no tasks have been forked, so we don't | ||
2506 | * need to invoke fork callbacks here. */ | ||
2507 | BUG_ON(!list_empty(&init_task.tasks)); | ||
2446 | 2508 | ||
2447 | ss->active = 1; | 2509 | ss->active = 1; |
2448 | } | 2510 | } |
@@ -2458,9 +2520,9 @@ int __init cgroup_init_early(void) | |||
2458 | int i; | 2520 | int i; |
2459 | kref_init(&init_css_set.ref); | 2521 | kref_init(&init_css_set.ref); |
2460 | kref_get(&init_css_set.ref); | 2522 | kref_get(&init_css_set.ref); |
2461 | INIT_LIST_HEAD(&init_css_set.list); | ||
2462 | INIT_LIST_HEAD(&init_css_set.cg_links); | 2523 | INIT_LIST_HEAD(&init_css_set.cg_links); |
2463 | INIT_LIST_HEAD(&init_css_set.tasks); | 2524 | INIT_LIST_HEAD(&init_css_set.tasks); |
2525 | INIT_HLIST_NODE(&init_css_set.hlist); | ||
2464 | css_set_count = 1; | 2526 | css_set_count = 1; |
2465 | init_cgroup_root(&rootnode); | 2527 | init_cgroup_root(&rootnode); |
2466 | list_add(&rootnode.root_list, &roots); | 2528 | list_add(&rootnode.root_list, &roots); |
@@ -2473,6 +2535,9 @@ int __init cgroup_init_early(void) | |||
2473 | list_add(&init_css_set_link.cg_link_list, | 2535 | list_add(&init_css_set_link.cg_link_list, |
2474 | &init_css_set.cg_links); | 2536 | &init_css_set.cg_links); |
2475 | 2537 | ||
2538 | for (i = 0; i < CSS_SET_TABLE_SIZE; i++) | ||
2539 | INIT_HLIST_HEAD(&css_set_table[i]); | ||
2540 | |||
2476 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 2541 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
2477 | struct cgroup_subsys *ss = subsys[i]; | 2542 | struct cgroup_subsys *ss = subsys[i]; |
2478 | 2543 | ||
@@ -2502,7 +2567,7 @@ int __init cgroup_init(void) | |||
2502 | { | 2567 | { |
2503 | int err; | 2568 | int err; |
2504 | int i; | 2569 | int i; |
2505 | struct proc_dir_entry *entry; | 2570 | struct hlist_head *hhead; |
2506 | 2571 | ||
2507 | err = bdi_init(&cgroup_backing_dev_info); | 2572 | err = bdi_init(&cgroup_backing_dev_info); |
2508 | if (err) | 2573 | if (err) |
@@ -2514,13 +2579,15 @@ int __init cgroup_init(void) | |||
2514 | cgroup_init_subsys(ss); | 2579 | cgroup_init_subsys(ss); |
2515 | } | 2580 | } |
2516 | 2581 | ||
2582 | /* Add init_css_set to the hash table */ | ||
2583 | hhead = css_set_hash(init_css_set.subsys); | ||
2584 | hlist_add_head(&init_css_set.hlist, hhead); | ||
2585 | |||
2517 | err = register_filesystem(&cgroup_fs_type); | 2586 | err = register_filesystem(&cgroup_fs_type); |
2518 | if (err < 0) | 2587 | if (err < 0) |
2519 | goto out; | 2588 | goto out; |
2520 | 2589 | ||
2521 | entry = create_proc_entry("cgroups", 0, NULL); | 2590 | proc_create("cgroups", 0, NULL, &proc_cgroupstats_operations); |
2522 | if (entry) | ||
2523 | entry->proc_fops = &proc_cgroupstats_operations; | ||
2524 | 2591 | ||
2525 | out: | 2592 | out: |
2526 | if (err) | 2593 | if (err) |
@@ -2683,6 +2750,34 @@ void cgroup_fork_callbacks(struct task_struct *child) | |||
2683 | } | 2750 | } |
2684 | } | 2751 | } |
2685 | 2752 | ||
2753 | #ifdef CONFIG_MM_OWNER | ||
2754 | /** | ||
2755 | * cgroup_mm_owner_callbacks - run callbacks when the mm->owner changes | ||
2756 | * @p: the new owner | ||
2757 | * | ||
2758 | * Called on every change to mm->owner. mm_init_owner() does not | ||
2759 | * invoke this routine, since it assigns the mm->owner the first time | ||
2760 | * and does not change it. | ||
2761 | */ | ||
2762 | void cgroup_mm_owner_callbacks(struct task_struct *old, struct task_struct *new) | ||
2763 | { | ||
2764 | struct cgroup *oldcgrp, *newcgrp; | ||
2765 | |||
2766 | if (need_mm_owner_callback) { | ||
2767 | int i; | ||
2768 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2769 | struct cgroup_subsys *ss = subsys[i]; | ||
2770 | oldcgrp = task_cgroup(old, ss->subsys_id); | ||
2771 | newcgrp = task_cgroup(new, ss->subsys_id); | ||
2772 | if (oldcgrp == newcgrp) | ||
2773 | continue; | ||
2774 | if (ss->mm_owner_changed) | ||
2775 | ss->mm_owner_changed(ss, oldcgrp, newcgrp); | ||
2776 | } | ||
2777 | } | ||
2778 | } | ||
2779 | #endif /* CONFIG_MM_OWNER */ | ||
2780 | |||
2686 | /** | 2781 | /** |
2687 | * cgroup_post_fork - called on a new task after adding it to the task list | 2782 | * cgroup_post_fork - called on a new task after adding it to the task list |
2688 | * @child: the task in question | 2783 | * @child: the task in question |