diff options
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- | kernel/cgroup.c | 276 |
1 files changed, 178 insertions, 98 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f221446aa02d..c29831076e7a 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -84,7 +84,7 @@ struct cgroupfs_root { | |||
84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ | 84 | /* Tracks how many cgroups are currently defined in hierarchy.*/ |
85 | int number_of_cgroups; | 85 | int number_of_cgroups; |
86 | 86 | ||
87 | /* A list running through the mounted hierarchies */ | 87 | /* A list running through the active hierarchies */ |
88 | struct list_head root_list; | 88 | struct list_head root_list; |
89 | 89 | ||
90 | /* Hierarchy-specific flags */ | 90 | /* Hierarchy-specific flags */ |
@@ -148,8 +148,8 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
148 | #define for_each_subsys(_root, _ss) \ | 148 | #define for_each_subsys(_root, _ss) \ |
149 | list_for_each_entry(_ss, &_root->subsys_list, sibling) | 149 | list_for_each_entry(_ss, &_root->subsys_list, sibling) |
150 | 150 | ||
151 | /* for_each_root() allows you to iterate across the active hierarchies */ | 151 | /* for_each_active_root() allows you to iterate across the active hierarchies */ |
152 | #define for_each_root(_root) \ | 152 | #define for_each_active_root(_root) \ |
153 | list_for_each_entry(_root, &roots, root_list) | 153 | list_for_each_entry(_root, &roots, root_list) |
154 | 154 | ||
155 | /* the list of cgroups eligible for automatic release. Protected by | 155 | /* the list of cgroups eligible for automatic release. Protected by |
@@ -271,7 +271,7 @@ static void __put_css_set(struct css_set *cg, int taskexit) | |||
271 | 271 | ||
272 | rcu_read_lock(); | 272 | rcu_read_lock(); |
273 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 273 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { |
274 | struct cgroup *cgrp = cg->subsys[i]->cgroup; | 274 | struct cgroup *cgrp = rcu_dereference(cg->subsys[i]->cgroup); |
275 | if (atomic_dec_and_test(&cgrp->count) && | 275 | if (atomic_dec_and_test(&cgrp->count) && |
276 | notify_on_release(cgrp)) { | 276 | notify_on_release(cgrp)) { |
277 | if (taskexit) | 277 | if (taskexit) |
@@ -384,6 +384,25 @@ static int allocate_cg_links(int count, struct list_head *tmp) | |||
384 | return 0; | 384 | return 0; |
385 | } | 385 | } |
386 | 386 | ||
387 | /** | ||
388 | * link_css_set - a helper function to link a css_set to a cgroup | ||
389 | * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links() | ||
390 | * @cg: the css_set to be linked | ||
391 | * @cgrp: the destination cgroup | ||
392 | */ | ||
393 | static void link_css_set(struct list_head *tmp_cg_links, | ||
394 | struct css_set *cg, struct cgroup *cgrp) | ||
395 | { | ||
396 | struct cg_cgroup_link *link; | ||
397 | |||
398 | BUG_ON(list_empty(tmp_cg_links)); | ||
399 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | ||
400 | cgrp_link_list); | ||
401 | link->cg = cg; | ||
402 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | ||
403 | list_add(&link->cg_link_list, &cg->cg_links); | ||
404 | } | ||
405 | |||
387 | /* | 406 | /* |
388 | * find_css_set() takes an existing cgroup group and a | 407 | * find_css_set() takes an existing cgroup group and a |
389 | * cgroup object, and returns a css_set object that's | 408 | * cgroup object, and returns a css_set object that's |
@@ -399,7 +418,6 @@ static struct css_set *find_css_set( | |||
399 | int i; | 418 | int i; |
400 | 419 | ||
401 | struct list_head tmp_cg_links; | 420 | struct list_head tmp_cg_links; |
402 | struct cg_cgroup_link *link; | ||
403 | 421 | ||
404 | struct hlist_head *hhead; | 422 | struct hlist_head *hhead; |
405 | 423 | ||
@@ -444,26 +462,11 @@ static struct css_set *find_css_set( | |||
444 | * only do it for the first subsystem in each | 462 | * only do it for the first subsystem in each |
445 | * hierarchy | 463 | * hierarchy |
446 | */ | 464 | */ |
447 | if (ss->root->subsys_list.next == &ss->sibling) { | 465 | if (ss->root->subsys_list.next == &ss->sibling) |
448 | BUG_ON(list_empty(&tmp_cg_links)); | 466 | link_css_set(&tmp_cg_links, res, cgrp); |
449 | link = list_entry(tmp_cg_links.next, | ||
450 | struct cg_cgroup_link, | ||
451 | cgrp_link_list); | ||
452 | list_del(&link->cgrp_link_list); | ||
453 | list_add(&link->cgrp_link_list, &cgrp->css_sets); | ||
454 | link->cg = res; | ||
455 | list_add(&link->cg_link_list, &res->cg_links); | ||
456 | } | ||
457 | } | ||
458 | if (list_empty(&rootnode.subsys_list)) { | ||
459 | link = list_entry(tmp_cg_links.next, | ||
460 | struct cg_cgroup_link, | ||
461 | cgrp_link_list); | ||
462 | list_del(&link->cgrp_link_list); | ||
463 | list_add(&link->cgrp_link_list, &dummytop->css_sets); | ||
464 | link->cg = res; | ||
465 | list_add(&link->cg_link_list, &res->cg_links); | ||
466 | } | 467 | } |
468 | if (list_empty(&rootnode.subsys_list)) | ||
469 | link_css_set(&tmp_cg_links, res, dummytop); | ||
467 | 470 | ||
468 | BUG_ON(!list_empty(&tmp_cg_links)); | 471 | BUG_ON(!list_empty(&tmp_cg_links)); |
469 | 472 | ||
@@ -586,11 +589,18 @@ static void cgroup_call_pre_destroy(struct cgroup *cgrp) | |||
586 | { | 589 | { |
587 | struct cgroup_subsys *ss; | 590 | struct cgroup_subsys *ss; |
588 | for_each_subsys(cgrp->root, ss) | 591 | for_each_subsys(cgrp->root, ss) |
589 | if (ss->pre_destroy && cgrp->subsys[ss->subsys_id]) | 592 | if (ss->pre_destroy) |
590 | ss->pre_destroy(ss, cgrp); | 593 | ss->pre_destroy(ss, cgrp); |
591 | return; | 594 | return; |
592 | } | 595 | } |
593 | 596 | ||
597 | static void free_cgroup_rcu(struct rcu_head *obj) | ||
598 | { | ||
599 | struct cgroup *cgrp = container_of(obj, struct cgroup, rcu_head); | ||
600 | |||
601 | kfree(cgrp); | ||
602 | } | ||
603 | |||
594 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 604 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
595 | { | 605 | { |
596 | /* is dentry a directory ? if so, kfree() associated cgroup */ | 606 | /* is dentry a directory ? if so, kfree() associated cgroup */ |
@@ -610,19 +620,19 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
610 | /* | 620 | /* |
611 | * Release the subsystem state objects. | 621 | * Release the subsystem state objects. |
612 | */ | 622 | */ |
613 | for_each_subsys(cgrp->root, ss) { | 623 | for_each_subsys(cgrp->root, ss) |
614 | if (cgrp->subsys[ss->subsys_id]) | 624 | ss->destroy(ss, cgrp); |
615 | ss->destroy(ss, cgrp); | ||
616 | } | ||
617 | 625 | ||
618 | cgrp->root->number_of_cgroups--; | 626 | cgrp->root->number_of_cgroups--; |
619 | mutex_unlock(&cgroup_mutex); | 627 | mutex_unlock(&cgroup_mutex); |
620 | 628 | ||
621 | /* Drop the active superblock reference that we took when we | 629 | /* |
622 | * created the cgroup */ | 630 | * Drop the active superblock reference that we took when we |
631 | * created the cgroup | ||
632 | */ | ||
623 | deactivate_super(cgrp->root->sb); | 633 | deactivate_super(cgrp->root->sb); |
624 | 634 | ||
625 | kfree(cgrp); | 635 | call_rcu(&cgrp->rcu_head, free_cgroup_rcu); |
626 | } | 636 | } |
627 | iput(inode); | 637 | iput(inode); |
628 | } | 638 | } |
@@ -712,23 +722,26 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
712 | BUG_ON(cgrp->subsys[i]); | 722 | BUG_ON(cgrp->subsys[i]); |
713 | BUG_ON(!dummytop->subsys[i]); | 723 | BUG_ON(!dummytop->subsys[i]); |
714 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); | 724 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); |
725 | mutex_lock(&ss->hierarchy_mutex); | ||
715 | cgrp->subsys[i] = dummytop->subsys[i]; | 726 | cgrp->subsys[i] = dummytop->subsys[i]; |
716 | cgrp->subsys[i]->cgroup = cgrp; | 727 | cgrp->subsys[i]->cgroup = cgrp; |
717 | list_add(&ss->sibling, &root->subsys_list); | 728 | list_move(&ss->sibling, &root->subsys_list); |
718 | rcu_assign_pointer(ss->root, root); | 729 | ss->root = root; |
719 | if (ss->bind) | 730 | if (ss->bind) |
720 | ss->bind(ss, cgrp); | 731 | ss->bind(ss, cgrp); |
721 | 732 | mutex_unlock(&ss->hierarchy_mutex); | |
722 | } else if (bit & removed_bits) { | 733 | } else if (bit & removed_bits) { |
723 | /* We're removing this subsystem */ | 734 | /* We're removing this subsystem */ |
724 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | 735 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); |
725 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 736 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
737 | mutex_lock(&ss->hierarchy_mutex); | ||
726 | if (ss->bind) | 738 | if (ss->bind) |
727 | ss->bind(ss, dummytop); | 739 | ss->bind(ss, dummytop); |
728 | dummytop->subsys[i]->cgroup = dummytop; | 740 | dummytop->subsys[i]->cgroup = dummytop; |
729 | cgrp->subsys[i] = NULL; | 741 | cgrp->subsys[i] = NULL; |
730 | rcu_assign_pointer(subsys[i]->root, &rootnode); | 742 | subsys[i]->root = &rootnode; |
731 | list_del(&ss->sibling); | 743 | list_move(&ss->sibling, &rootnode.subsys_list); |
744 | mutex_unlock(&ss->hierarchy_mutex); | ||
732 | } else if (bit & final_bits) { | 745 | } else if (bit & final_bits) { |
733 | /* Subsystem state should already exist */ | 746 | /* Subsystem state should already exist */ |
734 | BUG_ON(!cgrp->subsys[i]); | 747 | BUG_ON(!cgrp->subsys[i]); |
@@ -990,7 +1003,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
990 | root = NULL; | 1003 | root = NULL; |
991 | } else { | 1004 | } else { |
992 | /* New superblock */ | 1005 | /* New superblock */ |
993 | struct cgroup *cgrp = &root->top_cgroup; | 1006 | struct cgroup *root_cgrp = &root->top_cgroup; |
994 | struct inode *inode; | 1007 | struct inode *inode; |
995 | int i; | 1008 | int i; |
996 | 1009 | ||
@@ -1031,7 +1044,7 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1031 | list_add(&root->root_list, &roots); | 1044 | list_add(&root->root_list, &roots); |
1032 | root_count++; | 1045 | root_count++; |
1033 | 1046 | ||
1034 | sb->s_root->d_fsdata = &root->top_cgroup; | 1047 | sb->s_root->d_fsdata = root_cgrp; |
1035 | root->top_cgroup.dentry = sb->s_root; | 1048 | root->top_cgroup.dentry = sb->s_root; |
1036 | 1049 | ||
1037 | /* Link the top cgroup in this hierarchy into all | 1050 | /* Link the top cgroup in this hierarchy into all |
@@ -1042,29 +1055,18 @@ static int cgroup_get_sb(struct file_system_type *fs_type, | |||
1042 | struct hlist_node *node; | 1055 | struct hlist_node *node; |
1043 | struct css_set *cg; | 1056 | struct css_set *cg; |
1044 | 1057 | ||
1045 | hlist_for_each_entry(cg, node, hhead, hlist) { | 1058 | hlist_for_each_entry(cg, node, hhead, hlist) |
1046 | struct cg_cgroup_link *link; | 1059 | link_css_set(&tmp_cg_links, cg, root_cgrp); |
1047 | |||
1048 | BUG_ON(list_empty(&tmp_cg_links)); | ||
1049 | link = list_entry(tmp_cg_links.next, | ||
1050 | struct cg_cgroup_link, | ||
1051 | cgrp_link_list); | ||
1052 | list_del(&link->cgrp_link_list); | ||
1053 | link->cg = cg; | ||
1054 | list_add(&link->cgrp_link_list, | ||
1055 | &root->top_cgroup.css_sets); | ||
1056 | list_add(&link->cg_link_list, &cg->cg_links); | ||
1057 | } | ||
1058 | } | 1060 | } |
1059 | write_unlock(&css_set_lock); | 1061 | write_unlock(&css_set_lock); |
1060 | 1062 | ||
1061 | free_cg_links(&tmp_cg_links); | 1063 | free_cg_links(&tmp_cg_links); |
1062 | 1064 | ||
1063 | BUG_ON(!list_empty(&cgrp->sibling)); | 1065 | BUG_ON(!list_empty(&root_cgrp->sibling)); |
1064 | BUG_ON(!list_empty(&cgrp->children)); | 1066 | BUG_ON(!list_empty(&root_cgrp->children)); |
1065 | BUG_ON(root->number_of_cgroups != 1); | 1067 | BUG_ON(root->number_of_cgroups != 1); |
1066 | 1068 | ||
1067 | cgroup_populate_dir(cgrp); | 1069 | cgroup_populate_dir(root_cgrp); |
1068 | mutex_unlock(&inode->i_mutex); | 1070 | mutex_unlock(&inode->i_mutex); |
1069 | mutex_unlock(&cgroup_mutex); | 1071 | mutex_unlock(&cgroup_mutex); |
1070 | } | 1072 | } |
@@ -1113,10 +1115,9 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
1113 | } | 1115 | } |
1114 | write_unlock(&css_set_lock); | 1116 | write_unlock(&css_set_lock); |
1115 | 1117 | ||
1116 | if (!list_empty(&root->root_list)) { | 1118 | list_del(&root->root_list); |
1117 | list_del(&root->root_list); | 1119 | root_count--; |
1118 | root_count--; | 1120 | |
1119 | } | ||
1120 | mutex_unlock(&cgroup_mutex); | 1121 | mutex_unlock(&cgroup_mutex); |
1121 | 1122 | ||
1122 | kfree(root); | 1123 | kfree(root); |
@@ -1145,14 +1146,16 @@ static inline struct cftype *__d_cft(struct dentry *dentry) | |||
1145 | * @buf: the buffer to write the path into | 1146 | * @buf: the buffer to write the path into |
1146 | * @buflen: the length of the buffer | 1147 | * @buflen: the length of the buffer |
1147 | * | 1148 | * |
1148 | * Called with cgroup_mutex held. Writes path of cgroup into buf. | 1149 | * Called with cgroup_mutex held or else with an RCU-protected cgroup |
1149 | * Returns 0 on success, -errno on error. | 1150 | * reference. Writes path of cgroup into buf. Returns 0 on success, |
1151 | * -errno on error. | ||
1150 | */ | 1152 | */ |
1151 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | 1153 | int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) |
1152 | { | 1154 | { |
1153 | char *start; | 1155 | char *start; |
1156 | struct dentry *dentry = rcu_dereference(cgrp->dentry); | ||
1154 | 1157 | ||
1155 | if (cgrp == dummytop) { | 1158 | if (!dentry || cgrp == dummytop) { |
1156 | /* | 1159 | /* |
1157 | * Inactive subsystems have no dentry for their root | 1160 | * Inactive subsystems have no dentry for their root |
1158 | * cgroup | 1161 | * cgroup |
@@ -1165,13 +1168,14 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) | |||
1165 | 1168 | ||
1166 | *--start = '\0'; | 1169 | *--start = '\0'; |
1167 | for (;;) { | 1170 | for (;;) { |
1168 | int len = cgrp->dentry->d_name.len; | 1171 | int len = dentry->d_name.len; |
1169 | if ((start -= len) < buf) | 1172 | if ((start -= len) < buf) |
1170 | return -ENAMETOOLONG; | 1173 | return -ENAMETOOLONG; |
1171 | memcpy(start, cgrp->dentry->d_name.name, len); | 1174 | memcpy(start, cgrp->dentry->d_name.name, len); |
1172 | cgrp = cgrp->parent; | 1175 | cgrp = cgrp->parent; |
1173 | if (!cgrp) | 1176 | if (!cgrp) |
1174 | break; | 1177 | break; |
1178 | dentry = rcu_dereference(cgrp->dentry); | ||
1175 | if (!cgrp->parent) | 1179 | if (!cgrp->parent) |
1176 | continue; | 1180 | continue; |
1177 | if (--start < buf) | 1181 | if (--start < buf) |
@@ -1216,7 +1220,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1216 | int retval = 0; | 1220 | int retval = 0; |
1217 | struct cgroup_subsys *ss; | 1221 | struct cgroup_subsys *ss; |
1218 | struct cgroup *oldcgrp; | 1222 | struct cgroup *oldcgrp; |
1219 | struct css_set *cg = tsk->cgroups; | 1223 | struct css_set *cg; |
1220 | struct css_set *newcg; | 1224 | struct css_set *newcg; |
1221 | struct cgroupfs_root *root = cgrp->root; | 1225 | struct cgroupfs_root *root = cgrp->root; |
1222 | int subsys_id; | 1226 | int subsys_id; |
@@ -1236,11 +1240,16 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) | |||
1236 | } | 1240 | } |
1237 | } | 1241 | } |
1238 | 1242 | ||
1243 | task_lock(tsk); | ||
1244 | cg = tsk->cgroups; | ||
1245 | get_css_set(cg); | ||
1246 | task_unlock(tsk); | ||
1239 | /* | 1247 | /* |
1240 | * Locate or allocate a new css_set for this task, | 1248 | * Locate or allocate a new css_set for this task, |
1241 | * based on its final set of cgroups | 1249 | * based on its final set of cgroups |
1242 | */ | 1250 | */ |
1243 | newcg = find_css_set(cg, cgrp); | 1251 | newcg = find_css_set(cg, cgrp); |
1252 | put_css_set(cg); | ||
1244 | if (!newcg) | 1253 | if (!newcg) |
1245 | return -ENOMEM; | 1254 | return -ENOMEM; |
1246 | 1255 | ||
@@ -1445,7 +1454,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
1445 | struct cftype *cft = __d_cft(file->f_dentry); | 1454 | struct cftype *cft = __d_cft(file->f_dentry); |
1446 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1455 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1447 | 1456 | ||
1448 | if (!cft || cgroup_is_removed(cgrp)) | 1457 | if (cgroup_is_removed(cgrp)) |
1449 | return -ENODEV; | 1458 | return -ENODEV; |
1450 | if (cft->write) | 1459 | if (cft->write) |
1451 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 1460 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
@@ -1490,7 +1499,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
1490 | struct cftype *cft = __d_cft(file->f_dentry); | 1499 | struct cftype *cft = __d_cft(file->f_dentry); |
1491 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 1500 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
1492 | 1501 | ||
1493 | if (!cft || cgroup_is_removed(cgrp)) | 1502 | if (cgroup_is_removed(cgrp)) |
1494 | return -ENODEV; | 1503 | return -ENODEV; |
1495 | 1504 | ||
1496 | if (cft->read) | 1505 | if (cft->read) |
@@ -1554,10 +1563,8 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
1554 | err = generic_file_open(inode, file); | 1563 | err = generic_file_open(inode, file); |
1555 | if (err) | 1564 | if (err) |
1556 | return err; | 1565 | return err; |
1557 | |||
1558 | cft = __d_cft(file->f_dentry); | 1566 | cft = __d_cft(file->f_dentry); |
1559 | if (!cft) | 1567 | |
1560 | return -ENODEV; | ||
1561 | if (cft->read_map || cft->read_seq_string) { | 1568 | if (cft->read_map || cft->read_seq_string) { |
1562 | struct cgroup_seqfile_state *state = | 1569 | struct cgroup_seqfile_state *state = |
1563 | kzalloc(sizeof(*state), GFP_USER); | 1570 | kzalloc(sizeof(*state), GFP_USER); |
@@ -1671,7 +1678,7 @@ static int cgroup_create_dir(struct cgroup *cgrp, struct dentry *dentry, | |||
1671 | if (!error) { | 1678 | if (!error) { |
1672 | dentry->d_fsdata = cgrp; | 1679 | dentry->d_fsdata = cgrp; |
1673 | inc_nlink(parent->d_inode); | 1680 | inc_nlink(parent->d_inode); |
1674 | cgrp->dentry = dentry; | 1681 | rcu_assign_pointer(cgrp->dentry, dentry); |
1675 | dget(dentry); | 1682 | dget(dentry); |
1676 | } | 1683 | } |
1677 | dput(dentry); | 1684 | dput(dentry); |
@@ -1812,6 +1819,7 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1812 | { | 1819 | { |
1813 | struct task_struct *res; | 1820 | struct task_struct *res; |
1814 | struct list_head *l = it->task; | 1821 | struct list_head *l = it->task; |
1822 | struct cg_cgroup_link *link; | ||
1815 | 1823 | ||
1816 | /* If the iterator cg is NULL, we have no tasks */ | 1824 | /* If the iterator cg is NULL, we have no tasks */ |
1817 | if (!it->cg_link) | 1825 | if (!it->cg_link) |
@@ -1819,7 +1827,8 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
1819 | res = list_entry(l, struct task_struct, cg_list); | 1827 | res = list_entry(l, struct task_struct, cg_list); |
1820 | /* Advance iterator to find next entry */ | 1828 | /* Advance iterator to find next entry */ |
1821 | l = l->next; | 1829 | l = l->next; |
1822 | if (l == &res->cgroups->tasks) { | 1830 | link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list); |
1831 | if (l == &link->cg->tasks) { | ||
1823 | /* We reached the end of this task list - move on to | 1832 | /* We reached the end of this task list - move on to |
1824 | * the next cg_cgroup_link */ | 1833 | * the next cg_cgroup_link */ |
1825 | cgroup_advance_iter(cgrp, it); | 1834 | cgroup_advance_iter(cgrp, it); |
@@ -2013,14 +2022,16 @@ int cgroup_scan_tasks(struct cgroup_scanner *scan) | |||
2013 | */ | 2022 | */ |
2014 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) | 2023 | static int pid_array_load(pid_t *pidarray, int npids, struct cgroup *cgrp) |
2015 | { | 2024 | { |
2016 | int n = 0; | 2025 | int n = 0, pid; |
2017 | struct cgroup_iter it; | 2026 | struct cgroup_iter it; |
2018 | struct task_struct *tsk; | 2027 | struct task_struct *tsk; |
2019 | cgroup_iter_start(cgrp, &it); | 2028 | cgroup_iter_start(cgrp, &it); |
2020 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2029 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
2021 | if (unlikely(n == npids)) | 2030 | if (unlikely(n == npids)) |
2022 | break; | 2031 | break; |
2023 | pidarray[n++] = task_pid_vnr(tsk); | 2032 | pid = task_pid_vnr(tsk); |
2033 | if (pid > 0) | ||
2034 | pidarray[n++] = pid; | ||
2024 | } | 2035 | } |
2025 | cgroup_iter_end(cgrp, &it); | 2036 | cgroup_iter_end(cgrp, &it); |
2026 | return n; | 2037 | return n; |
@@ -2052,7 +2063,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2052 | 2063 | ||
2053 | ret = 0; | 2064 | ret = 0; |
2054 | cgrp = dentry->d_fsdata; | 2065 | cgrp = dentry->d_fsdata; |
2055 | rcu_read_lock(); | ||
2056 | 2066 | ||
2057 | cgroup_iter_start(cgrp, &it); | 2067 | cgroup_iter_start(cgrp, &it); |
2058 | while ((tsk = cgroup_iter_next(cgrp, &it))) { | 2068 | while ((tsk = cgroup_iter_next(cgrp, &it))) { |
@@ -2077,7 +2087,6 @@ int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry) | |||
2077 | } | 2087 | } |
2078 | cgroup_iter_end(cgrp, &it); | 2088 | cgroup_iter_end(cgrp, &it); |
2079 | 2089 | ||
2080 | rcu_read_unlock(); | ||
2081 | err: | 2090 | err: |
2082 | return ret; | 2091 | return ret; |
2083 | } | 2092 | } |
@@ -2324,7 +2333,7 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2324 | struct cgroup *cgrp) | 2333 | struct cgroup *cgrp) |
2325 | { | 2334 | { |
2326 | css->cgroup = cgrp; | 2335 | css->cgroup = cgrp; |
2327 | atomic_set(&css->refcnt, 0); | 2336 | atomic_set(&css->refcnt, 1); |
2328 | css->flags = 0; | 2337 | css->flags = 0; |
2329 | if (cgrp == dummytop) | 2338 | if (cgrp == dummytop) |
2330 | set_bit(CSS_ROOT, &css->flags); | 2339 | set_bit(CSS_ROOT, &css->flags); |
@@ -2332,6 +2341,29 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
2332 | cgrp->subsys[ss->subsys_id] = css; | 2341 | cgrp->subsys[ss->subsys_id] = css; |
2333 | } | 2342 | } |
2334 | 2343 | ||
2344 | static void cgroup_lock_hierarchy(struct cgroupfs_root *root) | ||
2345 | { | ||
2346 | /* We need to take each hierarchy_mutex in a consistent order */ | ||
2347 | int i; | ||
2348 | |||
2349 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2350 | struct cgroup_subsys *ss = subsys[i]; | ||
2351 | if (ss->root == root) | ||
2352 | mutex_lock_nested(&ss->hierarchy_mutex, i); | ||
2353 | } | ||
2354 | } | ||
2355 | |||
2356 | static void cgroup_unlock_hierarchy(struct cgroupfs_root *root) | ||
2357 | { | ||
2358 | int i; | ||
2359 | |||
2360 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
2361 | struct cgroup_subsys *ss = subsys[i]; | ||
2362 | if (ss->root == root) | ||
2363 | mutex_unlock(&ss->hierarchy_mutex); | ||
2364 | } | ||
2365 | } | ||
2366 | |||
2335 | /* | 2367 | /* |
2336 | * cgroup_create - create a cgroup | 2368 | * cgroup_create - create a cgroup |
2337 | * @parent: cgroup that will be parent of the new cgroup | 2369 | * @parent: cgroup that will be parent of the new cgroup |
@@ -2380,7 +2412,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
2380 | init_cgroup_css(css, ss, cgrp); | 2412 | init_cgroup_css(css, ss, cgrp); |
2381 | } | 2413 | } |
2382 | 2414 | ||
2415 | cgroup_lock_hierarchy(root); | ||
2383 | list_add(&cgrp->sibling, &cgrp->parent->children); | 2416 | list_add(&cgrp->sibling, &cgrp->parent->children); |
2417 | cgroup_unlock_hierarchy(root); | ||
2384 | root->number_of_cgroups++; | 2418 | root->number_of_cgroups++; |
2385 | 2419 | ||
2386 | err = cgroup_create_dir(cgrp, dentry, mode); | 2420 | err = cgroup_create_dir(cgrp, dentry, mode); |
@@ -2431,7 +2465,7 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2431 | { | 2465 | { |
2432 | /* Check the reference count on each subsystem. Since we | 2466 | /* Check the reference count on each subsystem. Since we |
2433 | * already established that there are no tasks in the | 2467 | * already established that there are no tasks in the |
2434 | * cgroup, if the css refcount is also 0, then there should | 2468 | * cgroup, if the css refcount is also 1, then there should |
2435 | * be no outstanding references, so the subsystem is safe to | 2469 | * be no outstanding references, so the subsystem is safe to |
2436 | * destroy. We scan across all subsystems rather than using | 2470 | * destroy. We scan across all subsystems rather than using |
2437 | * the per-hierarchy linked list of mounted subsystems since | 2471 | * the per-hierarchy linked list of mounted subsystems since |
@@ -2452,19 +2486,67 @@ static int cgroup_has_css_refs(struct cgroup *cgrp) | |||
2452 | * matter, since it can only happen if the cgroup | 2486 | * matter, since it can only happen if the cgroup |
2453 | * has been deleted and hence no longer needs the | 2487 | * has been deleted and hence no longer needs the |
2454 | * release agent to be called anyway. */ | 2488 | * release agent to be called anyway. */ |
2455 | if (css && atomic_read(&css->refcnt)) | 2489 | if (css && (atomic_read(&css->refcnt) > 1)) |
2456 | return 1; | 2490 | return 1; |
2457 | } | 2491 | } |
2458 | return 0; | 2492 | return 0; |
2459 | } | 2493 | } |
2460 | 2494 | ||
2495 | /* | ||
2496 | * Atomically mark all (or else none) of the cgroup's CSS objects as | ||
2497 | * CSS_REMOVED. Return true on success, or false if the cgroup has | ||
2498 | * busy subsystems. Call with cgroup_mutex held | ||
2499 | */ | ||
2500 | |||
2501 | static int cgroup_clear_css_refs(struct cgroup *cgrp) | ||
2502 | { | ||
2503 | struct cgroup_subsys *ss; | ||
2504 | unsigned long flags; | ||
2505 | bool failed = false; | ||
2506 | local_irq_save(flags); | ||
2507 | for_each_subsys(cgrp->root, ss) { | ||
2508 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2509 | int refcnt; | ||
2510 | do { | ||
2511 | /* We can only remove a CSS with a refcnt==1 */ | ||
2512 | refcnt = atomic_read(&css->refcnt); | ||
2513 | if (refcnt > 1) { | ||
2514 | failed = true; | ||
2515 | goto done; | ||
2516 | } | ||
2517 | BUG_ON(!refcnt); | ||
2518 | /* | ||
2519 | * Drop the refcnt to 0 while we check other | ||
2520 | * subsystems. This will cause any racing | ||
2521 | * css_tryget() to spin until we set the | ||
2522 | * CSS_REMOVED bits or abort | ||
2523 | */ | ||
2524 | } while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt); | ||
2525 | } | ||
2526 | done: | ||
2527 | for_each_subsys(cgrp->root, ss) { | ||
2528 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | ||
2529 | if (failed) { | ||
2530 | /* | ||
2531 | * Restore old refcnt if we previously managed | ||
2532 | * to clear it from 1 to 0 | ||
2533 | */ | ||
2534 | if (!atomic_read(&css->refcnt)) | ||
2535 | atomic_set(&css->refcnt, 1); | ||
2536 | } else { | ||
2537 | /* Commit the fact that the CSS is removed */ | ||
2538 | set_bit(CSS_REMOVED, &css->flags); | ||
2539 | } | ||
2540 | } | ||
2541 | local_irq_restore(flags); | ||
2542 | return !failed; | ||
2543 | } | ||
2544 | |||
2461 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 2545 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
2462 | { | 2546 | { |
2463 | struct cgroup *cgrp = dentry->d_fsdata; | 2547 | struct cgroup *cgrp = dentry->d_fsdata; |
2464 | struct dentry *d; | 2548 | struct dentry *d; |
2465 | struct cgroup *parent; | 2549 | struct cgroup *parent; |
2466 | struct super_block *sb; | ||
2467 | struct cgroupfs_root *root; | ||
2468 | 2550 | ||
2469 | /* the vfs holds both inode->i_mutex already */ | 2551 | /* the vfs holds both inode->i_mutex already */ |
2470 | 2552 | ||
@@ -2487,12 +2569,10 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2487 | 2569 | ||
2488 | mutex_lock(&cgroup_mutex); | 2570 | mutex_lock(&cgroup_mutex); |
2489 | parent = cgrp->parent; | 2571 | parent = cgrp->parent; |
2490 | root = cgrp->root; | ||
2491 | sb = root->sb; | ||
2492 | 2572 | ||
2493 | if (atomic_read(&cgrp->count) | 2573 | if (atomic_read(&cgrp->count) |
2494 | || !list_empty(&cgrp->children) | 2574 | || !list_empty(&cgrp->children) |
2495 | || cgroup_has_css_refs(cgrp)) { | 2575 | || !cgroup_clear_css_refs(cgrp)) { |
2496 | mutex_unlock(&cgroup_mutex); | 2576 | mutex_unlock(&cgroup_mutex); |
2497 | return -EBUSY; | 2577 | return -EBUSY; |
2498 | } | 2578 | } |
@@ -2502,8 +2582,12 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | |||
2502 | if (!list_empty(&cgrp->release_list)) | 2582 | if (!list_empty(&cgrp->release_list)) |
2503 | list_del(&cgrp->release_list); | 2583 | list_del(&cgrp->release_list); |
2504 | spin_unlock(&release_list_lock); | 2584 | spin_unlock(&release_list_lock); |
2505 | /* delete my sibling from parent->children */ | 2585 | |
2586 | cgroup_lock_hierarchy(cgrp->root); | ||
2587 | /* delete this cgroup from parent->children */ | ||
2506 | list_del(&cgrp->sibling); | 2588 | list_del(&cgrp->sibling); |
2589 | cgroup_unlock_hierarchy(cgrp->root); | ||
2590 | |||
2507 | spin_lock(&cgrp->dentry->d_lock); | 2591 | spin_lock(&cgrp->dentry->d_lock); |
2508 | d = dget(cgrp->dentry); | 2592 | d = dget(cgrp->dentry); |
2509 | spin_unlock(&d->d_lock); | 2593 | spin_unlock(&d->d_lock); |
@@ -2525,6 +2609,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2525 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); | 2609 | printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); |
2526 | 2610 | ||
2527 | /* Create the top cgroup state for this subsystem */ | 2611 | /* Create the top cgroup state for this subsystem */ |
2612 | list_add(&ss->sibling, &rootnode.subsys_list); | ||
2528 | ss->root = &rootnode; | 2613 | ss->root = &rootnode; |
2529 | css = ss->create(ss, dummytop); | 2614 | css = ss->create(ss, dummytop); |
2530 | /* We don't handle early failures gracefully */ | 2615 | /* We don't handle early failures gracefully */ |
@@ -2544,6 +2629,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
2544 | * need to invoke fork callbacks here. */ | 2629 | * need to invoke fork callbacks here. */ |
2545 | BUG_ON(!list_empty(&init_task.tasks)); | 2630 | BUG_ON(!list_empty(&init_task.tasks)); |
2546 | 2631 | ||
2632 | mutex_init(&ss->hierarchy_mutex); | ||
2547 | ss->active = 1; | 2633 | ss->active = 1; |
2548 | } | 2634 | } |
2549 | 2635 | ||
@@ -2562,7 +2648,6 @@ int __init cgroup_init_early(void) | |||
2562 | INIT_HLIST_NODE(&init_css_set.hlist); | 2648 | INIT_HLIST_NODE(&init_css_set.hlist); |
2563 | css_set_count = 1; | 2649 | css_set_count = 1; |
2564 | init_cgroup_root(&rootnode); | 2650 | init_cgroup_root(&rootnode); |
2565 | list_add(&rootnode.root_list, &roots); | ||
2566 | root_count = 1; | 2651 | root_count = 1; |
2567 | init_task.cgroups = &init_css_set; | 2652 | init_task.cgroups = &init_css_set; |
2568 | 2653 | ||
@@ -2669,15 +2754,12 @@ static int proc_cgroup_show(struct seq_file *m, void *v) | |||
2669 | 2754 | ||
2670 | mutex_lock(&cgroup_mutex); | 2755 | mutex_lock(&cgroup_mutex); |
2671 | 2756 | ||
2672 | for_each_root(root) { | 2757 | for_each_active_root(root) { |
2673 | struct cgroup_subsys *ss; | 2758 | struct cgroup_subsys *ss; |
2674 | struct cgroup *cgrp; | 2759 | struct cgroup *cgrp; |
2675 | int subsys_id; | 2760 | int subsys_id; |
2676 | int count = 0; | 2761 | int count = 0; |
2677 | 2762 | ||
2678 | /* Skip this hierarchy if it has no active subsystems */ | ||
2679 | if (!root->actual_subsys_bits) | ||
2680 | continue; | ||
2681 | seq_printf(m, "%lu:", root->subsys_bits); | 2763 | seq_printf(m, "%lu:", root->subsys_bits); |
2682 | for_each_subsys(root, ss) | 2764 | for_each_subsys(root, ss) |
2683 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 2765 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
@@ -2800,8 +2882,10 @@ void cgroup_post_fork(struct task_struct *child) | |||
2800 | { | 2882 | { |
2801 | if (use_task_css_set_links) { | 2883 | if (use_task_css_set_links) { |
2802 | write_lock(&css_set_lock); | 2884 | write_lock(&css_set_lock); |
2885 | task_lock(child); | ||
2803 | if (list_empty(&child->cg_list)) | 2886 | if (list_empty(&child->cg_list)) |
2804 | list_add(&child->cg_list, &child->cgroups->tasks); | 2887 | list_add(&child->cg_list, &child->cgroups->tasks); |
2888 | task_unlock(child); | ||
2805 | write_unlock(&css_set_lock); | 2889 | write_unlock(&css_set_lock); |
2806 | } | 2890 | } |
2807 | } | 2891 | } |
@@ -2907,6 +2991,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2907 | mutex_unlock(&cgroup_mutex); | 2991 | mutex_unlock(&cgroup_mutex); |
2908 | return 0; | 2992 | return 0; |
2909 | } | 2993 | } |
2994 | task_lock(tsk); | ||
2910 | cg = tsk->cgroups; | 2995 | cg = tsk->cgroups; |
2911 | parent = task_cgroup(tsk, subsys->subsys_id); | 2996 | parent = task_cgroup(tsk, subsys->subsys_id); |
2912 | 2997 | ||
@@ -2919,6 +3004,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2919 | 3004 | ||
2920 | /* Keep the cgroup alive */ | 3005 | /* Keep the cgroup alive */ |
2921 | get_css_set(cg); | 3006 | get_css_set(cg); |
3007 | task_unlock(tsk); | ||
2922 | mutex_unlock(&cgroup_mutex); | 3008 | mutex_unlock(&cgroup_mutex); |
2923 | 3009 | ||
2924 | /* Now do the VFS work to create a cgroup */ | 3010 | /* Now do the VFS work to create a cgroup */ |
@@ -2937,7 +3023,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2937 | } | 3023 | } |
2938 | 3024 | ||
2939 | /* Create the cgroup directory, which also creates the cgroup */ | 3025 | /* Create the cgroup directory, which also creates the cgroup */ |
2940 | ret = vfs_mkdir(inode, dentry, S_IFDIR | 0755); | 3026 | ret = vfs_mkdir(inode, dentry, 0755); |
2941 | child = __d_cgrp(dentry); | 3027 | child = __d_cgrp(dentry); |
2942 | dput(dentry); | 3028 | dput(dentry); |
2943 | if (ret) { | 3029 | if (ret) { |
@@ -2947,13 +3033,6 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys, | |||
2947 | goto out_release; | 3033 | goto out_release; |
2948 | } | 3034 | } |
2949 | 3035 | ||
2950 | if (!child) { | ||
2951 | printk(KERN_INFO | ||
2952 | "Couldn't find new cgroup %s\n", nodename); | ||
2953 | ret = -ENOMEM; | ||
2954 | goto out_release; | ||
2955 | } | ||
2956 | |||
2957 | /* The cgroup now exists. Retake cgroup_mutex and check | 3036 | /* The cgroup now exists. Retake cgroup_mutex and check |
2958 | * that we're still in the same state that we thought we | 3037 | * that we're still in the same state that we thought we |
2959 | * were. */ | 3038 | * were. */ |
@@ -3049,7 +3128,8 @@ void __css_put(struct cgroup_subsys_state *css) | |||
3049 | { | 3128 | { |
3050 | struct cgroup *cgrp = css->cgroup; | 3129 | struct cgroup *cgrp = css->cgroup; |
3051 | rcu_read_lock(); | 3130 | rcu_read_lock(); |
3052 | if (atomic_dec_and_test(&css->refcnt) && notify_on_release(cgrp)) { | 3131 | if ((atomic_dec_return(&css->refcnt) == 1) && |
3132 | notify_on_release(cgrp)) { | ||
3053 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 3133 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
3054 | check_for_release(cgrp); | 3134 | check_for_release(cgrp); |
3055 | } | 3135 | } |