diff options
Diffstat (limited to 'kernel/cgroup.c')
| -rw-r--r-- | kernel/cgroup.c | 1573 |
1 files changed, 907 insertions, 666 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index a7c9e6ddb979..e91963302c0d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
| @@ -63,9 +63,6 @@ | |||
| 63 | 63 | ||
| 64 | #include <linux/atomic.h> | 64 | #include <linux/atomic.h> |
| 65 | 65 | ||
| 66 | /* css deactivation bias, makes css->refcnt negative to deny new trygets */ | ||
| 67 | #define CSS_DEACT_BIAS INT_MIN | ||
| 68 | |||
| 69 | /* | 66 | /* |
| 70 | * cgroup_mutex is the master lock. Any modification to cgroup or its | 67 | * cgroup_mutex is the master lock. Any modification to cgroup or its |
| 71 | * hierarchy must be performed while holding it. | 68 | * hierarchy must be performed while holding it. |
| @@ -99,16 +96,19 @@ static DEFINE_MUTEX(cgroup_root_mutex); | |||
| 99 | */ | 96 | */ |
| 100 | #define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys, | 97 | #define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys, |
| 101 | #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) | 98 | #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) |
| 102 | static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { | 99 | static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = { |
| 103 | #include <linux/cgroup_subsys.h> | 100 | #include <linux/cgroup_subsys.h> |
| 104 | }; | 101 | }; |
| 105 | 102 | ||
| 106 | /* | 103 | /* |
| 107 | * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the | 104 | * The dummy hierarchy, reserved for the subsystems that are otherwise |
| 108 | * subsystems that are otherwise unattached - it never has more than a | 105 | * unattached - it never has more than a single cgroup, and all tasks are |
| 109 | * single cgroup, and all tasks are part of that cgroup. | 106 | * part of that cgroup. |
| 110 | */ | 107 | */ |
| 111 | static struct cgroupfs_root rootnode; | 108 | static struct cgroupfs_root cgroup_dummy_root; |
| 109 | |||
| 110 | /* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ | ||
| 111 | static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; | ||
| 112 | 112 | ||
| 113 | /* | 113 | /* |
| 114 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. | 114 | * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. |
| @@ -186,18 +186,28 @@ struct cgroup_event { | |||
| 186 | 186 | ||
| 187 | /* The list of hierarchy roots */ | 187 | /* The list of hierarchy roots */ |
| 188 | 188 | ||
| 189 | static LIST_HEAD(roots); | 189 | static LIST_HEAD(cgroup_roots); |
| 190 | static int root_count; | 190 | static int cgroup_root_count; |
| 191 | 191 | ||
| 192 | static DEFINE_IDA(hierarchy_ida); | 192 | /* |
| 193 | static int next_hierarchy_id; | 193 | * Hierarchy ID allocation and mapping. It follows the same exclusion |
| 194 | static DEFINE_SPINLOCK(hierarchy_id_lock); | 194 | * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for |
| 195 | 195 | * writes, either for reads. | |
| 196 | /* dummytop is a shorthand for the dummy hierarchy's top cgroup */ | 196 | */ |
| 197 | #define dummytop (&rootnode.top_cgroup) | 197 | static DEFINE_IDR(cgroup_hierarchy_idr); |
| 198 | 198 | ||
| 199 | static struct cgroup_name root_cgroup_name = { .name = "/" }; | 199 | static struct cgroup_name root_cgroup_name = { .name = "/" }; |
| 200 | 200 | ||
| 201 | /* | ||
| 202 | * Assign a monotonically increasing serial number to cgroups. It | ||
| 203 | * guarantees cgroups with bigger numbers are newer than those with smaller | ||
| 204 | * numbers. Also, as cgroups are always appended to the parent's | ||
| 205 | * ->children list, it guarantees that sibling cgroups are always sorted in | ||
| 206 | * the ascending serial number order on the list. Protected by | ||
| 207 | * cgroup_mutex. | ||
| 208 | */ | ||
| 209 | static u64 cgroup_serial_nr_next = 1; | ||
| 210 | |||
| 201 | /* This flag indicates whether tasks in the fork and exit paths should | 211 | /* This flag indicates whether tasks in the fork and exit paths should |
| 202 | * check for fork/exit handlers to call. This avoids us having to do | 212 | * check for fork/exit handlers to call. This avoids us having to do |
| 203 | * extra work in the fork/exit path if none of the subsystems need to | 213 | * extra work in the fork/exit path if none of the subsystems need to |
| @@ -205,27 +215,15 @@ static struct cgroup_name root_cgroup_name = { .name = "/" }; | |||
| 205 | */ | 215 | */ |
| 206 | static int need_forkexit_callback __read_mostly; | 216 | static int need_forkexit_callback __read_mostly; |
| 207 | 217 | ||
| 218 | static void cgroup_offline_fn(struct work_struct *work); | ||
| 208 | static int cgroup_destroy_locked(struct cgroup *cgrp); | 219 | static int cgroup_destroy_locked(struct cgroup *cgrp); |
| 209 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | 220 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, |
| 210 | struct cftype cfts[], bool is_add); | 221 | struct cftype cfts[], bool is_add); |
| 211 | 222 | ||
| 212 | static int css_unbias_refcnt(int refcnt) | ||
| 213 | { | ||
| 214 | return refcnt >= 0 ? refcnt : refcnt - CSS_DEACT_BIAS; | ||
| 215 | } | ||
| 216 | |||
| 217 | /* the current nr of refs, always >= 0 whether @css is deactivated or not */ | ||
| 218 | static int css_refcnt(struct cgroup_subsys_state *css) | ||
| 219 | { | ||
| 220 | int v = atomic_read(&css->refcnt); | ||
| 221 | |||
| 222 | return css_unbias_refcnt(v); | ||
| 223 | } | ||
| 224 | |||
| 225 | /* convenient tests for these bits */ | 223 | /* convenient tests for these bits */ |
| 226 | inline int cgroup_is_removed(const struct cgroup *cgrp) | 224 | static inline bool cgroup_is_dead(const struct cgroup *cgrp) |
| 227 | { | 225 | { |
| 228 | return test_bit(CGRP_REMOVED, &cgrp->flags); | 226 | return test_bit(CGRP_DEAD, &cgrp->flags); |
| 229 | } | 227 | } |
| 230 | 228 | ||
| 231 | /** | 229 | /** |
| @@ -261,16 +259,38 @@ static int notify_on_release(const struct cgroup *cgrp) | |||
| 261 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); | 259 | return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); |
| 262 | } | 260 | } |
| 263 | 261 | ||
| 264 | /* | 262 | /** |
| 265 | * for_each_subsys() allows you to iterate on each subsystem attached to | 263 | * for_each_subsys - iterate all loaded cgroup subsystems |
| 266 | * an active hierarchy | 264 | * @ss: the iteration cursor |
| 265 | * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end | ||
| 266 | * | ||
| 267 | * Should be called under cgroup_mutex. | ||
| 267 | */ | 268 | */ |
| 268 | #define for_each_subsys(_root, _ss) \ | 269 | #define for_each_subsys(ss, i) \ |
| 269 | list_for_each_entry(_ss, &_root->subsys_list, sibling) | 270 | for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \ |
| 271 | if (({ lockdep_assert_held(&cgroup_mutex); \ | ||
| 272 | !((ss) = cgroup_subsys[i]); })) { } \ | ||
| 273 | else | ||
| 270 | 274 | ||
| 271 | /* for_each_active_root() allows you to iterate across the active hierarchies */ | 275 | /** |
| 272 | #define for_each_active_root(_root) \ | 276 | * for_each_builtin_subsys - iterate all built-in cgroup subsystems |
| 273 | list_for_each_entry(_root, &roots, root_list) | 277 | * @ss: the iteration cursor |
| 278 | * @i: the index of @ss, CGROUP_BUILTIN_SUBSYS_COUNT after reaching the end | ||
| 279 | * | ||
| 280 | * Built-in subsystems are always present and iteration itself doesn't | ||
| 281 | * require any synchronization. | ||
| 282 | */ | ||
| 283 | #define for_each_builtin_subsys(ss, i) \ | ||
| 284 | for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ | ||
| 285 | (((ss) = cgroup_subsys[i]) || true); (i)++) | ||
| 286 | |||
| 287 | /* iterate each subsystem attached to a hierarchy */ | ||
| 288 | #define for_each_root_subsys(root, ss) \ | ||
| 289 | list_for_each_entry((ss), &(root)->subsys_list, sibling) | ||
| 290 | |||
| 291 | /* iterate across the active hierarchies */ | ||
| 292 | #define for_each_active_root(root) \ | ||
| 293 | list_for_each_entry((root), &cgroup_roots, root_list) | ||
| 274 | 294 | ||
| 275 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) | 295 | static inline struct cgroup *__d_cgrp(struct dentry *dentry) |
| 276 | { | 296 | { |
| @@ -297,7 +317,7 @@ static inline struct cftype *__d_cft(struct dentry *dentry) | |||
| 297 | static bool cgroup_lock_live_group(struct cgroup *cgrp) | 317 | static bool cgroup_lock_live_group(struct cgroup *cgrp) |
| 298 | { | 318 | { |
| 299 | mutex_lock(&cgroup_mutex); | 319 | mutex_lock(&cgroup_mutex); |
| 300 | if (cgroup_is_removed(cgrp)) { | 320 | if (cgroup_is_dead(cgrp)) { |
| 301 | mutex_unlock(&cgroup_mutex); | 321 | mutex_unlock(&cgroup_mutex); |
| 302 | return false; | 322 | return false; |
| 303 | } | 323 | } |
| @@ -312,20 +332,24 @@ static void cgroup_release_agent(struct work_struct *work); | |||
| 312 | static DECLARE_WORK(release_agent_work, cgroup_release_agent); | 332 | static DECLARE_WORK(release_agent_work, cgroup_release_agent); |
| 313 | static void check_for_release(struct cgroup *cgrp); | 333 | static void check_for_release(struct cgroup *cgrp); |
| 314 | 334 | ||
| 315 | /* Link structure for associating css_set objects with cgroups */ | 335 | /* |
| 316 | struct cg_cgroup_link { | 336 | * A cgroup can be associated with multiple css_sets as different tasks may |
| 317 | /* | 337 | * belong to different cgroups on different hierarchies. In the other |
| 318 | * List running through cg_cgroup_links associated with a | 338 | * direction, a css_set is naturally associated with multiple cgroups. |
| 319 | * cgroup, anchored on cgroup->css_sets | 339 | * This M:N relationship is represented by the following link structure |
| 320 | */ | 340 | * which exists for each association and allows traversing the associations |
| 321 | struct list_head cgrp_link_list; | 341 | * from both sides. |
| 322 | struct cgroup *cgrp; | 342 | */ |
| 323 | /* | 343 | struct cgrp_cset_link { |
| 324 | * List running through cg_cgroup_links pointing at a | 344 | /* the cgroup and css_set this link associates */ |
| 325 | * single css_set object, anchored on css_set->cg_links | 345 | struct cgroup *cgrp; |
| 326 | */ | 346 | struct css_set *cset; |
| 327 | struct list_head cg_link_list; | 347 | |
| 328 | struct css_set *cg; | 348 | /* list of cgrp_cset_links anchored at cgrp->cset_links */ |
| 349 | struct list_head cset_link; | ||
| 350 | |||
| 351 | /* list of cgrp_cset_links anchored at css_set->cgrp_links */ | ||
| 352 | struct list_head cgrp_link; | ||
| 329 | }; | 353 | }; |
| 330 | 354 | ||
| 331 | /* The default css_set - used by init and its children prior to any | 355 | /* The default css_set - used by init and its children prior to any |
| @@ -336,7 +360,7 @@ struct cg_cgroup_link { | |||
| 336 | */ | 360 | */ |
| 337 | 361 | ||
| 338 | static struct css_set init_css_set; | 362 | static struct css_set init_css_set; |
| 339 | static struct cg_cgroup_link init_css_set_link; | 363 | static struct cgrp_cset_link init_cgrp_cset_link; |
| 340 | 364 | ||
| 341 | static int cgroup_init_idr(struct cgroup_subsys *ss, | 365 | static int cgroup_init_idr(struct cgroup_subsys *ss, |
| 342 | struct cgroup_subsys_state *css); | 366 | struct cgroup_subsys_state *css); |
| @@ -357,10 +381,11 @@ static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); | |||
| 357 | 381 | ||
| 358 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) | 382 | static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) |
| 359 | { | 383 | { |
| 360 | int i; | ||
| 361 | unsigned long key = 0UL; | 384 | unsigned long key = 0UL; |
| 385 | struct cgroup_subsys *ss; | ||
| 386 | int i; | ||
| 362 | 387 | ||
| 363 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) | 388 | for_each_subsys(ss, i) |
| 364 | key += (unsigned long)css[i]; | 389 | key += (unsigned long)css[i]; |
| 365 | key = (key >> 16) ^ key; | 390 | key = (key >> 16) ^ key; |
| 366 | 391 | ||
| @@ -373,90 +398,83 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) | |||
| 373 | * compiled into their kernel but not actually in use */ | 398 | * compiled into their kernel but not actually in use */ |
| 374 | static int use_task_css_set_links __read_mostly; | 399 | static int use_task_css_set_links __read_mostly; |
| 375 | 400 | ||
| 376 | static void __put_css_set(struct css_set *cg, int taskexit) | 401 | static void __put_css_set(struct css_set *cset, int taskexit) |
| 377 | { | 402 | { |
| 378 | struct cg_cgroup_link *link; | 403 | struct cgrp_cset_link *link, *tmp_link; |
| 379 | struct cg_cgroup_link *saved_link; | 404 | |
| 380 | /* | 405 | /* |
| 381 | * Ensure that the refcount doesn't hit zero while any readers | 406 | * Ensure that the refcount doesn't hit zero while any readers |
| 382 | * can see it. Similar to atomic_dec_and_lock(), but for an | 407 | * can see it. Similar to atomic_dec_and_lock(), but for an |
| 383 | * rwlock | 408 | * rwlock |
| 384 | */ | 409 | */ |
| 385 | if (atomic_add_unless(&cg->refcount, -1, 1)) | 410 | if (atomic_add_unless(&cset->refcount, -1, 1)) |
| 386 | return; | 411 | return; |
| 387 | write_lock(&css_set_lock); | 412 | write_lock(&css_set_lock); |
| 388 | if (!atomic_dec_and_test(&cg->refcount)) { | 413 | if (!atomic_dec_and_test(&cset->refcount)) { |
| 389 | write_unlock(&css_set_lock); | 414 | write_unlock(&css_set_lock); |
| 390 | return; | 415 | return; |
| 391 | } | 416 | } |
| 392 | 417 | ||
| 393 | /* This css_set is dead. unlink it and release cgroup refcounts */ | 418 | /* This css_set is dead. unlink it and release cgroup refcounts */ |
| 394 | hash_del(&cg->hlist); | 419 | hash_del(&cset->hlist); |
| 395 | css_set_count--; | 420 | css_set_count--; |
| 396 | 421 | ||
| 397 | list_for_each_entry_safe(link, saved_link, &cg->cg_links, | 422 | list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) { |
| 398 | cg_link_list) { | ||
| 399 | struct cgroup *cgrp = link->cgrp; | 423 | struct cgroup *cgrp = link->cgrp; |
| 400 | list_del(&link->cg_link_list); | ||
| 401 | list_del(&link->cgrp_link_list); | ||
| 402 | 424 | ||
| 403 | /* | 425 | list_del(&link->cset_link); |
| 404 | * We may not be holding cgroup_mutex, and if cgrp->count is | 426 | list_del(&link->cgrp_link); |
| 405 | * dropped to 0 the cgroup can be destroyed at any time, hence | 427 | |
| 406 | * rcu_read_lock is used to keep it alive. | 428 | /* @cgrp can't go away while we're holding css_set_lock */ |
| 407 | */ | 429 | if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { |
| 408 | rcu_read_lock(); | ||
| 409 | if (atomic_dec_and_test(&cgrp->count) && | ||
| 410 | notify_on_release(cgrp)) { | ||
| 411 | if (taskexit) | 430 | if (taskexit) |
| 412 | set_bit(CGRP_RELEASABLE, &cgrp->flags); | 431 | set_bit(CGRP_RELEASABLE, &cgrp->flags); |
| 413 | check_for_release(cgrp); | 432 | check_for_release(cgrp); |
| 414 | } | 433 | } |
| 415 | rcu_read_unlock(); | ||
| 416 | 434 | ||
| 417 | kfree(link); | 435 | kfree(link); |
| 418 | } | 436 | } |
| 419 | 437 | ||
| 420 | write_unlock(&css_set_lock); | 438 | write_unlock(&css_set_lock); |
| 421 | kfree_rcu(cg, rcu_head); | 439 | kfree_rcu(cset, rcu_head); |
| 422 | } | 440 | } |
| 423 | 441 | ||
| 424 | /* | 442 | /* |
| 425 | * refcounted get/put for css_set objects | 443 | * refcounted get/put for css_set objects |
| 426 | */ | 444 | */ |
| 427 | static inline void get_css_set(struct css_set *cg) | 445 | static inline void get_css_set(struct css_set *cset) |
| 428 | { | 446 | { |
| 429 | atomic_inc(&cg->refcount); | 447 | atomic_inc(&cset->refcount); |
| 430 | } | 448 | } |
| 431 | 449 | ||
| 432 | static inline void put_css_set(struct css_set *cg) | 450 | static inline void put_css_set(struct css_set *cset) |
| 433 | { | 451 | { |
| 434 | __put_css_set(cg, 0); | 452 | __put_css_set(cset, 0); |
| 435 | } | 453 | } |
| 436 | 454 | ||
| 437 | static inline void put_css_set_taskexit(struct css_set *cg) | 455 | static inline void put_css_set_taskexit(struct css_set *cset) |
| 438 | { | 456 | { |
| 439 | __put_css_set(cg, 1); | 457 | __put_css_set(cset, 1); |
| 440 | } | 458 | } |
| 441 | 459 | ||
| 442 | /* | 460 | /** |
| 443 | * compare_css_sets - helper function for find_existing_css_set(). | 461 | * compare_css_sets - helper function for find_existing_css_set(). |
| 444 | * @cg: candidate css_set being tested | 462 | * @cset: candidate css_set being tested |
| 445 | * @old_cg: existing css_set for a task | 463 | * @old_cset: existing css_set for a task |
| 446 | * @new_cgrp: cgroup that's being entered by the task | 464 | * @new_cgrp: cgroup that's being entered by the task |
| 447 | * @template: desired set of css pointers in css_set (pre-calculated) | 465 | * @template: desired set of css pointers in css_set (pre-calculated) |
| 448 | * | 466 | * |
| 449 | * Returns true if "cg" matches "old_cg" except for the hierarchy | 467 | * Returns true if "cset" matches "old_cset" except for the hierarchy |
| 450 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". | 468 | * which "new_cgrp" belongs to, for which it should match "new_cgrp". |
| 451 | */ | 469 | */ |
| 452 | static bool compare_css_sets(struct css_set *cg, | 470 | static bool compare_css_sets(struct css_set *cset, |
| 453 | struct css_set *old_cg, | 471 | struct css_set *old_cset, |
| 454 | struct cgroup *new_cgrp, | 472 | struct cgroup *new_cgrp, |
| 455 | struct cgroup_subsys_state *template[]) | 473 | struct cgroup_subsys_state *template[]) |
| 456 | { | 474 | { |
| 457 | struct list_head *l1, *l2; | 475 | struct list_head *l1, *l2; |
| 458 | 476 | ||
| 459 | if (memcmp(template, cg->subsys, sizeof(cg->subsys))) { | 477 | if (memcmp(template, cset->subsys, sizeof(cset->subsys))) { |
| 460 | /* Not all subsystems matched */ | 478 | /* Not all subsystems matched */ |
| 461 | return false; | 479 | return false; |
| 462 | } | 480 | } |
| @@ -470,28 +488,28 @@ static bool compare_css_sets(struct css_set *cg, | |||
| 470 | * candidates. | 488 | * candidates. |
| 471 | */ | 489 | */ |
| 472 | 490 | ||
| 473 | l1 = &cg->cg_links; | 491 | l1 = &cset->cgrp_links; |
| 474 | l2 = &old_cg->cg_links; | 492 | l2 = &old_cset->cgrp_links; |
| 475 | while (1) { | 493 | while (1) { |
| 476 | struct cg_cgroup_link *cgl1, *cgl2; | 494 | struct cgrp_cset_link *link1, *link2; |
| 477 | struct cgroup *cg1, *cg2; | 495 | struct cgroup *cgrp1, *cgrp2; |
| 478 | 496 | ||
| 479 | l1 = l1->next; | 497 | l1 = l1->next; |
| 480 | l2 = l2->next; | 498 | l2 = l2->next; |
| 481 | /* See if we reached the end - both lists are equal length. */ | 499 | /* See if we reached the end - both lists are equal length. */ |
| 482 | if (l1 == &cg->cg_links) { | 500 | if (l1 == &cset->cgrp_links) { |
| 483 | BUG_ON(l2 != &old_cg->cg_links); | 501 | BUG_ON(l2 != &old_cset->cgrp_links); |
| 484 | break; | 502 | break; |
| 485 | } else { | 503 | } else { |
| 486 | BUG_ON(l2 == &old_cg->cg_links); | 504 | BUG_ON(l2 == &old_cset->cgrp_links); |
| 487 | } | 505 | } |
| 488 | /* Locate the cgroups associated with these links. */ | 506 | /* Locate the cgroups associated with these links. */ |
| 489 | cgl1 = list_entry(l1, struct cg_cgroup_link, cg_link_list); | 507 | link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link); |
| 490 | cgl2 = list_entry(l2, struct cg_cgroup_link, cg_link_list); | 508 | link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link); |
| 491 | cg1 = cgl1->cgrp; | 509 | cgrp1 = link1->cgrp; |
| 492 | cg2 = cgl2->cgrp; | 510 | cgrp2 = link2->cgrp; |
| 493 | /* Hierarchies should be linked in the same order. */ | 511 | /* Hierarchies should be linked in the same order. */ |
| 494 | BUG_ON(cg1->root != cg2->root); | 512 | BUG_ON(cgrp1->root != cgrp2->root); |
| 495 | 513 | ||
| 496 | /* | 514 | /* |
| 497 | * If this hierarchy is the hierarchy of the cgroup | 515 | * If this hierarchy is the hierarchy of the cgroup |
| @@ -500,46 +518,39 @@ static bool compare_css_sets(struct css_set *cg, | |||
| 500 | * hierarchy, then this css_set should point to the | 518 | * hierarchy, then this css_set should point to the |
| 501 | * same cgroup as the old css_set. | 519 | * same cgroup as the old css_set. |
| 502 | */ | 520 | */ |
| 503 | if (cg1->root == new_cgrp->root) { | 521 | if (cgrp1->root == new_cgrp->root) { |
| 504 | if (cg1 != new_cgrp) | 522 | if (cgrp1 != new_cgrp) |
| 505 | return false; | 523 | return false; |
| 506 | } else { | 524 | } else { |
| 507 | if (cg1 != cg2) | 525 | if (cgrp1 != cgrp2) |
| 508 | return false; | 526 | return false; |
| 509 | } | 527 | } |
| 510 | } | 528 | } |
| 511 | return true; | 529 | return true; |
| 512 | } | 530 | } |
| 513 | 531 | ||
| 514 | /* | 532 | /** |
| 515 | * find_existing_css_set() is a helper for | 533 | * find_existing_css_set - init css array and find the matching css_set |
| 516 | * find_css_set(), and checks to see whether an existing | 534 | * @old_cset: the css_set that we're using before the cgroup transition |
| 517 | * css_set is suitable. | 535 | * @cgrp: the cgroup that we're moving into |
| 518 | * | 536 | * @template: out param for the new set of csses, should be clear on entry |
| 519 | * oldcg: the cgroup group that we're using before the cgroup | ||
| 520 | * transition | ||
| 521 | * | ||
| 522 | * cgrp: the cgroup that we're moving into | ||
| 523 | * | ||
| 524 | * template: location in which to build the desired set of subsystem | ||
| 525 | * state objects for the new cgroup group | ||
| 526 | */ | 537 | */ |
| 527 | static struct css_set *find_existing_css_set( | 538 | static struct css_set *find_existing_css_set(struct css_set *old_cset, |
| 528 | struct css_set *oldcg, | 539 | struct cgroup *cgrp, |
| 529 | struct cgroup *cgrp, | 540 | struct cgroup_subsys_state *template[]) |
| 530 | struct cgroup_subsys_state *template[]) | ||
| 531 | { | 541 | { |
| 532 | int i; | ||
| 533 | struct cgroupfs_root *root = cgrp->root; | 542 | struct cgroupfs_root *root = cgrp->root; |
| 534 | struct css_set *cg; | 543 | struct cgroup_subsys *ss; |
| 544 | struct css_set *cset; | ||
| 535 | unsigned long key; | 545 | unsigned long key; |
| 546 | int i; | ||
| 536 | 547 | ||
| 537 | /* | 548 | /* |
| 538 | * Build the set of subsystem state objects that we want to see in the | 549 | * Build the set of subsystem state objects that we want to see in the |
| 539 | * new css_set. while subsystems can change globally, the entries here | 550 | * new css_set. while subsystems can change globally, the entries here |
| 540 | * won't change, so no need for locking. | 551 | * won't change, so no need for locking. |
| 541 | */ | 552 | */ |
| 542 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 553 | for_each_subsys(ss, i) { |
| 543 | if (root->subsys_mask & (1UL << i)) { | 554 | if (root->subsys_mask & (1UL << i)) { |
| 544 | /* Subsystem is in this hierarchy. So we want | 555 | /* Subsystem is in this hierarchy. So we want |
| 545 | * the subsystem state from the new | 556 | * the subsystem state from the new |
| @@ -548,148 +559,152 @@ static struct css_set *find_existing_css_set( | |||
| 548 | } else { | 559 | } else { |
| 549 | /* Subsystem is not in this hierarchy, so we | 560 | /* Subsystem is not in this hierarchy, so we |
| 550 | * don't want to change the subsystem state */ | 561 | * don't want to change the subsystem state */ |
| 551 | template[i] = oldcg->subsys[i]; | 562 | template[i] = old_cset->subsys[i]; |
| 552 | } | 563 | } |
| 553 | } | 564 | } |
| 554 | 565 | ||
| 555 | key = css_set_hash(template); | 566 | key = css_set_hash(template); |
| 556 | hash_for_each_possible(css_set_table, cg, hlist, key) { | 567 | hash_for_each_possible(css_set_table, cset, hlist, key) { |
| 557 | if (!compare_css_sets(cg, oldcg, cgrp, template)) | 568 | if (!compare_css_sets(cset, old_cset, cgrp, template)) |
| 558 | continue; | 569 | continue; |
| 559 | 570 | ||
| 560 | /* This css_set matches what we need */ | 571 | /* This css_set matches what we need */ |
| 561 | return cg; | 572 | return cset; |
| 562 | } | 573 | } |
| 563 | 574 | ||
| 564 | /* No existing cgroup group matched */ | 575 | /* No existing cgroup group matched */ |
| 565 | return NULL; | 576 | return NULL; |
| 566 | } | 577 | } |
| 567 | 578 | ||
| 568 | static void free_cg_links(struct list_head *tmp) | 579 | static void free_cgrp_cset_links(struct list_head *links_to_free) |
| 569 | { | 580 | { |
| 570 | struct cg_cgroup_link *link; | 581 | struct cgrp_cset_link *link, *tmp_link; |
| 571 | struct cg_cgroup_link *saved_link; | ||
| 572 | 582 | ||
| 573 | list_for_each_entry_safe(link, saved_link, tmp, cgrp_link_list) { | 583 | list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) { |
| 574 | list_del(&link->cgrp_link_list); | 584 | list_del(&link->cset_link); |
| 575 | kfree(link); | 585 | kfree(link); |
| 576 | } | 586 | } |
| 577 | } | 587 | } |
| 578 | 588 | ||
| 579 | /* | 589 | /** |
| 580 | * allocate_cg_links() allocates "count" cg_cgroup_link structures | 590 | * allocate_cgrp_cset_links - allocate cgrp_cset_links |
| 581 | * and chains them on tmp through their cgrp_link_list fields. Returns 0 on | 591 | * @count: the number of links to allocate |
| 582 | * success or a negative error | 592 | * @tmp_links: list_head the allocated links are put on |
| 593 | * | ||
| 594 | * Allocate @count cgrp_cset_link structures and chain them on @tmp_links | ||
| 595 | * through ->cset_link. Returns 0 on success or -errno. | ||
| 583 | */ | 596 | */ |
| 584 | static int allocate_cg_links(int count, struct list_head *tmp) | 597 | static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links) |
| 585 | { | 598 | { |
| 586 | struct cg_cgroup_link *link; | 599 | struct cgrp_cset_link *link; |
| 587 | int i; | 600 | int i; |
| 588 | INIT_LIST_HEAD(tmp); | 601 | |
| 602 | INIT_LIST_HEAD(tmp_links); | ||
| 603 | |||
| 589 | for (i = 0; i < count; i++) { | 604 | for (i = 0; i < count; i++) { |
| 590 | link = kmalloc(sizeof(*link), GFP_KERNEL); | 605 | link = kzalloc(sizeof(*link), GFP_KERNEL); |
| 591 | if (!link) { | 606 | if (!link) { |
| 592 | free_cg_links(tmp); | 607 | free_cgrp_cset_links(tmp_links); |
| 593 | return -ENOMEM; | 608 | return -ENOMEM; |
| 594 | } | 609 | } |
| 595 | list_add(&link->cgrp_link_list, tmp); | 610 | list_add(&link->cset_link, tmp_links); |
| 596 | } | 611 | } |
| 597 | return 0; | 612 | return 0; |
| 598 | } | 613 | } |
| 599 | 614 | ||
| 600 | /** | 615 | /** |
| 601 | * link_css_set - a helper function to link a css_set to a cgroup | 616 | * link_css_set - a helper function to link a css_set to a cgroup |
| 602 | * @tmp_cg_links: cg_cgroup_link objects allocated by allocate_cg_links() | 617 | * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links() |
| 603 | * @cg: the css_set to be linked | 618 | * @cset: the css_set to be linked |
| 604 | * @cgrp: the destination cgroup | 619 | * @cgrp: the destination cgroup |
| 605 | */ | 620 | */ |
| 606 | static void link_css_set(struct list_head *tmp_cg_links, | 621 | static void link_css_set(struct list_head *tmp_links, struct css_set *cset, |
| 607 | struct css_set *cg, struct cgroup *cgrp) | 622 | struct cgroup *cgrp) |
| 608 | { | 623 | { |
| 609 | struct cg_cgroup_link *link; | 624 | struct cgrp_cset_link *link; |
| 610 | 625 | ||
| 611 | BUG_ON(list_empty(tmp_cg_links)); | 626 | BUG_ON(list_empty(tmp_links)); |
| 612 | link = list_first_entry(tmp_cg_links, struct cg_cgroup_link, | 627 | link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link); |
| 613 | cgrp_link_list); | 628 | link->cset = cset; |
| 614 | link->cg = cg; | ||
| 615 | link->cgrp = cgrp; | 629 | link->cgrp = cgrp; |
| 616 | atomic_inc(&cgrp->count); | 630 | list_move(&link->cset_link, &cgrp->cset_links); |
| 617 | list_move(&link->cgrp_link_list, &cgrp->css_sets); | ||
| 618 | /* | 631 | /* |
| 619 | * Always add links to the tail of the list so that the list | 632 | * Always add links to the tail of the list so that the list |
| 620 | * is sorted by order of hierarchy creation | 633 | * is sorted by order of hierarchy creation |
| 621 | */ | 634 | */ |
| 622 | list_add_tail(&link->cg_link_list, &cg->cg_links); | 635 | list_add_tail(&link->cgrp_link, &cset->cgrp_links); |
| 623 | } | 636 | } |
| 624 | 637 | ||
| 625 | /* | 638 | /** |
| 626 | * find_css_set() takes an existing cgroup group and a | 639 | * find_css_set - return a new css_set with one cgroup updated |
| 627 | * cgroup object, and returns a css_set object that's | 640 | * @old_cset: the baseline css_set |
| 628 | * equivalent to the old group, but with the given cgroup | 641 | * @cgrp: the cgroup to be updated |
| 629 | * substituted into the appropriate hierarchy. Must be called with | 642 | * |
| 630 | * cgroup_mutex held | 643 | * Return a new css_set that's equivalent to @old_cset, but with @cgrp |
| 644 | * substituted into the appropriate hierarchy. | ||
| 631 | */ | 645 | */ |
| 632 | static struct css_set *find_css_set( | 646 | static struct css_set *find_css_set(struct css_set *old_cset, |
| 633 | struct css_set *oldcg, struct cgroup *cgrp) | 647 | struct cgroup *cgrp) |
| 634 | { | 648 | { |
| 635 | struct css_set *res; | 649 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { }; |
| 636 | struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT]; | 650 | struct css_set *cset; |
| 637 | 651 | struct list_head tmp_links; | |
| 638 | struct list_head tmp_cg_links; | 652 | struct cgrp_cset_link *link; |
| 639 | |||
| 640 | struct cg_cgroup_link *link; | ||
| 641 | unsigned long key; | 653 | unsigned long key; |
| 642 | 654 | ||
| 655 | lockdep_assert_held(&cgroup_mutex); | ||
| 656 | |||
| 643 | /* First see if we already have a cgroup group that matches | 657 | /* First see if we already have a cgroup group that matches |
| 644 | * the desired set */ | 658 | * the desired set */ |
| 645 | read_lock(&css_set_lock); | 659 | read_lock(&css_set_lock); |
| 646 | res = find_existing_css_set(oldcg, cgrp, template); | 660 | cset = find_existing_css_set(old_cset, cgrp, template); |
| 647 | if (res) | 661 | if (cset) |
| 648 | get_css_set(res); | 662 | get_css_set(cset); |
| 649 | read_unlock(&css_set_lock); | 663 | read_unlock(&css_set_lock); |
| 650 | 664 | ||
| 651 | if (res) | 665 | if (cset) |
| 652 | return res; | 666 | return cset; |
| 653 | 667 | ||
| 654 | res = kmalloc(sizeof(*res), GFP_KERNEL); | 668 | cset = kzalloc(sizeof(*cset), GFP_KERNEL); |
| 655 | if (!res) | 669 | if (!cset) |
| 656 | return NULL; | 670 | return NULL; |
| 657 | 671 | ||
| 658 | /* Allocate all the cg_cgroup_link objects that we'll need */ | 672 | /* Allocate all the cgrp_cset_link objects that we'll need */ |
| 659 | if (allocate_cg_links(root_count, &tmp_cg_links) < 0) { | 673 | if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) { |
| 660 | kfree(res); | 674 | kfree(cset); |
| 661 | return NULL; | 675 | return NULL; |
| 662 | } | 676 | } |
| 663 | 677 | ||
| 664 | atomic_set(&res->refcount, 1); | 678 | atomic_set(&cset->refcount, 1); |
| 665 | INIT_LIST_HEAD(&res->cg_links); | 679 | INIT_LIST_HEAD(&cset->cgrp_links); |
| 666 | INIT_LIST_HEAD(&res->tasks); | 680 | INIT_LIST_HEAD(&cset->tasks); |
| 667 | INIT_HLIST_NODE(&res->hlist); | 681 | INIT_HLIST_NODE(&cset->hlist); |
| 668 | 682 | ||
| 669 | /* Copy the set of subsystem state objects generated in | 683 | /* Copy the set of subsystem state objects generated in |
| 670 | * find_existing_css_set() */ | 684 | * find_existing_css_set() */ |
| 671 | memcpy(res->subsys, template, sizeof(res->subsys)); | 685 | memcpy(cset->subsys, template, sizeof(cset->subsys)); |
| 672 | 686 | ||
| 673 | write_lock(&css_set_lock); | 687 | write_lock(&css_set_lock); |
| 674 | /* Add reference counts and links from the new css_set. */ | 688 | /* Add reference counts and links from the new css_set. */ |
| 675 | list_for_each_entry(link, &oldcg->cg_links, cg_link_list) { | 689 | list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) { |
| 676 | struct cgroup *c = link->cgrp; | 690 | struct cgroup *c = link->cgrp; |
| 691 | |||
| 677 | if (c->root == cgrp->root) | 692 | if (c->root == cgrp->root) |
| 678 | c = cgrp; | 693 | c = cgrp; |
| 679 | link_css_set(&tmp_cg_links, res, c); | 694 | link_css_set(&tmp_links, cset, c); |
| 680 | } | 695 | } |
| 681 | 696 | ||
| 682 | BUG_ON(!list_empty(&tmp_cg_links)); | 697 | BUG_ON(!list_empty(&tmp_links)); |
| 683 | 698 | ||
| 684 | css_set_count++; | 699 | css_set_count++; |
| 685 | 700 | ||
| 686 | /* Add this cgroup group to the hash table */ | 701 | /* Add this cgroup group to the hash table */ |
| 687 | key = css_set_hash(res->subsys); | 702 | key = css_set_hash(cset->subsys); |
| 688 | hash_add(css_set_table, &res->hlist, key); | 703 | hash_add(css_set_table, &cset->hlist, key); |
| 689 | 704 | ||
| 690 | write_unlock(&css_set_lock); | 705 | write_unlock(&css_set_lock); |
| 691 | 706 | ||
| 692 | return res; | 707 | return cset; |
| 693 | } | 708 | } |
| 694 | 709 | ||
| 695 | /* | 710 | /* |
| @@ -699,7 +714,7 @@ static struct css_set *find_css_set( | |||
| 699 | static struct cgroup *task_cgroup_from_root(struct task_struct *task, | 714 | static struct cgroup *task_cgroup_from_root(struct task_struct *task, |
| 700 | struct cgroupfs_root *root) | 715 | struct cgroupfs_root *root) |
| 701 | { | 716 | { |
| 702 | struct css_set *css; | 717 | struct css_set *cset; |
| 703 | struct cgroup *res = NULL; | 718 | struct cgroup *res = NULL; |
| 704 | 719 | ||
| 705 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 720 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| @@ -709,13 +724,15 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, | |||
| 709 | * task can't change groups, so the only thing that can happen | 724 | * task can't change groups, so the only thing that can happen |
| 710 | * is that it exits and its css is set back to init_css_set. | 725 | * is that it exits and its css is set back to init_css_set. |
| 711 | */ | 726 | */ |
| 712 | css = task->cgroups; | 727 | cset = task_css_set(task); |
| 713 | if (css == &init_css_set) { | 728 | if (cset == &init_css_set) { |
| 714 | res = &root->top_cgroup; | 729 | res = &root->top_cgroup; |
| 715 | } else { | 730 | } else { |
| 716 | struct cg_cgroup_link *link; | 731 | struct cgrp_cset_link *link; |
| 717 | list_for_each_entry(link, &css->cg_links, cg_link_list) { | 732 | |
| 733 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { | ||
| 718 | struct cgroup *c = link->cgrp; | 734 | struct cgroup *c = link->cgrp; |
| 735 | |||
| 719 | if (c->root == root) { | 736 | if (c->root == root) { |
| 720 | res = c; | 737 | res = c; |
| 721 | break; | 738 | break; |
| @@ -785,7 +802,6 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, | |||
| 785 | */ | 802 | */ |
| 786 | 803 | ||
| 787 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); | 804 | static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); |
| 788 | static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); | ||
| 789 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); | 805 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); |
| 790 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | 806 | static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, |
| 791 | unsigned long subsys_mask); | 807 | unsigned long subsys_mask); |
| @@ -828,14 +844,14 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry) | |||
| 828 | 844 | ||
| 829 | static void cgroup_free_fn(struct work_struct *work) | 845 | static void cgroup_free_fn(struct work_struct *work) |
| 830 | { | 846 | { |
| 831 | struct cgroup *cgrp = container_of(work, struct cgroup, free_work); | 847 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); |
| 832 | struct cgroup_subsys *ss; | 848 | struct cgroup_subsys *ss; |
| 833 | 849 | ||
| 834 | mutex_lock(&cgroup_mutex); | 850 | mutex_lock(&cgroup_mutex); |
| 835 | /* | 851 | /* |
| 836 | * Release the subsystem state objects. | 852 | * Release the subsystem state objects. |
| 837 | */ | 853 | */ |
| 838 | for_each_subsys(cgrp->root, ss) | 854 | for_each_root_subsys(cgrp->root, ss) |
| 839 | ss->css_free(cgrp); | 855 | ss->css_free(cgrp); |
| 840 | 856 | ||
| 841 | cgrp->root->number_of_cgroups--; | 857 | cgrp->root->number_of_cgroups--; |
| @@ -873,7 +889,8 @@ static void cgroup_free_rcu(struct rcu_head *head) | |||
| 873 | { | 889 | { |
| 874 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | 890 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); |
| 875 | 891 | ||
| 876 | schedule_work(&cgrp->free_work); | 892 | INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); |
| 893 | schedule_work(&cgrp->destroy_work); | ||
| 877 | } | 894 | } |
| 878 | 895 | ||
| 879 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 896 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
| @@ -882,7 +899,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) | |||
| 882 | if (S_ISDIR(inode->i_mode)) { | 899 | if (S_ISDIR(inode->i_mode)) { |
| 883 | struct cgroup *cgrp = dentry->d_fsdata; | 900 | struct cgroup *cgrp = dentry->d_fsdata; |
| 884 | 901 | ||
| 885 | BUG_ON(!(cgroup_is_removed(cgrp))); | 902 | BUG_ON(!(cgroup_is_dead(cgrp))); |
| 886 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); | 903 | call_rcu(&cgrp->rcu_head, cgroup_free_rcu); |
| 887 | } else { | 904 | } else { |
| 888 | struct cfent *cfe = __d_cfe(dentry); | 905 | struct cfent *cfe = __d_cfe(dentry); |
| @@ -950,7 +967,7 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files, | |||
| 950 | struct cgroup *cgrp = __d_cgrp(dir); | 967 | struct cgroup *cgrp = __d_cgrp(dir); |
| 951 | struct cgroup_subsys *ss; | 968 | struct cgroup_subsys *ss; |
| 952 | 969 | ||
| 953 | for_each_subsys(cgrp->root, ss) { | 970 | for_each_root_subsys(cgrp->root, ss) { |
| 954 | struct cftype_set *set; | 971 | struct cftype_set *set; |
| 955 | if (!test_bit(ss->subsys_id, &subsys_mask)) | 972 | if (!test_bit(ss->subsys_id, &subsys_mask)) |
| 956 | continue; | 973 | continue; |
| @@ -988,30 +1005,23 @@ static void cgroup_d_remove_dir(struct dentry *dentry) | |||
| 988 | * returns an error, no reference counts are touched. | 1005 | * returns an error, no reference counts are touched. |
| 989 | */ | 1006 | */ |
| 990 | static int rebind_subsystems(struct cgroupfs_root *root, | 1007 | static int rebind_subsystems(struct cgroupfs_root *root, |
| 991 | unsigned long final_subsys_mask) | 1008 | unsigned long added_mask, unsigned removed_mask) |
| 992 | { | 1009 | { |
| 993 | unsigned long added_mask, removed_mask; | ||
| 994 | struct cgroup *cgrp = &root->top_cgroup; | 1010 | struct cgroup *cgrp = &root->top_cgroup; |
| 1011 | struct cgroup_subsys *ss; | ||
| 995 | int i; | 1012 | int i; |
| 996 | 1013 | ||
| 997 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 1014 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| 998 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); | 1015 | BUG_ON(!mutex_is_locked(&cgroup_root_mutex)); |
| 999 | 1016 | ||
| 1000 | removed_mask = root->actual_subsys_mask & ~final_subsys_mask; | ||
| 1001 | added_mask = final_subsys_mask & ~root->actual_subsys_mask; | ||
| 1002 | /* Check that any added subsystems are currently free */ | 1017 | /* Check that any added subsystems are currently free */ |
| 1003 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1018 | for_each_subsys(ss, i) { |
| 1004 | unsigned long bit = 1UL << i; | 1019 | unsigned long bit = 1UL << i; |
| 1005 | struct cgroup_subsys *ss = subsys[i]; | 1020 | |
| 1006 | if (!(bit & added_mask)) | 1021 | if (!(bit & added_mask)) |
| 1007 | continue; | 1022 | continue; |
| 1008 | /* | 1023 | |
| 1009 | * Nobody should tell us to do a subsys that doesn't exist: | 1024 | if (ss->root != &cgroup_dummy_root) { |
| 1010 | * parse_cgroupfs_options should catch that case and refcounts | ||
| 1011 | * ensure that subsystems won't disappear once selected. | ||
| 1012 | */ | ||
| 1013 | BUG_ON(ss == NULL); | ||
| 1014 | if (ss->root != &rootnode) { | ||
| 1015 | /* Subsystem isn't free */ | 1025 | /* Subsystem isn't free */ |
| 1016 | return -EBUSY; | 1026 | return -EBUSY; |
| 1017 | } | 1027 | } |
| @@ -1025,38 +1035,41 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1025 | return -EBUSY; | 1035 | return -EBUSY; |
| 1026 | 1036 | ||
| 1027 | /* Process each subsystem */ | 1037 | /* Process each subsystem */ |
| 1028 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1038 | for_each_subsys(ss, i) { |
| 1029 | struct cgroup_subsys *ss = subsys[i]; | ||
| 1030 | unsigned long bit = 1UL << i; | 1039 | unsigned long bit = 1UL << i; |
| 1040 | |||
| 1031 | if (bit & added_mask) { | 1041 | if (bit & added_mask) { |
| 1032 | /* We're binding this subsystem to this hierarchy */ | 1042 | /* We're binding this subsystem to this hierarchy */ |
| 1033 | BUG_ON(ss == NULL); | ||
| 1034 | BUG_ON(cgrp->subsys[i]); | 1043 | BUG_ON(cgrp->subsys[i]); |
| 1035 | BUG_ON(!dummytop->subsys[i]); | 1044 | BUG_ON(!cgroup_dummy_top->subsys[i]); |
| 1036 | BUG_ON(dummytop->subsys[i]->cgroup != dummytop); | 1045 | BUG_ON(cgroup_dummy_top->subsys[i]->cgroup != cgroup_dummy_top); |
| 1037 | cgrp->subsys[i] = dummytop->subsys[i]; | 1046 | |
| 1047 | cgrp->subsys[i] = cgroup_dummy_top->subsys[i]; | ||
| 1038 | cgrp->subsys[i]->cgroup = cgrp; | 1048 | cgrp->subsys[i]->cgroup = cgrp; |
| 1039 | list_move(&ss->sibling, &root->subsys_list); | 1049 | list_move(&ss->sibling, &root->subsys_list); |
| 1040 | ss->root = root; | 1050 | ss->root = root; |
| 1041 | if (ss->bind) | 1051 | if (ss->bind) |
| 1042 | ss->bind(cgrp); | 1052 | ss->bind(cgrp); |
| 1053 | |||
| 1043 | /* refcount was already taken, and we're keeping it */ | 1054 | /* refcount was already taken, and we're keeping it */ |
| 1055 | root->subsys_mask |= bit; | ||
| 1044 | } else if (bit & removed_mask) { | 1056 | } else if (bit & removed_mask) { |
| 1045 | /* We're removing this subsystem */ | 1057 | /* We're removing this subsystem */ |
| 1046 | BUG_ON(ss == NULL); | 1058 | BUG_ON(cgrp->subsys[i] != cgroup_dummy_top->subsys[i]); |
| 1047 | BUG_ON(cgrp->subsys[i] != dummytop->subsys[i]); | ||
| 1048 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); | 1059 | BUG_ON(cgrp->subsys[i]->cgroup != cgrp); |
| 1060 | |||
| 1049 | if (ss->bind) | 1061 | if (ss->bind) |
| 1050 | ss->bind(dummytop); | 1062 | ss->bind(cgroup_dummy_top); |
| 1051 | dummytop->subsys[i]->cgroup = dummytop; | 1063 | cgroup_dummy_top->subsys[i]->cgroup = cgroup_dummy_top; |
| 1052 | cgrp->subsys[i] = NULL; | 1064 | cgrp->subsys[i] = NULL; |
| 1053 | subsys[i]->root = &rootnode; | 1065 | cgroup_subsys[i]->root = &cgroup_dummy_root; |
| 1054 | list_move(&ss->sibling, &rootnode.subsys_list); | 1066 | list_move(&ss->sibling, &cgroup_dummy_root.subsys_list); |
| 1067 | |||
| 1055 | /* subsystem is now free - drop reference on module */ | 1068 | /* subsystem is now free - drop reference on module */ |
| 1056 | module_put(ss->module); | 1069 | module_put(ss->module); |
| 1057 | } else if (bit & final_subsys_mask) { | 1070 | root->subsys_mask &= ~bit; |
| 1071 | } else if (bit & root->subsys_mask) { | ||
| 1058 | /* Subsystem state should already exist */ | 1072 | /* Subsystem state should already exist */ |
| 1059 | BUG_ON(ss == NULL); | ||
| 1060 | BUG_ON(!cgrp->subsys[i]); | 1073 | BUG_ON(!cgrp->subsys[i]); |
| 1061 | /* | 1074 | /* |
| 1062 | * a refcount was taken, but we already had one, so | 1075 | * a refcount was taken, but we already had one, so |
| @@ -1071,7 +1084,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, | |||
| 1071 | BUG_ON(cgrp->subsys[i]); | 1084 | BUG_ON(cgrp->subsys[i]); |
| 1072 | } | 1085 | } |
| 1073 | } | 1086 | } |
| 1074 | root->subsys_mask = root->actual_subsys_mask = final_subsys_mask; | 1087 | |
| 1088 | /* | ||
| 1089 | * Mark that @root has finished binding subsystems. @root->subsys_mask | ||
| 1090 | * now matches the bound subsystems. | ||
| 1091 | */ | ||
| 1092 | root->flags |= CGRP_ROOT_SUBSYS_BOUND; | ||
| 1075 | 1093 | ||
| 1076 | return 0; | 1094 | return 0; |
| 1077 | } | 1095 | } |
| @@ -1082,7 +1100,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) | |||
| 1082 | struct cgroup_subsys *ss; | 1100 | struct cgroup_subsys *ss; |
| 1083 | 1101 | ||
| 1084 | mutex_lock(&cgroup_root_mutex); | 1102 | mutex_lock(&cgroup_root_mutex); |
| 1085 | for_each_subsys(root, ss) | 1103 | for_each_root_subsys(root, ss) |
| 1086 | seq_printf(seq, ",%s", ss->name); | 1104 | seq_printf(seq, ",%s", ss->name); |
| 1087 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) | 1105 | if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) |
| 1088 | seq_puts(seq, ",sane_behavior"); | 1106 | seq_puts(seq, ",sane_behavior"); |
| @@ -1114,18 +1132,19 @@ struct cgroup_sb_opts { | |||
| 1114 | }; | 1132 | }; |
| 1115 | 1133 | ||
| 1116 | /* | 1134 | /* |
| 1117 | * Convert a hierarchy specifier into a bitmask of subsystems and flags. Call | 1135 | * Convert a hierarchy specifier into a bitmask of subsystems and |
| 1118 | * with cgroup_mutex held to protect the subsys[] array. This function takes | 1136 | * flags. Call with cgroup_mutex held to protect the cgroup_subsys[] |
| 1119 | * refcounts on subsystems to be used, unless it returns error, in which case | 1137 | * array. This function takes refcounts on subsystems to be used, unless it |
| 1120 | * no refcounts are taken. | 1138 | * returns error, in which case no refcounts are taken. |
| 1121 | */ | 1139 | */ |
| 1122 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | 1140 | static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) |
| 1123 | { | 1141 | { |
| 1124 | char *token, *o = data; | 1142 | char *token, *o = data; |
| 1125 | bool all_ss = false, one_ss = false; | 1143 | bool all_ss = false, one_ss = false; |
| 1126 | unsigned long mask = (unsigned long)-1; | 1144 | unsigned long mask = (unsigned long)-1; |
| 1127 | int i; | ||
| 1128 | bool module_pin_failed = false; | 1145 | bool module_pin_failed = false; |
| 1146 | struct cgroup_subsys *ss; | ||
| 1147 | int i; | ||
| 1129 | 1148 | ||
| 1130 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); | 1149 | BUG_ON(!mutex_is_locked(&cgroup_mutex)); |
| 1131 | 1150 | ||
| @@ -1202,10 +1221,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1202 | continue; | 1221 | continue; |
| 1203 | } | 1222 | } |
| 1204 | 1223 | ||
| 1205 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1224 | for_each_subsys(ss, i) { |
| 1206 | struct cgroup_subsys *ss = subsys[i]; | ||
| 1207 | if (ss == NULL) | ||
| 1208 | continue; | ||
| 1209 | if (strcmp(token, ss->name)) | 1225 | if (strcmp(token, ss->name)) |
| 1210 | continue; | 1226 | continue; |
| 1211 | if (ss->disabled) | 1227 | if (ss->disabled) |
| @@ -1228,16 +1244,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1228 | * otherwise if 'none', 'name=' and a subsystem name options | 1244 | * otherwise if 'none', 'name=' and a subsystem name options |
| 1229 | * were not specified, let's default to 'all' | 1245 | * were not specified, let's default to 'all' |
| 1230 | */ | 1246 | */ |
| 1231 | if (all_ss || (!one_ss && !opts->none && !opts->name)) { | 1247 | if (all_ss || (!one_ss && !opts->none && !opts->name)) |
| 1232 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1248 | for_each_subsys(ss, i) |
| 1233 | struct cgroup_subsys *ss = subsys[i]; | 1249 | if (!ss->disabled) |
| 1234 | if (ss == NULL) | 1250 | set_bit(i, &opts->subsys_mask); |
| 1235 | continue; | ||
| 1236 | if (ss->disabled) | ||
| 1237 | continue; | ||
| 1238 | set_bit(i, &opts->subsys_mask); | ||
| 1239 | } | ||
| 1240 | } | ||
| 1241 | 1251 | ||
| 1242 | /* Consistency checks */ | 1252 | /* Consistency checks */ |
| 1243 | 1253 | ||
| @@ -1281,12 +1291,10 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1281 | * take duplicate reference counts on a subsystem that's already used, | 1291 | * take duplicate reference counts on a subsystem that's already used, |
| 1282 | * but rebind_subsystems handles this case. | 1292 | * but rebind_subsystems handles this case. |
| 1283 | */ | 1293 | */ |
| 1284 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 1294 | for_each_subsys(ss, i) { |
| 1285 | unsigned long bit = 1UL << i; | 1295 | if (!(opts->subsys_mask & (1UL << i))) |
| 1286 | |||
| 1287 | if (!(bit & opts->subsys_mask)) | ||
| 1288 | continue; | 1296 | continue; |
| 1289 | if (!try_module_get(subsys[i]->module)) { | 1297 | if (!try_module_get(cgroup_subsys[i]->module)) { |
| 1290 | module_pin_failed = true; | 1298 | module_pin_failed = true; |
| 1291 | break; | 1299 | break; |
| 1292 | } | 1300 | } |
| @@ -1303,7 +1311,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1303 | 1311 | ||
| 1304 | if (!(bit & opts->subsys_mask)) | 1312 | if (!(bit & opts->subsys_mask)) |
| 1305 | continue; | 1313 | continue; |
| 1306 | module_put(subsys[i]->module); | 1314 | module_put(cgroup_subsys[i]->module); |
| 1307 | } | 1315 | } |
| 1308 | return -ENOENT; | 1316 | return -ENOENT; |
| 1309 | } | 1317 | } |
| @@ -1313,14 +1321,14 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) | |||
| 1313 | 1321 | ||
| 1314 | static void drop_parsed_module_refcounts(unsigned long subsys_mask) | 1322 | static void drop_parsed_module_refcounts(unsigned long subsys_mask) |
| 1315 | { | 1323 | { |
| 1324 | struct cgroup_subsys *ss; | ||
| 1316 | int i; | 1325 | int i; |
| 1317 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 1318 | unsigned long bit = 1UL << i; | ||
| 1319 | 1326 | ||
| 1320 | if (!(bit & subsys_mask)) | 1327 | mutex_lock(&cgroup_mutex); |
| 1321 | continue; | 1328 | for_each_subsys(ss, i) |
| 1322 | module_put(subsys[i]->module); | 1329 | if (subsys_mask & (1UL << i)) |
| 1323 | } | 1330 | module_put(cgroup_subsys[i]->module); |
| 1331 | mutex_unlock(&cgroup_mutex); | ||
| 1324 | } | 1332 | } |
| 1325 | 1333 | ||
| 1326 | static int cgroup_remount(struct super_block *sb, int *flags, char *data) | 1334 | static int cgroup_remount(struct super_block *sb, int *flags, char *data) |
| @@ -1345,7 +1353,7 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1345 | if (ret) | 1353 | if (ret) |
| 1346 | goto out_unlock; | 1354 | goto out_unlock; |
| 1347 | 1355 | ||
| 1348 | if (opts.subsys_mask != root->actual_subsys_mask || opts.release_agent) | 1356 | if (opts.subsys_mask != root->subsys_mask || opts.release_agent) |
| 1349 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", | 1357 | pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n", |
| 1350 | task_tgid_nr(current), current->comm); | 1358 | task_tgid_nr(current), current->comm); |
| 1351 | 1359 | ||
| @@ -1353,10 +1361,12 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1353 | removed_mask = root->subsys_mask & ~opts.subsys_mask; | 1361 | removed_mask = root->subsys_mask & ~opts.subsys_mask; |
| 1354 | 1362 | ||
| 1355 | /* Don't allow flags or name to change at remount */ | 1363 | /* Don't allow flags or name to change at remount */ |
| 1356 | if (opts.flags != root->flags || | 1364 | if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) || |
| 1357 | (opts.name && strcmp(opts.name, root->name))) { | 1365 | (opts.name && strcmp(opts.name, root->name))) { |
| 1366 | pr_err("cgroup: option or name mismatch, new: 0x%lx \"%s\", old: 0x%lx \"%s\"\n", | ||
| 1367 | opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "", | ||
| 1368 | root->flags & CGRP_ROOT_OPTION_MASK, root->name); | ||
| 1358 | ret = -EINVAL; | 1369 | ret = -EINVAL; |
| 1359 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
| 1360 | goto out_unlock; | 1370 | goto out_unlock; |
| 1361 | } | 1371 | } |
| 1362 | 1372 | ||
| @@ -1367,11 +1377,10 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1367 | */ | 1377 | */ |
| 1368 | cgroup_clear_directory(cgrp->dentry, false, removed_mask); | 1378 | cgroup_clear_directory(cgrp->dentry, false, removed_mask); |
| 1369 | 1379 | ||
| 1370 | ret = rebind_subsystems(root, opts.subsys_mask); | 1380 | ret = rebind_subsystems(root, added_mask, removed_mask); |
| 1371 | if (ret) { | 1381 | if (ret) { |
| 1372 | /* rebind_subsystems failed, re-populate the removed files */ | 1382 | /* rebind_subsystems failed, re-populate the removed files */ |
| 1373 | cgroup_populate_dir(cgrp, false, removed_mask); | 1383 | cgroup_populate_dir(cgrp, false, removed_mask); |
| 1374 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
| 1375 | goto out_unlock; | 1384 | goto out_unlock; |
| 1376 | } | 1385 | } |
| 1377 | 1386 | ||
| @@ -1386,6 +1395,8 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) | |||
| 1386 | mutex_unlock(&cgroup_root_mutex); | 1395 | mutex_unlock(&cgroup_root_mutex); |
| 1387 | mutex_unlock(&cgroup_mutex); | 1396 | mutex_unlock(&cgroup_mutex); |
| 1388 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); | 1397 | mutex_unlock(&cgrp->dentry->d_inode->i_mutex); |
| 1398 | if (ret) | ||
| 1399 | drop_parsed_module_refcounts(opts.subsys_mask); | ||
| 1389 | return ret; | 1400 | return ret; |
| 1390 | } | 1401 | } |
| 1391 | 1402 | ||
| @@ -1401,11 +1412,9 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) | |||
| 1401 | INIT_LIST_HEAD(&cgrp->sibling); | 1412 | INIT_LIST_HEAD(&cgrp->sibling); |
| 1402 | INIT_LIST_HEAD(&cgrp->children); | 1413 | INIT_LIST_HEAD(&cgrp->children); |
| 1403 | INIT_LIST_HEAD(&cgrp->files); | 1414 | INIT_LIST_HEAD(&cgrp->files); |
| 1404 | INIT_LIST_HEAD(&cgrp->css_sets); | 1415 | INIT_LIST_HEAD(&cgrp->cset_links); |
| 1405 | INIT_LIST_HEAD(&cgrp->allcg_node); | ||
| 1406 | INIT_LIST_HEAD(&cgrp->release_list); | 1416 | INIT_LIST_HEAD(&cgrp->release_list); |
| 1407 | INIT_LIST_HEAD(&cgrp->pidlists); | 1417 | INIT_LIST_HEAD(&cgrp->pidlists); |
| 1408 | INIT_WORK(&cgrp->free_work, cgroup_free_fn); | ||
| 1409 | mutex_init(&cgrp->pidlist_mutex); | 1418 | mutex_init(&cgrp->pidlist_mutex); |
| 1410 | INIT_LIST_HEAD(&cgrp->event_list); | 1419 | INIT_LIST_HEAD(&cgrp->event_list); |
| 1411 | spin_lock_init(&cgrp->event_list_lock); | 1420 | spin_lock_init(&cgrp->event_list_lock); |
| @@ -1418,37 +1427,37 @@ static void init_cgroup_root(struct cgroupfs_root *root) | |||
| 1418 | 1427 | ||
| 1419 | INIT_LIST_HEAD(&root->subsys_list); | 1428 | INIT_LIST_HEAD(&root->subsys_list); |
| 1420 | INIT_LIST_HEAD(&root->root_list); | 1429 | INIT_LIST_HEAD(&root->root_list); |
| 1421 | INIT_LIST_HEAD(&root->allcg_list); | ||
| 1422 | root->number_of_cgroups = 1; | 1430 | root->number_of_cgroups = 1; |
| 1423 | cgrp->root = root; | 1431 | cgrp->root = root; |
| 1424 | cgrp->name = &root_cgroup_name; | 1432 | RCU_INIT_POINTER(cgrp->name, &root_cgroup_name); |
| 1425 | init_cgroup_housekeeping(cgrp); | 1433 | init_cgroup_housekeeping(cgrp); |
| 1426 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
| 1427 | } | 1434 | } |
| 1428 | 1435 | ||
| 1429 | static bool init_root_id(struct cgroupfs_root *root) | 1436 | static int cgroup_init_root_id(struct cgroupfs_root *root, int start, int end) |
| 1430 | { | 1437 | { |
| 1431 | int ret = 0; | 1438 | int id; |
| 1432 | 1439 | ||
| 1433 | do { | 1440 | lockdep_assert_held(&cgroup_mutex); |
| 1434 | if (!ida_pre_get(&hierarchy_ida, GFP_KERNEL)) | 1441 | lockdep_assert_held(&cgroup_root_mutex); |
| 1435 | return false; | 1442 | |
| 1436 | spin_lock(&hierarchy_id_lock); | 1443 | id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, start, end, |
| 1437 | /* Try to allocate the next unused ID */ | 1444 | GFP_KERNEL); |
| 1438 | ret = ida_get_new_above(&hierarchy_ida, next_hierarchy_id, | 1445 | if (id < 0) |
| 1439 | &root->hierarchy_id); | 1446 | return id; |
| 1440 | if (ret == -ENOSPC) | 1447 | |
| 1441 | /* Try again starting from 0 */ | 1448 | root->hierarchy_id = id; |
| 1442 | ret = ida_get_new(&hierarchy_ida, &root->hierarchy_id); | 1449 | return 0; |
| 1443 | if (!ret) { | 1450 | } |
| 1444 | next_hierarchy_id = root->hierarchy_id + 1; | 1451 | |
| 1445 | } else if (ret != -EAGAIN) { | 1452 | static void cgroup_exit_root_id(struct cgroupfs_root *root) |
| 1446 | /* Can only get here if the 31-bit IDR is full ... */ | 1453 | { |
| 1447 | BUG_ON(ret); | 1454 | lockdep_assert_held(&cgroup_mutex); |
| 1448 | } | 1455 | lockdep_assert_held(&cgroup_root_mutex); |
| 1449 | spin_unlock(&hierarchy_id_lock); | 1456 | |
| 1450 | } while (ret); | 1457 | if (root->hierarchy_id) { |
| 1451 | return true; | 1458 | idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id); |
| 1459 | root->hierarchy_id = 0; | ||
| 1460 | } | ||
| 1452 | } | 1461 | } |
| 1453 | 1462 | ||
| 1454 | static int cgroup_test_super(struct super_block *sb, void *data) | 1463 | static int cgroup_test_super(struct super_block *sb, void *data) |
| @@ -1482,12 +1491,16 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
| 1482 | if (!root) | 1491 | if (!root) |
| 1483 | return ERR_PTR(-ENOMEM); | 1492 | return ERR_PTR(-ENOMEM); |
| 1484 | 1493 | ||
| 1485 | if (!init_root_id(root)) { | ||
| 1486 | kfree(root); | ||
| 1487 | return ERR_PTR(-ENOMEM); | ||
| 1488 | } | ||
| 1489 | init_cgroup_root(root); | 1494 | init_cgroup_root(root); |
| 1490 | 1495 | ||
| 1496 | /* | ||
| 1497 | * We need to set @root->subsys_mask now so that @root can be | ||
| 1498 | * matched by cgroup_test_super() before it finishes | ||
| 1499 | * initialization; otherwise, competing mounts with the same | ||
| 1500 | * options may try to bind the same subsystems instead of waiting | ||
| 1501 | * for the first one, leading to unexpected mount errors. | ||
| 1502 | * SUBSYS_BOUND will be set once actual binding is complete. | ||
| 1503 | */ | ||
| 1491 | root->subsys_mask = opts->subsys_mask; | 1504 | root->subsys_mask = opts->subsys_mask; |
| 1492 | root->flags = opts->flags; | 1505 | root->flags = opts->flags; |
| 1493 | ida_init(&root->cgroup_ida); | 1506 | ida_init(&root->cgroup_ida); |
| @@ -1500,17 +1513,15 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) | |||
| 1500 | return root; | 1513 | return root; |
| 1501 | } | 1514 | } |
| 1502 | 1515 | ||
| 1503 | static void cgroup_drop_root(struct cgroupfs_root *root) | 1516 | static void cgroup_free_root(struct cgroupfs_root *root) |
| 1504 | { | 1517 | { |
| 1505 | if (!root) | 1518 | if (root) { |
| 1506 | return; | 1519 | /* hierarchy ID should already have been released */ |
| 1520 | WARN_ON_ONCE(root->hierarchy_id); | ||
| 1507 | 1521 | ||
| 1508 | BUG_ON(!root->hierarchy_id); | 1522 | ida_destroy(&root->cgroup_ida); |
| 1509 | spin_lock(&hierarchy_id_lock); | 1523 | kfree(root); |
| 1510 | ida_remove(&hierarchy_ida, root->hierarchy_id); | 1524 | } |
| 1511 | spin_unlock(&hierarchy_id_lock); | ||
| 1512 | ida_destroy(&root->cgroup_ida); | ||
| 1513 | kfree(root); | ||
| 1514 | } | 1525 | } |
| 1515 | 1526 | ||
| 1516 | static int cgroup_set_super(struct super_block *sb, void *data) | 1527 | static int cgroup_set_super(struct super_block *sb, void *data) |
| @@ -1597,7 +1608,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1597 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); | 1608 | sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); |
| 1598 | if (IS_ERR(sb)) { | 1609 | if (IS_ERR(sb)) { |
| 1599 | ret = PTR_ERR(sb); | 1610 | ret = PTR_ERR(sb); |
| 1600 | cgroup_drop_root(opts.new_root); | 1611 | cgroup_free_root(opts.new_root); |
| 1601 | goto drop_modules; | 1612 | goto drop_modules; |
| 1602 | } | 1613 | } |
| 1603 | 1614 | ||
| @@ -1605,12 +1616,12 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1605 | BUG_ON(!root); | 1616 | BUG_ON(!root); |
| 1606 | if (root == opts.new_root) { | 1617 | if (root == opts.new_root) { |
| 1607 | /* We used the new root structure, so this is a new hierarchy */ | 1618 | /* We used the new root structure, so this is a new hierarchy */ |
| 1608 | struct list_head tmp_cg_links; | 1619 | struct list_head tmp_links; |
| 1609 | struct cgroup *root_cgrp = &root->top_cgroup; | 1620 | struct cgroup *root_cgrp = &root->top_cgroup; |
| 1610 | struct cgroupfs_root *existing_root; | 1621 | struct cgroupfs_root *existing_root; |
| 1611 | const struct cred *cred; | 1622 | const struct cred *cred; |
| 1612 | int i; | 1623 | int i; |
| 1613 | struct css_set *cg; | 1624 | struct css_set *cset; |
| 1614 | 1625 | ||
| 1615 | BUG_ON(sb->s_root != NULL); | 1626 | BUG_ON(sb->s_root != NULL); |
| 1616 | 1627 | ||
| @@ -1637,13 +1648,18 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1637 | * that's us. The worst that can happen is that we | 1648 | * that's us. The worst that can happen is that we |
| 1638 | * have some link structures left over | 1649 | * have some link structures left over |
| 1639 | */ | 1650 | */ |
| 1640 | ret = allocate_cg_links(css_set_count, &tmp_cg_links); | 1651 | ret = allocate_cgrp_cset_links(css_set_count, &tmp_links); |
| 1652 | if (ret) | ||
| 1653 | goto unlock_drop; | ||
| 1654 | |||
| 1655 | /* ID 0 is reserved for dummy root, 1 for unified hierarchy */ | ||
| 1656 | ret = cgroup_init_root_id(root, 2, 0); | ||
| 1641 | if (ret) | 1657 | if (ret) |
| 1642 | goto unlock_drop; | 1658 | goto unlock_drop; |
| 1643 | 1659 | ||
| 1644 | ret = rebind_subsystems(root, root->subsys_mask); | 1660 | ret = rebind_subsystems(root, root->subsys_mask, 0); |
| 1645 | if (ret == -EBUSY) { | 1661 | if (ret == -EBUSY) { |
| 1646 | free_cg_links(&tmp_cg_links); | 1662 | free_cgrp_cset_links(&tmp_links); |
| 1647 | goto unlock_drop; | 1663 | goto unlock_drop; |
| 1648 | } | 1664 | } |
| 1649 | /* | 1665 | /* |
| @@ -1655,8 +1671,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1655 | /* EBUSY should be the only error here */ | 1671 | /* EBUSY should be the only error here */ |
| 1656 | BUG_ON(ret); | 1672 | BUG_ON(ret); |
| 1657 | 1673 | ||
| 1658 | list_add(&root->root_list, &roots); | 1674 | list_add(&root->root_list, &cgroup_roots); |
| 1659 | root_count++; | 1675 | cgroup_root_count++; |
| 1660 | 1676 | ||
| 1661 | sb->s_root->d_fsdata = root_cgrp; | 1677 | sb->s_root->d_fsdata = root_cgrp; |
| 1662 | root->top_cgroup.dentry = sb->s_root; | 1678 | root->top_cgroup.dentry = sb->s_root; |
| @@ -1664,11 +1680,11 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1664 | /* Link the top cgroup in this hierarchy into all | 1680 | /* Link the top cgroup in this hierarchy into all |
| 1665 | * the css_set objects */ | 1681 | * the css_set objects */ |
| 1666 | write_lock(&css_set_lock); | 1682 | write_lock(&css_set_lock); |
| 1667 | hash_for_each(css_set_table, i, cg, hlist) | 1683 | hash_for_each(css_set_table, i, cset, hlist) |
| 1668 | link_css_set(&tmp_cg_links, cg, root_cgrp); | 1684 | link_css_set(&tmp_links, cset, root_cgrp); |
| 1669 | write_unlock(&css_set_lock); | 1685 | write_unlock(&css_set_lock); |
| 1670 | 1686 | ||
| 1671 | free_cg_links(&tmp_cg_links); | 1687 | free_cgrp_cset_links(&tmp_links); |
| 1672 | 1688 | ||
| 1673 | BUG_ON(!list_empty(&root_cgrp->children)); | 1689 | BUG_ON(!list_empty(&root_cgrp->children)); |
| 1674 | BUG_ON(root->number_of_cgroups != 1); | 1690 | BUG_ON(root->number_of_cgroups != 1); |
| @@ -1684,9 +1700,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1684 | * We re-used an existing hierarchy - the new root (if | 1700 | * We re-used an existing hierarchy - the new root (if |
| 1685 | * any) is not needed | 1701 | * any) is not needed |
| 1686 | */ | 1702 | */ |
| 1687 | cgroup_drop_root(opts.new_root); | 1703 | cgroup_free_root(opts.new_root); |
| 1688 | 1704 | ||
| 1689 | if (root->flags != opts.flags) { | 1705 | if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) { |
| 1690 | if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { | 1706 | if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) { |
| 1691 | pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); | 1707 | pr_err("cgroup: sane_behavior: new mount options should match the existing superblock\n"); |
| 1692 | ret = -EINVAL; | 1708 | ret = -EINVAL; |
| @@ -1705,6 +1721,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1705 | return dget(sb->s_root); | 1721 | return dget(sb->s_root); |
| 1706 | 1722 | ||
| 1707 | unlock_drop: | 1723 | unlock_drop: |
| 1724 | cgroup_exit_root_id(root); | ||
| 1708 | mutex_unlock(&cgroup_root_mutex); | 1725 | mutex_unlock(&cgroup_root_mutex); |
| 1709 | mutex_unlock(&cgroup_mutex); | 1726 | mutex_unlock(&cgroup_mutex); |
| 1710 | mutex_unlock(&inode->i_mutex); | 1727 | mutex_unlock(&inode->i_mutex); |
| @@ -1721,9 +1738,8 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, | |||
| 1721 | static void cgroup_kill_sb(struct super_block *sb) { | 1738 | static void cgroup_kill_sb(struct super_block *sb) { |
| 1722 | struct cgroupfs_root *root = sb->s_fs_info; | 1739 | struct cgroupfs_root *root = sb->s_fs_info; |
| 1723 | struct cgroup *cgrp = &root->top_cgroup; | 1740 | struct cgroup *cgrp = &root->top_cgroup; |
| 1741 | struct cgrp_cset_link *link, *tmp_link; | ||
| 1724 | int ret; | 1742 | int ret; |
| 1725 | struct cg_cgroup_link *link; | ||
| 1726 | struct cg_cgroup_link *saved_link; | ||
| 1727 | 1743 | ||
| 1728 | BUG_ON(!root); | 1744 | BUG_ON(!root); |
| 1729 | 1745 | ||
| @@ -1734,36 +1750,39 @@ static void cgroup_kill_sb(struct super_block *sb) { | |||
| 1734 | mutex_lock(&cgroup_root_mutex); | 1750 | mutex_lock(&cgroup_root_mutex); |
| 1735 | 1751 | ||
| 1736 | /* Rebind all subsystems back to the default hierarchy */ | 1752 | /* Rebind all subsystems back to the default hierarchy */ |
| 1737 | ret = rebind_subsystems(root, 0); | 1753 | if (root->flags & CGRP_ROOT_SUBSYS_BOUND) { |
| 1738 | /* Shouldn't be able to fail ... */ | 1754 | ret = rebind_subsystems(root, 0, root->subsys_mask); |
| 1739 | BUG_ON(ret); | 1755 | /* Shouldn't be able to fail ... */ |
| 1756 | BUG_ON(ret); | ||
| 1757 | } | ||
| 1740 | 1758 | ||
| 1741 | /* | 1759 | /* |
| 1742 | * Release all the links from css_sets to this hierarchy's | 1760 | * Release all the links from cset_links to this hierarchy's |
| 1743 | * root cgroup | 1761 | * root cgroup |
| 1744 | */ | 1762 | */ |
| 1745 | write_lock(&css_set_lock); | 1763 | write_lock(&css_set_lock); |
| 1746 | 1764 | ||
| 1747 | list_for_each_entry_safe(link, saved_link, &cgrp->css_sets, | 1765 | list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) { |
| 1748 | cgrp_link_list) { | 1766 | list_del(&link->cset_link); |
| 1749 | list_del(&link->cg_link_list); | 1767 | list_del(&link->cgrp_link); |
| 1750 | list_del(&link->cgrp_link_list); | ||
| 1751 | kfree(link); | 1768 | kfree(link); |
| 1752 | } | 1769 | } |
| 1753 | write_unlock(&css_set_lock); | 1770 | write_unlock(&css_set_lock); |
| 1754 | 1771 | ||
| 1755 | if (!list_empty(&root->root_list)) { | 1772 | if (!list_empty(&root->root_list)) { |
| 1756 | list_del(&root->root_list); | 1773 | list_del(&root->root_list); |
| 1757 | root_count--; | 1774 | cgroup_root_count--; |
| 1758 | } | 1775 | } |
| 1759 | 1776 | ||
| 1777 | cgroup_exit_root_id(root); | ||
| 1778 | |||
| 1760 | mutex_unlock(&cgroup_root_mutex); | 1779 | mutex_unlock(&cgroup_root_mutex); |
| 1761 | mutex_unlock(&cgroup_mutex); | 1780 | mutex_unlock(&cgroup_mutex); |
| 1762 | 1781 | ||
| 1763 | simple_xattrs_free(&cgrp->xattrs); | 1782 | simple_xattrs_free(&cgrp->xattrs); |
| 1764 | 1783 | ||
| 1765 | kill_litter_super(sb); | 1784 | kill_litter_super(sb); |
| 1766 | cgroup_drop_root(root); | 1785 | cgroup_free_root(root); |
| 1767 | } | 1786 | } |
| 1768 | 1787 | ||
| 1769 | static struct file_system_type cgroup_fs_type = { | 1788 | static struct file_system_type cgroup_fs_type = { |
| @@ -1825,6 +1844,45 @@ out: | |||
| 1825 | } | 1844 | } |
| 1826 | EXPORT_SYMBOL_GPL(cgroup_path); | 1845 | EXPORT_SYMBOL_GPL(cgroup_path); |
| 1827 | 1846 | ||
| 1847 | /** | ||
| 1848 | * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy | ||
| 1849 | * @task: target task | ||
| 1850 | * @buf: the buffer to write the path into | ||
| 1851 | * @buflen: the length of the buffer | ||
| 1852 | * | ||
| 1853 | * Determine @task's cgroup on the first (the one with the lowest non-zero | ||
| 1854 | * hierarchy_id) cgroup hierarchy and copy its path into @buf. This | ||
| 1855 | * function grabs cgroup_mutex and shouldn't be used inside locks used by | ||
| 1856 | * cgroup controller callbacks. | ||
| 1857 | * | ||
| 1858 | * Returns 0 on success, fails with -%ENAMETOOLONG if @buflen is too short. | ||
| 1859 | */ | ||
| 1860 | int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen) | ||
| 1861 | { | ||
| 1862 | struct cgroupfs_root *root; | ||
| 1863 | struct cgroup *cgrp; | ||
| 1864 | int hierarchy_id = 1, ret = 0; | ||
| 1865 | |||
| 1866 | if (buflen < 2) | ||
| 1867 | return -ENAMETOOLONG; | ||
| 1868 | |||
| 1869 | mutex_lock(&cgroup_mutex); | ||
| 1870 | |||
| 1871 | root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id); | ||
| 1872 | |||
| 1873 | if (root) { | ||
| 1874 | cgrp = task_cgroup_from_root(task, root); | ||
| 1875 | ret = cgroup_path(cgrp, buf, buflen); | ||
| 1876 | } else { | ||
| 1877 | /* if no hierarchy exists, everyone is in "/" */ | ||
| 1878 | memcpy(buf, "/", 2); | ||
| 1879 | } | ||
| 1880 | |||
| 1881 | mutex_unlock(&cgroup_mutex); | ||
| 1882 | return ret; | ||
| 1883 | } | ||
| 1884 | EXPORT_SYMBOL_GPL(task_cgroup_path); | ||
| 1885 | |||
| 1828 | /* | 1886 | /* |
| 1829 | * Control Group taskset | 1887 | * Control Group taskset |
| 1830 | */ | 1888 | */ |
| @@ -1910,10 +1968,11 @@ EXPORT_SYMBOL_GPL(cgroup_taskset_size); | |||
| 1910 | * | 1968 | * |
| 1911 | * Must be called with cgroup_mutex and threadgroup locked. | 1969 | * Must be called with cgroup_mutex and threadgroup locked. |
| 1912 | */ | 1970 | */ |
| 1913 | static void cgroup_task_migrate(struct cgroup *oldcgrp, | 1971 | static void cgroup_task_migrate(struct cgroup *old_cgrp, |
| 1914 | struct task_struct *tsk, struct css_set *newcg) | 1972 | struct task_struct *tsk, |
| 1973 | struct css_set *new_cset) | ||
| 1915 | { | 1974 | { |
| 1916 | struct css_set *oldcg; | 1975 | struct css_set *old_cset; |
| 1917 | 1976 | ||
| 1918 | /* | 1977 | /* |
| 1919 | * We are synchronized through threadgroup_lock() against PF_EXITING | 1978 | * We are synchronized through threadgroup_lock() against PF_EXITING |
| @@ -1921,25 +1980,25 @@ static void cgroup_task_migrate(struct cgroup *oldcgrp, | |||
| 1921 | * css_set to init_css_set and dropping the old one. | 1980 | * css_set to init_css_set and dropping the old one. |
| 1922 | */ | 1981 | */ |
| 1923 | WARN_ON_ONCE(tsk->flags & PF_EXITING); | 1982 | WARN_ON_ONCE(tsk->flags & PF_EXITING); |
| 1924 | oldcg = tsk->cgroups; | 1983 | old_cset = task_css_set(tsk); |
| 1925 | 1984 | ||
| 1926 | task_lock(tsk); | 1985 | task_lock(tsk); |
| 1927 | rcu_assign_pointer(tsk->cgroups, newcg); | 1986 | rcu_assign_pointer(tsk->cgroups, new_cset); |
| 1928 | task_unlock(tsk); | 1987 | task_unlock(tsk); |
| 1929 | 1988 | ||
| 1930 | /* Update the css_set linked lists if we're using them */ | 1989 | /* Update the css_set linked lists if we're using them */ |
| 1931 | write_lock(&css_set_lock); | 1990 | write_lock(&css_set_lock); |
| 1932 | if (!list_empty(&tsk->cg_list)) | 1991 | if (!list_empty(&tsk->cg_list)) |
| 1933 | list_move(&tsk->cg_list, &newcg->tasks); | 1992 | list_move(&tsk->cg_list, &new_cset->tasks); |
| 1934 | write_unlock(&css_set_lock); | 1993 | write_unlock(&css_set_lock); |
| 1935 | 1994 | ||
| 1936 | /* | 1995 | /* |
| 1937 | * We just gained a reference on oldcg by taking it from the task. As | 1996 | * We just gained a reference on old_cset by taking it from the |
| 1938 | * trading it for newcg is protected by cgroup_mutex, we're safe to drop | 1997 | * task. As trading it for new_cset is protected by cgroup_mutex, |
| 1939 | * it here; it will be freed under RCU. | 1998 | * we're safe to drop it here; it will be freed under RCU. |
| 1940 | */ | 1999 | */ |
| 1941 | set_bit(CGRP_RELEASABLE, &oldcgrp->flags); | 2000 | set_bit(CGRP_RELEASABLE, &old_cgrp->flags); |
| 1942 | put_css_set(oldcg); | 2001 | put_css_set(old_cset); |
| 1943 | } | 2002 | } |
| 1944 | 2003 | ||
| 1945 | /** | 2004 | /** |
| @@ -2029,7 +2088,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 2029 | /* | 2088 | /* |
| 2030 | * step 1: check that we can legitimately attach to the cgroup. | 2089 | * step 1: check that we can legitimately attach to the cgroup. |
| 2031 | */ | 2090 | */ |
| 2032 | for_each_subsys(root, ss) { | 2091 | for_each_root_subsys(root, ss) { |
| 2033 | if (ss->can_attach) { | 2092 | if (ss->can_attach) { |
| 2034 | retval = ss->can_attach(cgrp, &tset); | 2093 | retval = ss->can_attach(cgrp, &tset); |
| 2035 | if (retval) { | 2094 | if (retval) { |
| @@ -2044,8 +2103,11 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 2044 | * we use find_css_set, which allocates a new one if necessary. | 2103 | * we use find_css_set, which allocates a new one if necessary. |
| 2045 | */ | 2104 | */ |
| 2046 | for (i = 0; i < group_size; i++) { | 2105 | for (i = 0; i < group_size; i++) { |
| 2106 | struct css_set *old_cset; | ||
| 2107 | |||
| 2047 | tc = flex_array_get(group, i); | 2108 | tc = flex_array_get(group, i); |
| 2048 | tc->cg = find_css_set(tc->task->cgroups, cgrp); | 2109 | old_cset = task_css_set(tc->task); |
| 2110 | tc->cg = find_css_set(old_cset, cgrp); | ||
| 2049 | if (!tc->cg) { | 2111 | if (!tc->cg) { |
| 2050 | retval = -ENOMEM; | 2112 | retval = -ENOMEM; |
| 2051 | goto out_put_css_set_refs; | 2113 | goto out_put_css_set_refs; |
| @@ -2066,7 +2128,7 @@ static int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk, | |||
| 2066 | /* | 2128 | /* |
| 2067 | * step 4: do subsystem attach callbacks. | 2129 | * step 4: do subsystem attach callbacks. |
| 2068 | */ | 2130 | */ |
| 2069 | for_each_subsys(root, ss) { | 2131 | for_each_root_subsys(root, ss) { |
| 2070 | if (ss->attach) | 2132 | if (ss->attach) |
| 2071 | ss->attach(cgrp, &tset); | 2133 | ss->attach(cgrp, &tset); |
| 2072 | } | 2134 | } |
| @@ -2086,7 +2148,7 @@ out_put_css_set_refs: | |||
| 2086 | } | 2148 | } |
| 2087 | out_cancel_attach: | 2149 | out_cancel_attach: |
| 2088 | if (retval) { | 2150 | if (retval) { |
| 2089 | for_each_subsys(root, ss) { | 2151 | for_each_root_subsys(root, ss) { |
| 2090 | if (ss == failed_ss) | 2152 | if (ss == failed_ss) |
| 2091 | break; | 2153 | break; |
| 2092 | if (ss->cancel_attach) | 2154 | if (ss->cancel_attach) |
| @@ -2323,7 +2385,7 @@ static ssize_t cgroup_file_write(struct file *file, const char __user *buf, | |||
| 2323 | struct cftype *cft = __d_cft(file->f_dentry); | 2385 | struct cftype *cft = __d_cft(file->f_dentry); |
| 2324 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2386 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
| 2325 | 2387 | ||
| 2326 | if (cgroup_is_removed(cgrp)) | 2388 | if (cgroup_is_dead(cgrp)) |
| 2327 | return -ENODEV; | 2389 | return -ENODEV; |
| 2328 | if (cft->write) | 2390 | if (cft->write) |
| 2329 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); | 2391 | return cft->write(cgrp, cft, file, buf, nbytes, ppos); |
| @@ -2368,7 +2430,7 @@ static ssize_t cgroup_file_read(struct file *file, char __user *buf, | |||
| 2368 | struct cftype *cft = __d_cft(file->f_dentry); | 2430 | struct cftype *cft = __d_cft(file->f_dentry); |
| 2369 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); | 2431 | struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent); |
| 2370 | 2432 | ||
| 2371 | if (cgroup_is_removed(cgrp)) | 2433 | if (cgroup_is_dead(cgrp)) |
| 2372 | return -ENODEV; | 2434 | return -ENODEV; |
| 2373 | 2435 | ||
| 2374 | if (cft->read) | 2436 | if (cft->read) |
| @@ -2435,10 +2497,12 @@ static int cgroup_file_open(struct inode *inode, struct file *file) | |||
| 2435 | cft = __d_cft(file->f_dentry); | 2497 | cft = __d_cft(file->f_dentry); |
| 2436 | 2498 | ||
| 2437 | if (cft->read_map || cft->read_seq_string) { | 2499 | if (cft->read_map || cft->read_seq_string) { |
| 2438 | struct cgroup_seqfile_state *state = | 2500 | struct cgroup_seqfile_state *state; |
| 2439 | kzalloc(sizeof(*state), GFP_USER); | 2501 | |
| 2502 | state = kzalloc(sizeof(*state), GFP_USER); | ||
| 2440 | if (!state) | 2503 | if (!state) |
| 2441 | return -ENOMEM; | 2504 | return -ENOMEM; |
| 2505 | |||
| 2442 | state->cft = cft; | 2506 | state->cft = cft; |
| 2443 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); | 2507 | state->cgroup = __d_cgrp(file->f_dentry->d_parent); |
| 2444 | file->f_op = &cgroup_seqfile_operations; | 2508 | file->f_op = &cgroup_seqfile_operations; |
| @@ -2486,6 +2550,13 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2486 | 2550 | ||
| 2487 | cgrp = __d_cgrp(old_dentry); | 2551 | cgrp = __d_cgrp(old_dentry); |
| 2488 | 2552 | ||
| 2553 | /* | ||
| 2554 | * This isn't a proper migration and its usefulness is very | ||
| 2555 | * limited. Disallow if sane_behavior. | ||
| 2556 | */ | ||
| 2557 | if (cgroup_sane_behavior(cgrp)) | ||
| 2558 | return -EPERM; | ||
| 2559 | |||
| 2489 | name = cgroup_alloc_name(new_dentry); | 2560 | name = cgroup_alloc_name(new_dentry); |
| 2490 | if (!name) | 2561 | if (!name) |
| 2491 | return -ENOMEM; | 2562 | return -ENOMEM; |
| @@ -2496,7 +2567,7 @@ static int cgroup_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 2496 | return ret; | 2567 | return ret; |
| 2497 | } | 2568 | } |
| 2498 | 2569 | ||
| 2499 | old_name = cgrp->name; | 2570 | old_name = rcu_dereference_protected(cgrp->name, true); |
| 2500 | rcu_assign_pointer(cgrp->name, name); | 2571 | rcu_assign_pointer(cgrp->name, name); |
| 2501 | 2572 | ||
| 2502 | kfree_rcu(old_name, rcu_head); | 2573 | kfree_rcu(old_name, rcu_head); |
| @@ -2577,7 +2648,7 @@ static const struct inode_operations cgroup_file_inode_operations = { | |||
| 2577 | }; | 2648 | }; |
| 2578 | 2649 | ||
| 2579 | static const struct inode_operations cgroup_dir_inode_operations = { | 2650 | static const struct inode_operations cgroup_dir_inode_operations = { |
| 2580 | .lookup = cgroup_lookup, | 2651 | .lookup = simple_lookup, |
| 2581 | .mkdir = cgroup_mkdir, | 2652 | .mkdir = cgroup_mkdir, |
| 2582 | .rmdir = cgroup_rmdir, | 2653 | .rmdir = cgroup_rmdir, |
| 2583 | .rename = cgroup_rename, | 2654 | .rename = cgroup_rename, |
| @@ -2587,14 +2658,6 @@ static const struct inode_operations cgroup_dir_inode_operations = { | |||
| 2587 | .removexattr = cgroup_removexattr, | 2658 | .removexattr = cgroup_removexattr, |
| 2588 | }; | 2659 | }; |
| 2589 | 2660 | ||
| 2590 | static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | ||
| 2591 | { | ||
| 2592 | if (dentry->d_name.len > NAME_MAX) | ||
| 2593 | return ERR_PTR(-ENAMETOOLONG); | ||
| 2594 | d_add(dentry, NULL); | ||
| 2595 | return NULL; | ||
| 2596 | } | ||
| 2597 | |||
| 2598 | /* | 2661 | /* |
| 2599 | * Check if a file is a control file | 2662 | * Check if a file is a control file |
| 2600 | */ | 2663 | */ |
| @@ -2747,58 +2810,78 @@ static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, | |||
| 2747 | return ret; | 2810 | return ret; |
| 2748 | } | 2811 | } |
| 2749 | 2812 | ||
| 2750 | static DEFINE_MUTEX(cgroup_cft_mutex); | ||
| 2751 | |||
| 2752 | static void cgroup_cfts_prepare(void) | 2813 | static void cgroup_cfts_prepare(void) |
| 2753 | __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex) | 2814 | __acquires(&cgroup_mutex) |
| 2754 | { | 2815 | { |
| 2755 | /* | 2816 | /* |
| 2756 | * Thanks to the entanglement with vfs inode locking, we can't walk | 2817 | * Thanks to the entanglement with vfs inode locking, we can't walk |
| 2757 | * the existing cgroups under cgroup_mutex and create files. | 2818 | * the existing cgroups under cgroup_mutex and create files. |
| 2758 | * Instead, we increment reference on all cgroups and build list of | 2819 | * Instead, we use cgroup_for_each_descendant_pre() and drop RCU |
| 2759 | * them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure | 2820 | * read lock before calling cgroup_addrm_files(). |
| 2760 | * exclusive access to the field. | ||
| 2761 | */ | 2821 | */ |
| 2762 | mutex_lock(&cgroup_cft_mutex); | ||
| 2763 | mutex_lock(&cgroup_mutex); | 2822 | mutex_lock(&cgroup_mutex); |
| 2764 | } | 2823 | } |
| 2765 | 2824 | ||
| 2766 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, | 2825 | static void cgroup_cfts_commit(struct cgroup_subsys *ss, |
| 2767 | struct cftype *cfts, bool is_add) | 2826 | struct cftype *cfts, bool is_add) |
| 2768 | __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex) | 2827 | __releases(&cgroup_mutex) |
| 2769 | { | 2828 | { |
| 2770 | LIST_HEAD(pending); | 2829 | LIST_HEAD(pending); |
| 2771 | struct cgroup *cgrp, *n; | 2830 | struct cgroup *cgrp, *root = &ss->root->top_cgroup; |
| 2831 | struct super_block *sb = ss->root->sb; | ||
| 2832 | struct dentry *prev = NULL; | ||
| 2833 | struct inode *inode; | ||
| 2834 | u64 update_before; | ||
| 2772 | 2835 | ||
| 2773 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ | 2836 | /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */ |
| 2774 | if (cfts && ss->root != &rootnode) { | 2837 | if (!cfts || ss->root == &cgroup_dummy_root || |
| 2775 | list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) { | 2838 | !atomic_inc_not_zero(&sb->s_active)) { |
| 2776 | dget(cgrp->dentry); | 2839 | mutex_unlock(&cgroup_mutex); |
| 2777 | list_add_tail(&cgrp->cft_q_node, &pending); | 2840 | return; |
| 2778 | } | ||
| 2779 | } | 2841 | } |
| 2780 | 2842 | ||
| 2781 | mutex_unlock(&cgroup_mutex); | ||
| 2782 | |||
| 2783 | /* | 2843 | /* |
| 2784 | * All new cgroups will see @cfts update on @ss->cftsets. Add/rm | 2844 | * All cgroups which are created after we drop cgroup_mutex will |
| 2785 | * files for all cgroups which were created before. | 2845 | * have the updated set of files, so we only need to update the |
| 2846 | * cgroups created before the current @cgroup_serial_nr_next. | ||
| 2786 | */ | 2847 | */ |
| 2787 | list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) { | 2848 | update_before = cgroup_serial_nr_next; |
| 2788 | struct inode *inode = cgrp->dentry->d_inode; | 2849 | |
| 2850 | mutex_unlock(&cgroup_mutex); | ||
| 2851 | |||
| 2852 | /* @root always needs to be updated */ | ||
| 2853 | inode = root->dentry->d_inode; | ||
| 2854 | mutex_lock(&inode->i_mutex); | ||
| 2855 | mutex_lock(&cgroup_mutex); | ||
| 2856 | cgroup_addrm_files(root, ss, cfts, is_add); | ||
| 2857 | mutex_unlock(&cgroup_mutex); | ||
| 2858 | mutex_unlock(&inode->i_mutex); | ||
| 2859 | |||
| 2860 | /* add/rm files for all cgroups created before */ | ||
| 2861 | rcu_read_lock(); | ||
| 2862 | cgroup_for_each_descendant_pre(cgrp, root) { | ||
| 2863 | if (cgroup_is_dead(cgrp)) | ||
| 2864 | continue; | ||
| 2865 | |||
| 2866 | inode = cgrp->dentry->d_inode; | ||
| 2867 | dget(cgrp->dentry); | ||
| 2868 | rcu_read_unlock(); | ||
| 2869 | |||
| 2870 | dput(prev); | ||
| 2871 | prev = cgrp->dentry; | ||
| 2789 | 2872 | ||
| 2790 | mutex_lock(&inode->i_mutex); | 2873 | mutex_lock(&inode->i_mutex); |
| 2791 | mutex_lock(&cgroup_mutex); | 2874 | mutex_lock(&cgroup_mutex); |
| 2792 | if (!cgroup_is_removed(cgrp)) | 2875 | if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp)) |
| 2793 | cgroup_addrm_files(cgrp, ss, cfts, is_add); | 2876 | cgroup_addrm_files(cgrp, ss, cfts, is_add); |
| 2794 | mutex_unlock(&cgroup_mutex); | 2877 | mutex_unlock(&cgroup_mutex); |
| 2795 | mutex_unlock(&inode->i_mutex); | 2878 | mutex_unlock(&inode->i_mutex); |
| 2796 | 2879 | ||
| 2797 | list_del_init(&cgrp->cft_q_node); | 2880 | rcu_read_lock(); |
| 2798 | dput(cgrp->dentry); | ||
| 2799 | } | 2881 | } |
| 2800 | 2882 | rcu_read_unlock(); | |
| 2801 | mutex_unlock(&cgroup_cft_mutex); | 2883 | dput(prev); |
| 2884 | deactivate_super(sb); | ||
| 2802 | } | 2885 | } |
| 2803 | 2886 | ||
| 2804 | /** | 2887 | /** |
| @@ -2853,7 +2936,8 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | |||
| 2853 | 2936 | ||
| 2854 | list_for_each_entry(set, &ss->cftsets, node) { | 2937 | list_for_each_entry(set, &ss->cftsets, node) { |
| 2855 | if (set->cfts == cfts) { | 2938 | if (set->cfts == cfts) { |
| 2856 | list_del_init(&set->node); | 2939 | list_del(&set->node); |
| 2940 | kfree(set); | ||
| 2857 | cgroup_cfts_commit(ss, cfts, false); | 2941 | cgroup_cfts_commit(ss, cfts, false); |
| 2858 | return 0; | 2942 | return 0; |
| 2859 | } | 2943 | } |
| @@ -2872,12 +2956,11 @@ int cgroup_rm_cftypes(struct cgroup_subsys *ss, struct cftype *cfts) | |||
| 2872 | int cgroup_task_count(const struct cgroup *cgrp) | 2956 | int cgroup_task_count(const struct cgroup *cgrp) |
| 2873 | { | 2957 | { |
| 2874 | int count = 0; | 2958 | int count = 0; |
| 2875 | struct cg_cgroup_link *link; | 2959 | struct cgrp_cset_link *link; |
| 2876 | 2960 | ||
| 2877 | read_lock(&css_set_lock); | 2961 | read_lock(&css_set_lock); |
| 2878 | list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) { | 2962 | list_for_each_entry(link, &cgrp->cset_links, cset_link) |
| 2879 | count += atomic_read(&link->cg->refcount); | 2963 | count += atomic_read(&link->cset->refcount); |
| 2880 | } | ||
| 2881 | read_unlock(&css_set_lock); | 2964 | read_unlock(&css_set_lock); |
| 2882 | return count; | 2965 | return count; |
| 2883 | } | 2966 | } |
| @@ -2886,25 +2969,24 @@ int cgroup_task_count(const struct cgroup *cgrp) | |||
| 2886 | * Advance a list_head iterator. The iterator should be positioned at | 2969 | * Advance a list_head iterator. The iterator should be positioned at |
| 2887 | * the start of a css_set | 2970 | * the start of a css_set |
| 2888 | */ | 2971 | */ |
| 2889 | static void cgroup_advance_iter(struct cgroup *cgrp, | 2972 | static void cgroup_advance_iter(struct cgroup *cgrp, struct cgroup_iter *it) |
| 2890 | struct cgroup_iter *it) | ||
| 2891 | { | 2973 | { |
| 2892 | struct list_head *l = it->cg_link; | 2974 | struct list_head *l = it->cset_link; |
| 2893 | struct cg_cgroup_link *link; | 2975 | struct cgrp_cset_link *link; |
| 2894 | struct css_set *cg; | 2976 | struct css_set *cset; |
| 2895 | 2977 | ||
| 2896 | /* Advance to the next non-empty css_set */ | 2978 | /* Advance to the next non-empty css_set */ |
| 2897 | do { | 2979 | do { |
| 2898 | l = l->next; | 2980 | l = l->next; |
| 2899 | if (l == &cgrp->css_sets) { | 2981 | if (l == &cgrp->cset_links) { |
| 2900 | it->cg_link = NULL; | 2982 | it->cset_link = NULL; |
| 2901 | return; | 2983 | return; |
| 2902 | } | 2984 | } |
| 2903 | link = list_entry(l, struct cg_cgroup_link, cgrp_link_list); | 2985 | link = list_entry(l, struct cgrp_cset_link, cset_link); |
| 2904 | cg = link->cg; | 2986 | cset = link->cset; |
| 2905 | } while (list_empty(&cg->tasks)); | 2987 | } while (list_empty(&cset->tasks)); |
| 2906 | it->cg_link = l; | 2988 | it->cset_link = l; |
| 2907 | it->task = cg->tasks.next; | 2989 | it->task = cset->tasks.next; |
| 2908 | } | 2990 | } |
| 2909 | 2991 | ||
| 2910 | /* | 2992 | /* |
| @@ -2934,7 +3016,7 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 2934 | * entry won't be deleted though the process has exited. | 3016 | * entry won't be deleted though the process has exited. |
| 2935 | */ | 3017 | */ |
| 2936 | if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list)) | 3018 | if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list)) |
| 2937 | list_add(&p->cg_list, &p->cgroups->tasks); | 3019 | list_add(&p->cg_list, &task_css_set(p)->tasks); |
| 2938 | task_unlock(p); | 3020 | task_unlock(p); |
| 2939 | } while_each_thread(g, p); | 3021 | } while_each_thread(g, p); |
| 2940 | read_unlock(&tasklist_lock); | 3022 | read_unlock(&tasklist_lock); |
| @@ -2942,12 +3024,67 @@ static void cgroup_enable_task_cg_lists(void) | |||
| 2942 | } | 3024 | } |
| 2943 | 3025 | ||
| 2944 | /** | 3026 | /** |
| 3027 | * cgroup_next_sibling - find the next sibling of a given cgroup | ||
| 3028 | * @pos: the current cgroup | ||
| 3029 | * | ||
| 3030 | * This function returns the next sibling of @pos and should be called | ||
| 3031 | * under RCU read lock. The only requirement is that @pos is accessible. | ||
| 3032 | * The next sibling is guaranteed to be returned regardless of @pos's | ||
| 3033 | * state. | ||
| 3034 | */ | ||
| 3035 | struct cgroup *cgroup_next_sibling(struct cgroup *pos) | ||
| 3036 | { | ||
| 3037 | struct cgroup *next; | ||
| 3038 | |||
| 3039 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 3040 | |||
| 3041 | /* | ||
| 3042 | * @pos could already have been removed. Once a cgroup is removed, | ||
| 3043 | * its ->sibling.next is no longer updated when its next sibling | ||
| 3044 | * changes. As CGRP_DEAD assertion is serialized and happens | ||
| 3045 | * before the cgroup is taken off the ->sibling list, if we see it | ||
| 3046 | * unasserted, it's guaranteed that the next sibling hasn't | ||
| 3047 | * finished its grace period even if it's already removed, and thus | ||
| 3048 | * safe to dereference from this RCU critical section. If | ||
| 3049 | * ->sibling.next is inaccessible, cgroup_is_dead() is guaranteed | ||
| 3050 | * to be visible as %true here. | ||
| 3051 | */ | ||
| 3052 | if (likely(!cgroup_is_dead(pos))) { | ||
| 3053 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); | ||
| 3054 | if (&next->sibling != &pos->parent->children) | ||
| 3055 | return next; | ||
| 3056 | return NULL; | ||
| 3057 | } | ||
| 3058 | |||
| 3059 | /* | ||
| 3060 | * Can't dereference the next pointer. Each cgroup is given a | ||
| 3061 | * monotonically increasing unique serial number and always | ||
| 3062 | * appended to the sibling list, so the next one can be found by | ||
| 3063 | * walking the parent's children until we see a cgroup with higher | ||
| 3064 | * serial number than @pos's. | ||
| 3065 | * | ||
| 3066 | * While this path can be slow, it's taken only when either the | ||
| 3067 | * current cgroup is removed or iteration and removal race. | ||
| 3068 | */ | ||
| 3069 | list_for_each_entry_rcu(next, &pos->parent->children, sibling) | ||
| 3070 | if (next->serial_nr > pos->serial_nr) | ||
| 3071 | return next; | ||
| 3072 | return NULL; | ||
| 3073 | } | ||
| 3074 | EXPORT_SYMBOL_GPL(cgroup_next_sibling); | ||
| 3075 | |||
| 3076 | /** | ||
| 2945 | * cgroup_next_descendant_pre - find the next descendant for pre-order walk | 3077 | * cgroup_next_descendant_pre - find the next descendant for pre-order walk |
| 2946 | * @pos: the current position (%NULL to initiate traversal) | 3078 | * @pos: the current position (%NULL to initiate traversal) |
| 2947 | * @cgroup: cgroup whose descendants to walk | 3079 | * @cgroup: cgroup whose descendants to walk |
| 2948 | * | 3080 | * |
| 2949 | * To be used by cgroup_for_each_descendant_pre(). Find the next | 3081 | * To be used by cgroup_for_each_descendant_pre(). Find the next |
| 2950 | * descendant to visit for pre-order traversal of @cgroup's descendants. | 3082 | * descendant to visit for pre-order traversal of @cgroup's descendants. |
| 3083 | * | ||
| 3084 | * While this function requires RCU read locking, it doesn't require the | ||
| 3085 | * whole traversal to be contained in a single RCU critical section. This | ||
| 3086 | * function will return the correct next descendant as long as both @pos | ||
| 3087 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | ||
| 2951 | */ | 3088 | */ |
| 2952 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | 3089 | struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, |
| 2953 | struct cgroup *cgroup) | 3090 | struct cgroup *cgroup) |
| @@ -2967,11 +3104,9 @@ struct cgroup *cgroup_next_descendant_pre(struct cgroup *pos, | |||
| 2967 | 3104 | ||
| 2968 | /* no child, visit my or the closest ancestor's next sibling */ | 3105 | /* no child, visit my or the closest ancestor's next sibling */ |
| 2969 | while (pos != cgroup) { | 3106 | while (pos != cgroup) { |
| 2970 | next = list_entry_rcu(pos->sibling.next, struct cgroup, | 3107 | next = cgroup_next_sibling(pos); |
| 2971 | sibling); | 3108 | if (next) |
| 2972 | if (&next->sibling != &pos->parent->children) | ||
| 2973 | return next; | 3109 | return next; |
| 2974 | |||
| 2975 | pos = pos->parent; | 3110 | pos = pos->parent; |
| 2976 | } | 3111 | } |
| 2977 | 3112 | ||
| @@ -2986,6 +3121,11 @@ EXPORT_SYMBOL_GPL(cgroup_next_descendant_pre); | |||
| 2986 | * Return the rightmost descendant of @pos. If there's no descendant, | 3121 | * Return the rightmost descendant of @pos. If there's no descendant, |
| 2987 | * @pos is returned. This can be used during pre-order traversal to skip | 3122 | * @pos is returned. This can be used during pre-order traversal to skip |
| 2988 | * subtree of @pos. | 3123 | * subtree of @pos. |
| 3124 | * | ||
| 3125 | * While this function requires RCU read locking, it doesn't require the | ||
| 3126 | * whole traversal to be contained in a single RCU critical section. This | ||
| 3127 | * function will return the correct rightmost descendant as long as @pos is | ||
| 3128 | * accessible. | ||
| 2989 | */ | 3129 | */ |
| 2990 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) | 3130 | struct cgroup *cgroup_rightmost_descendant(struct cgroup *pos) |
| 2991 | { | 3131 | { |
| @@ -3025,6 +3165,11 @@ static struct cgroup *cgroup_leftmost_descendant(struct cgroup *pos) | |||
| 3025 | * | 3165 | * |
| 3026 | * To be used by cgroup_for_each_descendant_post(). Find the next | 3166 | * To be used by cgroup_for_each_descendant_post(). Find the next |
| 3027 | * descendant to visit for post-order traversal of @cgroup's descendants. | 3167 | * descendant to visit for post-order traversal of @cgroup's descendants. |
| 3168 | * | ||
| 3169 | * While this function requires RCU read locking, it doesn't require the | ||
| 3170 | * whole traversal to be contained in a single RCU critical section. This | ||
| 3171 | * function will return the correct next descendant as long as both @pos | ||
| 3172 | * and @cgroup are accessible and @pos is a descendant of @cgroup. | ||
| 3028 | */ | 3173 | */ |
| 3029 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | 3174 | struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, |
| 3030 | struct cgroup *cgroup) | 3175 | struct cgroup *cgroup) |
| @@ -3040,8 +3185,8 @@ struct cgroup *cgroup_next_descendant_post(struct cgroup *pos, | |||
| 3040 | } | 3185 | } |
| 3041 | 3186 | ||
| 3042 | /* if there's an unvisited sibling, visit its leftmost descendant */ | 3187 | /* if there's an unvisited sibling, visit its leftmost descendant */ |
| 3043 | next = list_entry_rcu(pos->sibling.next, struct cgroup, sibling); | 3188 | next = cgroup_next_sibling(pos); |
| 3044 | if (&next->sibling != &pos->parent->children) | 3189 | if (next) |
| 3045 | return cgroup_leftmost_descendant(next); | 3190 | return cgroup_leftmost_descendant(next); |
| 3046 | 3191 | ||
| 3047 | /* no sibling left, visit parent */ | 3192 | /* no sibling left, visit parent */ |
| @@ -3062,7 +3207,7 @@ void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it) | |||
| 3062 | cgroup_enable_task_cg_lists(); | 3207 | cgroup_enable_task_cg_lists(); |
| 3063 | 3208 | ||
| 3064 | read_lock(&css_set_lock); | 3209 | read_lock(&css_set_lock); |
| 3065 | it->cg_link = &cgrp->css_sets; | 3210 | it->cset_link = &cgrp->cset_links; |
| 3066 | cgroup_advance_iter(cgrp, it); | 3211 | cgroup_advance_iter(cgrp, it); |
| 3067 | } | 3212 | } |
| 3068 | 3213 | ||
| @@ -3071,16 +3216,16 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, | |||
| 3071 | { | 3216 | { |
| 3072 | struct task_struct *res; | 3217 | struct task_struct *res; |
| 3073 | struct list_head *l = it->task; | 3218 | struct list_head *l = it->task; |
| 3074 | struct cg_cgroup_link *link; | 3219 | struct cgrp_cset_link *link; |
| 3075 | 3220 | ||
| 3076 | /* If the iterator cg is NULL, we have no tasks */ | 3221 | /* If the iterator cg is NULL, we have no tasks */ |
| 3077 | if (!it->cg_link) | 3222 | if (!it->cset_link) |
| 3078 | return NULL; | 3223 | return NULL; |
| 3079 | res = list_entry(l, struct task_struct, cg_list); | 3224 | res = list_entry(l, struct task_struct, cg_list); |
| 3080 | /* Advance iterator to find next entry */ | 3225 | /* Advance iterator to find next entry */ |
| 3081 | l = l->next; | 3226 | l = l->next; |
| 3082 | link = list_entry(it->cg_link, struct cg_cgroup_link, cgrp_link_list); | 3227 | link = list_entry(it->cset_link, struct cgrp_cset_link, cset_link); |
| 3083 | if (l == &link->cg->tasks) { | 3228 | if (l == &link->cset->tasks) { |
| 3084 | /* We reached the end of this task list - move on to | 3229 | /* We reached the end of this task list - move on to |
| 3085 | * the next cg_cgroup_link */ | 3230 | * the next cg_cgroup_link */ |
| 3086 | cgroup_advance_iter(cgrp, it); | 3231 | cgroup_advance_iter(cgrp, it); |
| @@ -3411,7 +3556,7 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
| 3411 | } | 3556 | } |
| 3412 | } | 3557 | } |
| 3413 | /* entry not found; create a new one */ | 3558 | /* entry not found; create a new one */ |
| 3414 | l = kmalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); | 3559 | l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL); |
| 3415 | if (!l) { | 3560 | if (!l) { |
| 3416 | mutex_unlock(&cgrp->pidlist_mutex); | 3561 | mutex_unlock(&cgrp->pidlist_mutex); |
| 3417 | return l; | 3562 | return l; |
| @@ -3420,8 +3565,6 @@ static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp, | |||
| 3420 | down_write(&l->mutex); | 3565 | down_write(&l->mutex); |
| 3421 | l->key.type = type; | 3566 | l->key.type = type; |
| 3422 | l->key.ns = get_pid_ns(ns); | 3567 | l->key.ns = get_pid_ns(ns); |
| 3423 | l->use_count = 0; /* don't increment here */ | ||
| 3424 | l->list = NULL; | ||
| 3425 | l->owner = cgrp; | 3568 | l->owner = cgrp; |
| 3426 | list_add(&l->links, &cgrp->pidlists); | 3569 | list_add(&l->links, &cgrp->pidlists); |
| 3427 | mutex_unlock(&cgrp->pidlist_mutex); | 3570 | mutex_unlock(&cgrp->pidlist_mutex); |
| @@ -3727,6 +3870,23 @@ static int cgroup_write_notify_on_release(struct cgroup *cgrp, | |||
| 3727 | } | 3870 | } |
| 3728 | 3871 | ||
| 3729 | /* | 3872 | /* |
| 3873 | * When dput() is called asynchronously, if umount has been done and | ||
| 3874 | * then deactivate_super() in cgroup_free_fn() kills the superblock, | ||
| 3875 | * there's a small window that vfs will see the root dentry with non-zero | ||
| 3876 | * refcnt and trigger BUG(). | ||
| 3877 | * | ||
| 3878 | * That's why we hold a reference before dput() and drop it right after. | ||
| 3879 | */ | ||
| 3880 | static void cgroup_dput(struct cgroup *cgrp) | ||
| 3881 | { | ||
| 3882 | struct super_block *sb = cgrp->root->sb; | ||
| 3883 | |||
| 3884 | atomic_inc(&sb->s_active); | ||
| 3885 | dput(cgrp->dentry); | ||
| 3886 | deactivate_super(sb); | ||
| 3887 | } | ||
| 3888 | |||
| 3889 | /* | ||
| 3730 | * Unregister event and free resources. | 3890 | * Unregister event and free resources. |
| 3731 | * | 3891 | * |
| 3732 | * Gets called from workqueue. | 3892 | * Gets called from workqueue. |
| @@ -3746,7 +3906,7 @@ static void cgroup_event_remove(struct work_struct *work) | |||
| 3746 | 3906 | ||
| 3747 | eventfd_ctx_put(event->eventfd); | 3907 | eventfd_ctx_put(event->eventfd); |
| 3748 | kfree(event); | 3908 | kfree(event); |
| 3749 | dput(cgrp->dentry); | 3909 | cgroup_dput(cgrp); |
| 3750 | } | 3910 | } |
| 3751 | 3911 | ||
| 3752 | /* | 3912 | /* |
| @@ -3933,33 +4093,16 @@ static int cgroup_clone_children_write(struct cgroup *cgrp, | |||
| 3933 | return 0; | 4093 | return 0; |
| 3934 | } | 4094 | } |
| 3935 | 4095 | ||
| 3936 | /* | 4096 | static struct cftype cgroup_base_files[] = { |
| 3937 | * for the common functions, 'private' gives the type of file | ||
| 3938 | */ | ||
| 3939 | /* for hysterical raisins, we can't put this on the older files */ | ||
| 3940 | #define CGROUP_FILE_GENERIC_PREFIX "cgroup." | ||
| 3941 | static struct cftype files[] = { | ||
| 3942 | { | ||
| 3943 | .name = "tasks", | ||
| 3944 | .open = cgroup_tasks_open, | ||
| 3945 | .write_u64 = cgroup_tasks_write, | ||
| 3946 | .release = cgroup_pidlist_release, | ||
| 3947 | .mode = S_IRUGO | S_IWUSR, | ||
| 3948 | }, | ||
| 3949 | { | 4097 | { |
| 3950 | .name = CGROUP_FILE_GENERIC_PREFIX "procs", | 4098 | .name = "cgroup.procs", |
| 3951 | .open = cgroup_procs_open, | 4099 | .open = cgroup_procs_open, |
| 3952 | .write_u64 = cgroup_procs_write, | 4100 | .write_u64 = cgroup_procs_write, |
| 3953 | .release = cgroup_pidlist_release, | 4101 | .release = cgroup_pidlist_release, |
| 3954 | .mode = S_IRUGO | S_IWUSR, | 4102 | .mode = S_IRUGO | S_IWUSR, |
| 3955 | }, | 4103 | }, |
| 3956 | { | 4104 | { |
| 3957 | .name = "notify_on_release", | 4105 | .name = "cgroup.event_control", |
| 3958 | .read_u64 = cgroup_read_notify_on_release, | ||
| 3959 | .write_u64 = cgroup_write_notify_on_release, | ||
| 3960 | }, | ||
| 3961 | { | ||
| 3962 | .name = CGROUP_FILE_GENERIC_PREFIX "event_control", | ||
| 3963 | .write_string = cgroup_write_event_control, | 4106 | .write_string = cgroup_write_event_control, |
| 3964 | .mode = S_IWUGO, | 4107 | .mode = S_IWUGO, |
| 3965 | }, | 4108 | }, |
| @@ -3974,9 +4117,29 @@ static struct cftype files[] = { | |||
| 3974 | .flags = CFTYPE_ONLY_ON_ROOT, | 4117 | .flags = CFTYPE_ONLY_ON_ROOT, |
| 3975 | .read_seq_string = cgroup_sane_behavior_show, | 4118 | .read_seq_string = cgroup_sane_behavior_show, |
| 3976 | }, | 4119 | }, |
| 4120 | |||
| 4121 | /* | ||
| 4122 | * Historical crazy stuff. These don't have "cgroup." prefix and | ||
| 4123 | * don't exist if sane_behavior. If you're depending on these, be | ||
| 4124 | * prepared to be burned. | ||
| 4125 | */ | ||
| 4126 | { | ||
| 4127 | .name = "tasks", | ||
| 4128 | .flags = CFTYPE_INSANE, /* use "procs" instead */ | ||
| 4129 | .open = cgroup_tasks_open, | ||
| 4130 | .write_u64 = cgroup_tasks_write, | ||
| 4131 | .release = cgroup_pidlist_release, | ||
| 4132 | .mode = S_IRUGO | S_IWUSR, | ||
| 4133 | }, | ||
| 4134 | { | ||
| 4135 | .name = "notify_on_release", | ||
| 4136 | .flags = CFTYPE_INSANE, | ||
| 4137 | .read_u64 = cgroup_read_notify_on_release, | ||
| 4138 | .write_u64 = cgroup_write_notify_on_release, | ||
| 4139 | }, | ||
| 3977 | { | 4140 | { |
| 3978 | .name = "release_agent", | 4141 | .name = "release_agent", |
| 3979 | .flags = CFTYPE_ONLY_ON_ROOT, | 4142 | .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT, |
| 3980 | .read_seq_string = cgroup_release_agent_show, | 4143 | .read_seq_string = cgroup_release_agent_show, |
| 3981 | .write_string = cgroup_release_agent_write, | 4144 | .write_string = cgroup_release_agent_write, |
| 3982 | .max_write_len = PATH_MAX, | 4145 | .max_write_len = PATH_MAX, |
| @@ -3997,13 +4160,13 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | |||
| 3997 | struct cgroup_subsys *ss; | 4160 | struct cgroup_subsys *ss; |
| 3998 | 4161 | ||
| 3999 | if (base_files) { | 4162 | if (base_files) { |
| 4000 | err = cgroup_addrm_files(cgrp, NULL, files, true); | 4163 | err = cgroup_addrm_files(cgrp, NULL, cgroup_base_files, true); |
| 4001 | if (err < 0) | 4164 | if (err < 0) |
| 4002 | return err; | 4165 | return err; |
| 4003 | } | 4166 | } |
| 4004 | 4167 | ||
| 4005 | /* process cftsets of each subsystem */ | 4168 | /* process cftsets of each subsystem */ |
| 4006 | for_each_subsys(cgrp->root, ss) { | 4169 | for_each_root_subsys(cgrp->root, ss) { |
| 4007 | struct cftype_set *set; | 4170 | struct cftype_set *set; |
| 4008 | if (!test_bit(ss->subsys_id, &subsys_mask)) | 4171 | if (!test_bit(ss->subsys_id, &subsys_mask)) |
| 4009 | continue; | 4172 | continue; |
| @@ -4013,15 +4176,17 @@ static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, | |||
| 4013 | } | 4176 | } |
| 4014 | 4177 | ||
| 4015 | /* This cgroup is ready now */ | 4178 | /* This cgroup is ready now */ |
| 4016 | for_each_subsys(cgrp->root, ss) { | 4179 | for_each_root_subsys(cgrp->root, ss) { |
| 4017 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4180 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 4181 | struct css_id *id = rcu_dereference_protected(css->id, true); | ||
| 4182 | |||
| 4018 | /* | 4183 | /* |
| 4019 | * Update id->css pointer and make this css visible from | 4184 | * Update id->css pointer and make this css visible from |
| 4020 | * CSS ID functions. This pointer will be dereferenced | 4185 | * CSS ID functions. This pointer will be dereferenced |
| 4021 | * from RCU-read-side without locks. | 4186 | * from RCU-read-side without locks. |
| 4022 | */ | 4187 | */ |
| 4023 | if (css->id) | 4188 | if (id) |
| 4024 | rcu_assign_pointer(css->id->css, css); | 4189 | rcu_assign_pointer(id->css, css); |
| 4025 | } | 4190 | } |
| 4026 | 4191 | ||
| 4027 | return 0; | 4192 | return 0; |
| @@ -4031,12 +4196,16 @@ static void css_dput_fn(struct work_struct *work) | |||
| 4031 | { | 4196 | { |
| 4032 | struct cgroup_subsys_state *css = | 4197 | struct cgroup_subsys_state *css = |
| 4033 | container_of(work, struct cgroup_subsys_state, dput_work); | 4198 | container_of(work, struct cgroup_subsys_state, dput_work); |
| 4034 | struct dentry *dentry = css->cgroup->dentry; | ||
| 4035 | struct super_block *sb = dentry->d_sb; | ||
| 4036 | 4199 | ||
| 4037 | atomic_inc(&sb->s_active); | 4200 | cgroup_dput(css->cgroup); |
| 4038 | dput(dentry); | 4201 | } |
| 4039 | deactivate_super(sb); | 4202 | |
| 4203 | static void css_release(struct percpu_ref *ref) | ||
| 4204 | { | ||
| 4205 | struct cgroup_subsys_state *css = | ||
| 4206 | container_of(ref, struct cgroup_subsys_state, refcnt); | ||
| 4207 | |||
| 4208 | schedule_work(&css->dput_work); | ||
| 4040 | } | 4209 | } |
| 4041 | 4210 | ||
| 4042 | static void init_cgroup_css(struct cgroup_subsys_state *css, | 4211 | static void init_cgroup_css(struct cgroup_subsys_state *css, |
| @@ -4044,10 +4213,9 @@ static void init_cgroup_css(struct cgroup_subsys_state *css, | |||
| 4044 | struct cgroup *cgrp) | 4213 | struct cgroup *cgrp) |
| 4045 | { | 4214 | { |
| 4046 | css->cgroup = cgrp; | 4215 | css->cgroup = cgrp; |
| 4047 | atomic_set(&css->refcnt, 1); | ||
| 4048 | css->flags = 0; | 4216 | css->flags = 0; |
| 4049 | css->id = NULL; | 4217 | css->id = NULL; |
| 4050 | if (cgrp == dummytop) | 4218 | if (cgrp == cgroup_dummy_top) |
| 4051 | css->flags |= CSS_ROOT; | 4219 | css->flags |= CSS_ROOT; |
| 4052 | BUG_ON(cgrp->subsys[ss->subsys_id]); | 4220 | BUG_ON(cgrp->subsys[ss->subsys_id]); |
| 4053 | cgrp->subsys[ss->subsys_id] = css; | 4221 | cgrp->subsys[ss->subsys_id] = css; |
| @@ -4157,7 +4325,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4157 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) | 4325 | if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags)) |
| 4158 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); | 4326 | set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags); |
| 4159 | 4327 | ||
| 4160 | for_each_subsys(root, ss) { | 4328 | for_each_root_subsys(root, ss) { |
| 4161 | struct cgroup_subsys_state *css; | 4329 | struct cgroup_subsys_state *css; |
| 4162 | 4330 | ||
| 4163 | css = ss->css_alloc(cgrp); | 4331 | css = ss->css_alloc(cgrp); |
| @@ -4165,7 +4333,15 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4165 | err = PTR_ERR(css); | 4333 | err = PTR_ERR(css); |
| 4166 | goto err_free_all; | 4334 | goto err_free_all; |
| 4167 | } | 4335 | } |
| 4336 | |||
| 4337 | err = percpu_ref_init(&css->refcnt, css_release); | ||
| 4338 | if (err) { | ||
| 4339 | ss->css_free(cgrp); | ||
| 4340 | goto err_free_all; | ||
| 4341 | } | ||
| 4342 | |||
| 4168 | init_cgroup_css(css, ss, cgrp); | 4343 | init_cgroup_css(css, ss, cgrp); |
| 4344 | |||
| 4169 | if (ss->use_id) { | 4345 | if (ss->use_id) { |
| 4170 | err = alloc_css_id(ss, parent, cgrp); | 4346 | err = alloc_css_id(ss, parent, cgrp); |
| 4171 | if (err) | 4347 | if (err) |
| @@ -4183,20 +4359,21 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4183 | goto err_free_all; | 4359 | goto err_free_all; |
| 4184 | lockdep_assert_held(&dentry->d_inode->i_mutex); | 4360 | lockdep_assert_held(&dentry->d_inode->i_mutex); |
| 4185 | 4361 | ||
| 4362 | cgrp->serial_nr = cgroup_serial_nr_next++; | ||
| 4363 | |||
| 4186 | /* allocation complete, commit to creation */ | 4364 | /* allocation complete, commit to creation */ |
| 4187 | list_add_tail(&cgrp->allcg_node, &root->allcg_list); | ||
| 4188 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); | 4365 | list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); |
| 4189 | root->number_of_cgroups++; | 4366 | root->number_of_cgroups++; |
| 4190 | 4367 | ||
| 4191 | /* each css holds a ref to the cgroup's dentry */ | 4368 | /* each css holds a ref to the cgroup's dentry */ |
| 4192 | for_each_subsys(root, ss) | 4369 | for_each_root_subsys(root, ss) |
| 4193 | dget(dentry); | 4370 | dget(dentry); |
| 4194 | 4371 | ||
| 4195 | /* hold a ref to the parent's dentry */ | 4372 | /* hold a ref to the parent's dentry */ |
| 4196 | dget(parent->dentry); | 4373 | dget(parent->dentry); |
| 4197 | 4374 | ||
| 4198 | /* creation succeeded, notify subsystems */ | 4375 | /* creation succeeded, notify subsystems */ |
| 4199 | for_each_subsys(root, ss) { | 4376 | for_each_root_subsys(root, ss) { |
| 4200 | err = online_css(ss, cgrp); | 4377 | err = online_css(ss, cgrp); |
| 4201 | if (err) | 4378 | if (err) |
| 4202 | goto err_destroy; | 4379 | goto err_destroy; |
| @@ -4221,9 +4398,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry, | |||
| 4221 | return 0; | 4398 | return 0; |
| 4222 | 4399 | ||
| 4223 | err_free_all: | 4400 | err_free_all: |
| 4224 | for_each_subsys(root, ss) { | 4401 | for_each_root_subsys(root, ss) { |
| 4225 | if (cgrp->subsys[ss->subsys_id]) | 4402 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 4403 | |||
| 4404 | if (css) { | ||
| 4405 | percpu_ref_cancel_init(&css->refcnt); | ||
| 4226 | ss->css_free(cgrp); | 4406 | ss->css_free(cgrp); |
| 4407 | } | ||
| 4227 | } | 4408 | } |
| 4228 | mutex_unlock(&cgroup_mutex); | 4409 | mutex_unlock(&cgroup_mutex); |
| 4229 | /* Release the reference count that we took on the superblock */ | 4410 | /* Release the reference count that we took on the superblock */ |
| @@ -4251,63 +4432,137 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 4251 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); | 4432 | return cgroup_create(c_parent, dentry, mode | S_IFDIR); |
| 4252 | } | 4433 | } |
| 4253 | 4434 | ||
| 4435 | static void cgroup_css_killed(struct cgroup *cgrp) | ||
| 4436 | { | ||
| 4437 | if (!atomic_dec_and_test(&cgrp->css_kill_cnt)) | ||
| 4438 | return; | ||
| 4439 | |||
| 4440 | /* percpu ref's of all css's are killed, kick off the next step */ | ||
| 4441 | INIT_WORK(&cgrp->destroy_work, cgroup_offline_fn); | ||
| 4442 | schedule_work(&cgrp->destroy_work); | ||
| 4443 | } | ||
| 4444 | |||
| 4445 | static void css_ref_killed_fn(struct percpu_ref *ref) | ||
| 4446 | { | ||
| 4447 | struct cgroup_subsys_state *css = | ||
| 4448 | container_of(ref, struct cgroup_subsys_state, refcnt); | ||
| 4449 | |||
| 4450 | cgroup_css_killed(css->cgroup); | ||
| 4451 | } | ||
| 4452 | |||
| 4453 | /** | ||
| 4454 | * cgroup_destroy_locked - the first stage of cgroup destruction | ||
| 4455 | * @cgrp: cgroup to be destroyed | ||
| 4456 | * | ||
| 4457 | * css's make use of percpu refcnts whose killing latency shouldn't be | ||
| 4458 | * exposed to userland and are RCU protected. Also, cgroup core needs to | ||
| 4459 | * guarantee that css_tryget() won't succeed by the time ->css_offline() is | ||
| 4460 | * invoked. To satisfy all the requirements, destruction is implemented in | ||
| 4461 | * the following two steps. | ||
| 4462 | * | ||
| 4463 | * s1. Verify @cgrp can be destroyed and mark it dying. Remove all | ||
| 4464 | * userland visible parts and start killing the percpu refcnts of | ||
| 4465 | * css's. Set up so that the next stage will be kicked off once all | ||
| 4466 | * the percpu refcnts are confirmed to be killed. | ||
| 4467 | * | ||
| 4468 | * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the | ||
| 4469 | * rest of destruction. Once all cgroup references are gone, the | ||
| 4470 | * cgroup is RCU-freed. | ||
| 4471 | * | ||
| 4472 | * This function implements s1. After this step, @cgrp is gone as far as | ||
| 4473 | * the userland is concerned and a new cgroup with the same name may be | ||
| 4474 | * created. As cgroup doesn't care about the names internally, this | ||
| 4475 | * doesn't cause any problem. | ||
| 4476 | */ | ||
| 4254 | static int cgroup_destroy_locked(struct cgroup *cgrp) | 4477 | static int cgroup_destroy_locked(struct cgroup *cgrp) |
| 4255 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) | 4478 | __releases(&cgroup_mutex) __acquires(&cgroup_mutex) |
| 4256 | { | 4479 | { |
| 4257 | struct dentry *d = cgrp->dentry; | 4480 | struct dentry *d = cgrp->dentry; |
| 4258 | struct cgroup *parent = cgrp->parent; | ||
| 4259 | struct cgroup_event *event, *tmp; | 4481 | struct cgroup_event *event, *tmp; |
| 4260 | struct cgroup_subsys *ss; | 4482 | struct cgroup_subsys *ss; |
| 4483 | struct cgroup *child; | ||
| 4484 | bool empty; | ||
| 4261 | 4485 | ||
| 4262 | lockdep_assert_held(&d->d_inode->i_mutex); | 4486 | lockdep_assert_held(&d->d_inode->i_mutex); |
| 4263 | lockdep_assert_held(&cgroup_mutex); | 4487 | lockdep_assert_held(&cgroup_mutex); |
| 4264 | 4488 | ||
| 4265 | if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) | 4489 | /* |
| 4490 | * css_set_lock synchronizes access to ->cset_links and prevents | ||
| 4491 | * @cgrp from being removed while __put_css_set() is in progress. | ||
| 4492 | */ | ||
| 4493 | read_lock(&css_set_lock); | ||
| 4494 | empty = list_empty(&cgrp->cset_links); | ||
| 4495 | read_unlock(&css_set_lock); | ||
| 4496 | if (!empty) | ||
| 4497 | return -EBUSY; | ||
| 4498 | |||
| 4499 | /* | ||
| 4500 | * Make sure there's no live children. We can't test ->children | ||
| 4501 | * emptiness as dead children linger on it while being destroyed; | ||
| 4502 | * otherwise, "rmdir parent/child parent" may fail with -EBUSY. | ||
| 4503 | */ | ||
| 4504 | empty = true; | ||
| 4505 | rcu_read_lock(); | ||
| 4506 | list_for_each_entry_rcu(child, &cgrp->children, sibling) { | ||
| 4507 | empty = cgroup_is_dead(child); | ||
| 4508 | if (!empty) | ||
| 4509 | break; | ||
| 4510 | } | ||
| 4511 | rcu_read_unlock(); | ||
| 4512 | if (!empty) | ||
| 4266 | return -EBUSY; | 4513 | return -EBUSY; |
| 4267 | 4514 | ||
| 4268 | /* | 4515 | /* |
| 4269 | * Block new css_tryget() by deactivating refcnt and mark @cgrp | 4516 | * Block new css_tryget() by killing css refcnts. cgroup core |
| 4270 | * removed. This makes future css_tryget() and child creation | 4517 | * guarantees that, by the time ->css_offline() is invoked, no new |
| 4271 | * attempts fail thus maintaining the removal conditions verified | 4518 | * css reference will be given out via css_tryget(). We can't |
| 4272 | * above. | 4519 | * simply call percpu_ref_kill() and proceed to offlining css's |
| 4520 | * because percpu_ref_kill() doesn't guarantee that the ref is seen | ||
| 4521 | * as killed on all CPUs on return. | ||
| 4522 | * | ||
| 4523 | * Use percpu_ref_kill_and_confirm() to get notifications as each | ||
| 4524 | * css is confirmed to be seen as killed on all CPUs. The | ||
| 4525 | * notification callback keeps track of the number of css's to be | ||
| 4526 | * killed and schedules cgroup_offline_fn() to perform the rest of | ||
| 4527 | * destruction once the percpu refs of all css's are confirmed to | ||
| 4528 | * be killed. | ||
| 4273 | */ | 4529 | */ |
| 4274 | for_each_subsys(cgrp->root, ss) { | 4530 | atomic_set(&cgrp->css_kill_cnt, 1); |
| 4531 | for_each_root_subsys(cgrp->root, ss) { | ||
| 4275 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; | 4532 | struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; |
| 4276 | 4533 | ||
| 4277 | WARN_ON(atomic_read(&css->refcnt) < 0); | 4534 | /* |
| 4278 | atomic_add(CSS_DEACT_BIAS, &css->refcnt); | 4535 | * Killing would put the base ref, but we need to keep it |
| 4279 | } | 4536 | * alive until after ->css_offline. |
| 4280 | set_bit(CGRP_REMOVED, &cgrp->flags); | 4537 | */ |
| 4538 | percpu_ref_get(&css->refcnt); | ||
| 4281 | 4539 | ||
| 4282 | /* tell subsystems to initate destruction */ | 4540 | atomic_inc(&cgrp->css_kill_cnt); |
| 4283 | for_each_subsys(cgrp->root, ss) | 4541 | percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn); |
| 4284 | offline_css(ss, cgrp); | 4542 | } |
| 4543 | cgroup_css_killed(cgrp); | ||
| 4285 | 4544 | ||
| 4286 | /* | 4545 | /* |
| 4287 | * Put all the base refs. Each css holds an extra reference to the | 4546 | * Mark @cgrp dead. This prevents further task migration and child |
| 4288 | * cgroup's dentry and cgroup removal proceeds regardless of css | 4547 | * creation by disabling cgroup_lock_live_group(). Note that |
| 4289 | * refs. On the last put of each css, whenever that may be, the | 4548 | * CGRP_DEAD assertion is depended upon by cgroup_next_sibling() to |
| 4290 | * extra dentry ref is put so that dentry destruction happens only | 4549 | * resume iteration after dropping RCU read lock. See |
| 4291 | * after all css's are released. | 4550 | * cgroup_next_sibling() for details. |
| 4292 | */ | 4551 | */ |
| 4293 | for_each_subsys(cgrp->root, ss) | 4552 | set_bit(CGRP_DEAD, &cgrp->flags); |
| 4294 | css_put(cgrp->subsys[ss->subsys_id]); | ||
| 4295 | 4553 | ||
| 4554 | /* CGRP_DEAD is set, remove from ->release_list for the last time */ | ||
| 4296 | raw_spin_lock(&release_list_lock); | 4555 | raw_spin_lock(&release_list_lock); |
| 4297 | if (!list_empty(&cgrp->release_list)) | 4556 | if (!list_empty(&cgrp->release_list)) |
| 4298 | list_del_init(&cgrp->release_list); | 4557 | list_del_init(&cgrp->release_list); |
| 4299 | raw_spin_unlock(&release_list_lock); | 4558 | raw_spin_unlock(&release_list_lock); |
| 4300 | 4559 | ||
| 4301 | /* delete this cgroup from parent->children */ | 4560 | /* |
| 4302 | list_del_rcu(&cgrp->sibling); | 4561 | * Remove @cgrp directory. The removal puts the base ref but we |
| 4303 | list_del_init(&cgrp->allcg_node); | 4562 | * aren't quite done with @cgrp yet, so hold onto it. |
| 4304 | 4563 | */ | |
| 4305 | dget(d); | 4564 | dget(d); |
| 4306 | cgroup_d_remove_dir(d); | 4565 | cgroup_d_remove_dir(d); |
| 4307 | dput(d); | ||
| 4308 | |||
| 4309 | set_bit(CGRP_RELEASABLE, &parent->flags); | ||
| 4310 | check_for_release(parent); | ||
| 4311 | 4566 | ||
| 4312 | /* | 4567 | /* |
| 4313 | * Unregister events and notify userspace. | 4568 | * Unregister events and notify userspace. |
| @@ -4322,6 +4577,53 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) | |||
| 4322 | spin_unlock(&cgrp->event_list_lock); | 4577 | spin_unlock(&cgrp->event_list_lock); |
| 4323 | 4578 | ||
| 4324 | return 0; | 4579 | return 0; |
| 4580 | }; | ||
| 4581 | |||
| 4582 | /** | ||
| 4583 | * cgroup_offline_fn - the second step of cgroup destruction | ||
| 4584 | * @work: cgroup->destroy_work | ||
| 4585 | * | ||
| 4586 | * This function is invoked from a work item for a cgroup which is being | ||
| 4587 | * destroyed after the percpu refcnts of all css's are guaranteed to be | ||
| 4588 | * seen as killed on all CPUs, and performs the rest of destruction. This | ||
| 4589 | * is the second step of destruction described in the comment above | ||
| 4590 | * cgroup_destroy_locked(). | ||
| 4591 | */ | ||
| 4592 | static void cgroup_offline_fn(struct work_struct *work) | ||
| 4593 | { | ||
| 4594 | struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); | ||
| 4595 | struct cgroup *parent = cgrp->parent; | ||
| 4596 | struct dentry *d = cgrp->dentry; | ||
| 4597 | struct cgroup_subsys *ss; | ||
| 4598 | |||
| 4599 | mutex_lock(&cgroup_mutex); | ||
| 4600 | |||
| 4601 | /* | ||
| 4602 | * css_tryget() is guaranteed to fail now. Tell subsystems to | ||
| 4603 | * initiate destruction. | ||
| 4604 | */ | ||
| 4605 | for_each_root_subsys(cgrp->root, ss) | ||
| 4606 | offline_css(ss, cgrp); | ||
| 4607 | |||
| 4608 | /* | ||
| 4609 | * Put the css refs from cgroup_destroy_locked(). Each css holds | ||
| 4610 | * an extra reference to the cgroup's dentry and cgroup removal | ||
| 4611 | * proceeds regardless of css refs. On the last put of each css, | ||
| 4612 | * whenever that may be, the extra dentry ref is put so that dentry | ||
| 4613 | * destruction happens only after all css's are released. | ||
| 4614 | */ | ||
| 4615 | for_each_root_subsys(cgrp->root, ss) | ||
| 4616 | css_put(cgrp->subsys[ss->subsys_id]); | ||
| 4617 | |||
| 4618 | /* delete this cgroup from parent->children */ | ||
| 4619 | list_del_rcu(&cgrp->sibling); | ||
| 4620 | |||
| 4621 | dput(d); | ||
| 4622 | |||
| 4623 | set_bit(CGRP_RELEASABLE, &parent->flags); | ||
| 4624 | check_for_release(parent); | ||
| 4625 | |||
| 4626 | mutex_unlock(&cgroup_mutex); | ||
| 4325 | } | 4627 | } |
| 4326 | 4628 | ||
| 4327 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) | 4629 | static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry) |
| @@ -4361,12 +4663,12 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
| 4361 | cgroup_init_cftsets(ss); | 4663 | cgroup_init_cftsets(ss); |
| 4362 | 4664 | ||
| 4363 | /* Create the top cgroup state for this subsystem */ | 4665 | /* Create the top cgroup state for this subsystem */ |
| 4364 | list_add(&ss->sibling, &rootnode.subsys_list); | 4666 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); |
| 4365 | ss->root = &rootnode; | 4667 | ss->root = &cgroup_dummy_root; |
| 4366 | css = ss->css_alloc(dummytop); | 4668 | css = ss->css_alloc(cgroup_dummy_top); |
| 4367 | /* We don't handle early failures gracefully */ | 4669 | /* We don't handle early failures gracefully */ |
| 4368 | BUG_ON(IS_ERR(css)); | 4670 | BUG_ON(IS_ERR(css)); |
| 4369 | init_cgroup_css(css, ss, dummytop); | 4671 | init_cgroup_css(css, ss, cgroup_dummy_top); |
| 4370 | 4672 | ||
| 4371 | /* Update the init_css_set to contain a subsys | 4673 | /* Update the init_css_set to contain a subsys |
| 4372 | * pointer to this state - since the subsystem is | 4674 | * pointer to this state - since the subsystem is |
| @@ -4381,7 +4683,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss) | |||
| 4381 | * need to invoke fork callbacks here. */ | 4683 | * need to invoke fork callbacks here. */ |
| 4382 | BUG_ON(!list_empty(&init_task.tasks)); | 4684 | BUG_ON(!list_empty(&init_task.tasks)); |
| 4383 | 4685 | ||
| 4384 | BUG_ON(online_css(ss, dummytop)); | 4686 | BUG_ON(online_css(ss, cgroup_dummy_top)); |
| 4385 | 4687 | ||
| 4386 | mutex_unlock(&cgroup_mutex); | 4688 | mutex_unlock(&cgroup_mutex); |
| 4387 | 4689 | ||
| @@ -4404,7 +4706,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4404 | struct cgroup_subsys_state *css; | 4706 | struct cgroup_subsys_state *css; |
| 4405 | int i, ret; | 4707 | int i, ret; |
| 4406 | struct hlist_node *tmp; | 4708 | struct hlist_node *tmp; |
| 4407 | struct css_set *cg; | 4709 | struct css_set *cset; |
| 4408 | unsigned long key; | 4710 | unsigned long key; |
| 4409 | 4711 | ||
| 4410 | /* check name and function validity */ | 4712 | /* check name and function validity */ |
| @@ -4427,7 +4729,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4427 | */ | 4729 | */ |
| 4428 | if (ss->module == NULL) { | 4730 | if (ss->module == NULL) { |
| 4429 | /* a sanity check */ | 4731 | /* a sanity check */ |
| 4430 | BUG_ON(subsys[ss->subsys_id] != ss); | 4732 | BUG_ON(cgroup_subsys[ss->subsys_id] != ss); |
| 4431 | return 0; | 4733 | return 0; |
| 4432 | } | 4734 | } |
| 4433 | 4735 | ||
| @@ -4435,26 +4737,26 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4435 | cgroup_init_cftsets(ss); | 4737 | cgroup_init_cftsets(ss); |
| 4436 | 4738 | ||
| 4437 | mutex_lock(&cgroup_mutex); | 4739 | mutex_lock(&cgroup_mutex); |
| 4438 | subsys[ss->subsys_id] = ss; | 4740 | cgroup_subsys[ss->subsys_id] = ss; |
| 4439 | 4741 | ||
| 4440 | /* | 4742 | /* |
| 4441 | * no ss->css_alloc seems to need anything important in the ss | 4743 | * no ss->css_alloc seems to need anything important in the ss |
| 4442 | * struct, so this can happen first (i.e. before the rootnode | 4744 | * struct, so this can happen first (i.e. before the dummy root |
| 4443 | * attachment). | 4745 | * attachment). |
| 4444 | */ | 4746 | */ |
| 4445 | css = ss->css_alloc(dummytop); | 4747 | css = ss->css_alloc(cgroup_dummy_top); |
| 4446 | if (IS_ERR(css)) { | 4748 | if (IS_ERR(css)) { |
| 4447 | /* failure case - need to deassign the subsys[] slot. */ | 4749 | /* failure case - need to deassign the cgroup_subsys[] slot. */ |
| 4448 | subsys[ss->subsys_id] = NULL; | 4750 | cgroup_subsys[ss->subsys_id] = NULL; |
| 4449 | mutex_unlock(&cgroup_mutex); | 4751 | mutex_unlock(&cgroup_mutex); |
| 4450 | return PTR_ERR(css); | 4752 | return PTR_ERR(css); |
| 4451 | } | 4753 | } |
| 4452 | 4754 | ||
| 4453 | list_add(&ss->sibling, &rootnode.subsys_list); | 4755 | list_add(&ss->sibling, &cgroup_dummy_root.subsys_list); |
| 4454 | ss->root = &rootnode; | 4756 | ss->root = &cgroup_dummy_root; |
| 4455 | 4757 | ||
| 4456 | /* our new subsystem will be attached to the dummy hierarchy. */ | 4758 | /* our new subsystem will be attached to the dummy hierarchy. */ |
| 4457 | init_cgroup_css(css, ss, dummytop); | 4759 | init_cgroup_css(css, ss, cgroup_dummy_top); |
| 4458 | /* init_idr must be after init_cgroup_css because it sets css->id. */ | 4760 | /* init_idr must be after init_cgroup_css because it sets css->id. */ |
| 4459 | if (ss->use_id) { | 4761 | if (ss->use_id) { |
| 4460 | ret = cgroup_init_idr(ss, css); | 4762 | ret = cgroup_init_idr(ss, css); |
| @@ -4471,21 +4773,21 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss) | |||
| 4471 | * this is all done under the css_set_lock. | 4773 | * this is all done under the css_set_lock. |
| 4472 | */ | 4774 | */ |
| 4473 | write_lock(&css_set_lock); | 4775 | write_lock(&css_set_lock); |
| 4474 | hash_for_each_safe(css_set_table, i, tmp, cg, hlist) { | 4776 | hash_for_each_safe(css_set_table, i, tmp, cset, hlist) { |
| 4475 | /* skip entries that we already rehashed */ | 4777 | /* skip entries that we already rehashed */ |
| 4476 | if (cg->subsys[ss->subsys_id]) | 4778 | if (cset->subsys[ss->subsys_id]) |
| 4477 | continue; | 4779 | continue; |
| 4478 | /* remove existing entry */ | 4780 | /* remove existing entry */ |
| 4479 | hash_del(&cg->hlist); | 4781 | hash_del(&cset->hlist); |
| 4480 | /* set new value */ | 4782 | /* set new value */ |
| 4481 | cg->subsys[ss->subsys_id] = css; | 4783 | cset->subsys[ss->subsys_id] = css; |
| 4482 | /* recompute hash and restore entry */ | 4784 | /* recompute hash and restore entry */ |
| 4483 | key = css_set_hash(cg->subsys); | 4785 | key = css_set_hash(cset->subsys); |
| 4484 | hash_add(css_set_table, &cg->hlist, key); | 4786 | hash_add(css_set_table, &cset->hlist, key); |
| 4485 | } | 4787 | } |
| 4486 | write_unlock(&css_set_lock); | 4788 | write_unlock(&css_set_lock); |
| 4487 | 4789 | ||
| 4488 | ret = online_css(ss, dummytop); | 4790 | ret = online_css(ss, cgroup_dummy_top); |
| 4489 | if (ret) | 4791 | if (ret) |
| 4490 | goto err_unload; | 4792 | goto err_unload; |
| 4491 | 4793 | ||
| @@ -4511,7 +4813,7 @@ EXPORT_SYMBOL_GPL(cgroup_load_subsys); | |||
| 4511 | */ | 4813 | */ |
| 4512 | void cgroup_unload_subsys(struct cgroup_subsys *ss) | 4814 | void cgroup_unload_subsys(struct cgroup_subsys *ss) |
| 4513 | { | 4815 | { |
| 4514 | struct cg_cgroup_link *link; | 4816 | struct cgrp_cset_link *link; |
| 4515 | 4817 | ||
| 4516 | BUG_ON(ss->module == NULL); | 4818 | BUG_ON(ss->module == NULL); |
| 4517 | 4819 | ||
| @@ -4520,45 +4822,46 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss) | |||
| 4520 | * try_module_get in parse_cgroupfs_options should ensure that it | 4822 | * try_module_get in parse_cgroupfs_options should ensure that it |
| 4521 | * doesn't start being used while we're killing it off. | 4823 | * doesn't start being used while we're killing it off. |
| 4522 | */ | 4824 | */ |
| 4523 | BUG_ON(ss->root != &rootnode); | 4825 | BUG_ON(ss->root != &cgroup_dummy_root); |
| 4524 | 4826 | ||
| 4525 | mutex_lock(&cgroup_mutex); | 4827 | mutex_lock(&cgroup_mutex); |
| 4526 | 4828 | ||
| 4527 | offline_css(ss, dummytop); | 4829 | offline_css(ss, cgroup_dummy_top); |
| 4528 | 4830 | ||
| 4529 | if (ss->use_id) | 4831 | if (ss->use_id) |
| 4530 | idr_destroy(&ss->idr); | 4832 | idr_destroy(&ss->idr); |
| 4531 | 4833 | ||
| 4532 | /* deassign the subsys_id */ | 4834 | /* deassign the subsys_id */ |
| 4533 | subsys[ss->subsys_id] = NULL; | 4835 | cgroup_subsys[ss->subsys_id] = NULL; |
| 4534 | 4836 | ||
| 4535 | /* remove subsystem from rootnode's list of subsystems */ | 4837 | /* remove subsystem from the dummy root's list of subsystems */ |
| 4536 | list_del_init(&ss->sibling); | 4838 | list_del_init(&ss->sibling); |
| 4537 | 4839 | ||
| 4538 | /* | 4840 | /* |
| 4539 | * disentangle the css from all css_sets attached to the dummytop. as | 4841 | * disentangle the css from all css_sets attached to the dummy |
| 4540 | * in loading, we need to pay our respects to the hashtable gods. | 4842 | * top. as in loading, we need to pay our respects to the hashtable |
| 4843 | * gods. | ||
| 4541 | */ | 4844 | */ |
| 4542 | write_lock(&css_set_lock); | 4845 | write_lock(&css_set_lock); |
| 4543 | list_for_each_entry(link, &dummytop->css_sets, cgrp_link_list) { | 4846 | list_for_each_entry(link, &cgroup_dummy_top->cset_links, cset_link) { |
| 4544 | struct css_set *cg = link->cg; | 4847 | struct css_set *cset = link->cset; |
| 4545 | unsigned long key; | 4848 | unsigned long key; |
| 4546 | 4849 | ||
| 4547 | hash_del(&cg->hlist); | 4850 | hash_del(&cset->hlist); |
| 4548 | cg->subsys[ss->subsys_id] = NULL; | 4851 | cset->subsys[ss->subsys_id] = NULL; |
| 4549 | key = css_set_hash(cg->subsys); | 4852 | key = css_set_hash(cset->subsys); |
| 4550 | hash_add(css_set_table, &cg->hlist, key); | 4853 | hash_add(css_set_table, &cset->hlist, key); |
| 4551 | } | 4854 | } |
| 4552 | write_unlock(&css_set_lock); | 4855 | write_unlock(&css_set_lock); |
| 4553 | 4856 | ||
| 4554 | /* | 4857 | /* |
| 4555 | * remove subsystem's css from the dummytop and free it - need to | 4858 | * remove subsystem's css from the cgroup_dummy_top and free it - |
| 4556 | * free before marking as null because ss->css_free needs the | 4859 | * need to free before marking as null because ss->css_free needs |
| 4557 | * cgrp->subsys pointer to find their state. note that this also | 4860 | * the cgrp->subsys pointer to find their state. note that this |
| 4558 | * takes care of freeing the css_id. | 4861 | * also takes care of freeing the css_id. |
| 4559 | */ | 4862 | */ |
| 4560 | ss->css_free(dummytop); | 4863 | ss->css_free(cgroup_dummy_top); |
| 4561 | dummytop->subsys[ss->subsys_id] = NULL; | 4864 | cgroup_dummy_top->subsys[ss->subsys_id] = NULL; |
| 4562 | 4865 | ||
| 4563 | mutex_unlock(&cgroup_mutex); | 4866 | mutex_unlock(&cgroup_mutex); |
| 4564 | } | 4867 | } |
| @@ -4572,30 +4875,25 @@ EXPORT_SYMBOL_GPL(cgroup_unload_subsys); | |||
| 4572 | */ | 4875 | */ |
| 4573 | int __init cgroup_init_early(void) | 4876 | int __init cgroup_init_early(void) |
| 4574 | { | 4877 | { |
| 4878 | struct cgroup_subsys *ss; | ||
| 4575 | int i; | 4879 | int i; |
| 4880 | |||
| 4576 | atomic_set(&init_css_set.refcount, 1); | 4881 | atomic_set(&init_css_set.refcount, 1); |
| 4577 | INIT_LIST_HEAD(&init_css_set.cg_links); | 4882 | INIT_LIST_HEAD(&init_css_set.cgrp_links); |
| 4578 | INIT_LIST_HEAD(&init_css_set.tasks); | 4883 | INIT_LIST_HEAD(&init_css_set.tasks); |
| 4579 | INIT_HLIST_NODE(&init_css_set.hlist); | 4884 | INIT_HLIST_NODE(&init_css_set.hlist); |
| 4580 | css_set_count = 1; | 4885 | css_set_count = 1; |
| 4581 | init_cgroup_root(&rootnode); | 4886 | init_cgroup_root(&cgroup_dummy_root); |
| 4582 | root_count = 1; | 4887 | cgroup_root_count = 1; |
| 4583 | init_task.cgroups = &init_css_set; | 4888 | RCU_INIT_POINTER(init_task.cgroups, &init_css_set); |
| 4584 | 4889 | ||
| 4585 | init_css_set_link.cg = &init_css_set; | 4890 | init_cgrp_cset_link.cset = &init_css_set; |
| 4586 | init_css_set_link.cgrp = dummytop; | 4891 | init_cgrp_cset_link.cgrp = cgroup_dummy_top; |
| 4587 | list_add(&init_css_set_link.cgrp_link_list, | 4892 | list_add(&init_cgrp_cset_link.cset_link, &cgroup_dummy_top->cset_links); |
| 4588 | &rootnode.top_cgroup.css_sets); | 4893 | list_add(&init_cgrp_cset_link.cgrp_link, &init_css_set.cgrp_links); |
| 4589 | list_add(&init_css_set_link.cg_link_list, | ||
| 4590 | &init_css_set.cg_links); | ||
| 4591 | |||
| 4592 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 4593 | struct cgroup_subsys *ss = subsys[i]; | ||
| 4594 | |||
| 4595 | /* at bootup time, we don't worry about modular subsystems */ | ||
| 4596 | if (!ss || ss->module) | ||
| 4597 | continue; | ||
| 4598 | 4894 | ||
| 4895 | /* at bootup time, we don't worry about modular subsystems */ | ||
| 4896 | for_each_builtin_subsys(ss, i) { | ||
| 4599 | BUG_ON(!ss->name); | 4897 | BUG_ON(!ss->name); |
| 4600 | BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); | 4898 | BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN); |
| 4601 | BUG_ON(!ss->css_alloc); | 4899 | BUG_ON(!ss->css_alloc); |
| @@ -4620,30 +4918,33 @@ int __init cgroup_init_early(void) | |||
| 4620 | */ | 4918 | */ |
| 4621 | int __init cgroup_init(void) | 4919 | int __init cgroup_init(void) |
| 4622 | { | 4920 | { |
| 4623 | int err; | 4921 | struct cgroup_subsys *ss; |
| 4624 | int i; | ||
| 4625 | unsigned long key; | 4922 | unsigned long key; |
| 4923 | int i, err; | ||
| 4626 | 4924 | ||
| 4627 | err = bdi_init(&cgroup_backing_dev_info); | 4925 | err = bdi_init(&cgroup_backing_dev_info); |
| 4628 | if (err) | 4926 | if (err) |
| 4629 | return err; | 4927 | return err; |
| 4630 | 4928 | ||
| 4631 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 4929 | for_each_builtin_subsys(ss, i) { |
| 4632 | struct cgroup_subsys *ss = subsys[i]; | ||
| 4633 | |||
| 4634 | /* at bootup time, we don't worry about modular subsystems */ | ||
| 4635 | if (!ss || ss->module) | ||
| 4636 | continue; | ||
| 4637 | if (!ss->early_init) | 4930 | if (!ss->early_init) |
| 4638 | cgroup_init_subsys(ss); | 4931 | cgroup_init_subsys(ss); |
| 4639 | if (ss->use_id) | 4932 | if (ss->use_id) |
| 4640 | cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]); | 4933 | cgroup_init_idr(ss, init_css_set.subsys[ss->subsys_id]); |
| 4641 | } | 4934 | } |
| 4642 | 4935 | ||
| 4936 | /* allocate id for the dummy hierarchy */ | ||
| 4937 | mutex_lock(&cgroup_mutex); | ||
| 4938 | mutex_lock(&cgroup_root_mutex); | ||
| 4939 | |||
| 4643 | /* Add init_css_set to the hash table */ | 4940 | /* Add init_css_set to the hash table */ |
| 4644 | key = css_set_hash(init_css_set.subsys); | 4941 | key = css_set_hash(init_css_set.subsys); |
| 4645 | hash_add(css_set_table, &init_css_set.hlist, key); | 4942 | hash_add(css_set_table, &init_css_set.hlist, key); |
| 4646 | BUG_ON(!init_root_id(&rootnode)); | 4943 | |
| 4944 | BUG_ON(cgroup_init_root_id(&cgroup_dummy_root, 0, 1)); | ||
| 4945 | |||
| 4946 | mutex_unlock(&cgroup_root_mutex); | ||
| 4947 | mutex_unlock(&cgroup_mutex); | ||
| 4647 | 4948 | ||
| 4648 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); | 4949 | cgroup_kobj = kobject_create_and_add("cgroup", fs_kobj); |
| 4649 | if (!cgroup_kobj) { | 4950 | if (!cgroup_kobj) { |
| @@ -4708,7 +5009,7 @@ int proc_cgroup_show(struct seq_file *m, void *v) | |||
| 4708 | int count = 0; | 5009 | int count = 0; |
| 4709 | 5010 | ||
| 4710 | seq_printf(m, "%d:", root->hierarchy_id); | 5011 | seq_printf(m, "%d:", root->hierarchy_id); |
| 4711 | for_each_subsys(root, ss) | 5012 | for_each_root_subsys(root, ss) |
| 4712 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); | 5013 | seq_printf(m, "%s%s", count++ ? "," : "", ss->name); |
| 4713 | if (strlen(root->name)) | 5014 | if (strlen(root->name)) |
| 4714 | seq_printf(m, "%sname=%s", count ? "," : "", | 5015 | seq_printf(m, "%sname=%s", count ? "," : "", |
| @@ -4734,6 +5035,7 @@ out: | |||
| 4734 | /* Display information about each subsystem and each hierarchy */ | 5035 | /* Display information about each subsystem and each hierarchy */ |
| 4735 | static int proc_cgroupstats_show(struct seq_file *m, void *v) | 5036 | static int proc_cgroupstats_show(struct seq_file *m, void *v) |
| 4736 | { | 5037 | { |
| 5038 | struct cgroup_subsys *ss; | ||
| 4737 | int i; | 5039 | int i; |
| 4738 | 5040 | ||
| 4739 | seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); | 5041 | seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n"); |
| @@ -4743,14 +5045,12 @@ static int proc_cgroupstats_show(struct seq_file *m, void *v) | |||
| 4743 | * subsys/hierarchy state. | 5045 | * subsys/hierarchy state. |
| 4744 | */ | 5046 | */ |
| 4745 | mutex_lock(&cgroup_mutex); | 5047 | mutex_lock(&cgroup_mutex); |
| 4746 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | 5048 | |
| 4747 | struct cgroup_subsys *ss = subsys[i]; | 5049 | for_each_subsys(ss, i) |
| 4748 | if (ss == NULL) | ||
| 4749 | continue; | ||
| 4750 | seq_printf(m, "%s\t%d\t%d\t%d\n", | 5050 | seq_printf(m, "%s\t%d\t%d\t%d\n", |
| 4751 | ss->name, ss->root->hierarchy_id, | 5051 | ss->name, ss->root->hierarchy_id, |
| 4752 | ss->root->number_of_cgroups, !ss->disabled); | 5052 | ss->root->number_of_cgroups, !ss->disabled); |
| 4753 | } | 5053 | |
| 4754 | mutex_unlock(&cgroup_mutex); | 5054 | mutex_unlock(&cgroup_mutex); |
| 4755 | return 0; | 5055 | return 0; |
| 4756 | } | 5056 | } |
| @@ -4786,8 +5086,8 @@ static const struct file_operations proc_cgroupstats_operations = { | |||
| 4786 | void cgroup_fork(struct task_struct *child) | 5086 | void cgroup_fork(struct task_struct *child) |
| 4787 | { | 5087 | { |
| 4788 | task_lock(current); | 5088 | task_lock(current); |
| 5089 | get_css_set(task_css_set(current)); | ||
| 4789 | child->cgroups = current->cgroups; | 5090 | child->cgroups = current->cgroups; |
| 4790 | get_css_set(child->cgroups); | ||
| 4791 | task_unlock(current); | 5091 | task_unlock(current); |
| 4792 | INIT_LIST_HEAD(&child->cg_list); | 5092 | INIT_LIST_HEAD(&child->cg_list); |
| 4793 | } | 5093 | } |
| @@ -4804,6 +5104,7 @@ void cgroup_fork(struct task_struct *child) | |||
| 4804 | */ | 5104 | */ |
| 4805 | void cgroup_post_fork(struct task_struct *child) | 5105 | void cgroup_post_fork(struct task_struct *child) |
| 4806 | { | 5106 | { |
| 5107 | struct cgroup_subsys *ss; | ||
| 4807 | int i; | 5108 | int i; |
| 4808 | 5109 | ||
| 4809 | /* | 5110 | /* |
| @@ -4821,7 +5122,7 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 4821 | write_lock(&css_set_lock); | 5122 | write_lock(&css_set_lock); |
| 4822 | task_lock(child); | 5123 | task_lock(child); |
| 4823 | if (list_empty(&child->cg_list)) | 5124 | if (list_empty(&child->cg_list)) |
| 4824 | list_add(&child->cg_list, &child->cgroups->tasks); | 5125 | list_add(&child->cg_list, &task_css_set(child)->tasks); |
| 4825 | task_unlock(child); | 5126 | task_unlock(child); |
| 4826 | write_unlock(&css_set_lock); | 5127 | write_unlock(&css_set_lock); |
| 4827 | } | 5128 | } |
| @@ -4840,12 +5141,9 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 4840 | * of the array can be freed at module unload, so we | 5141 | * of the array can be freed at module unload, so we |
| 4841 | * can't touch that. | 5142 | * can't touch that. |
| 4842 | */ | 5143 | */ |
| 4843 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | 5144 | for_each_builtin_subsys(ss, i) |
| 4844 | struct cgroup_subsys *ss = subsys[i]; | ||
| 4845 | |||
| 4846 | if (ss->fork) | 5145 | if (ss->fork) |
| 4847 | ss->fork(child); | 5146 | ss->fork(child); |
| 4848 | } | ||
| 4849 | } | 5147 | } |
| 4850 | } | 5148 | } |
| 4851 | 5149 | ||
| @@ -4886,7 +5184,8 @@ void cgroup_post_fork(struct task_struct *child) | |||
| 4886 | */ | 5184 | */ |
| 4887 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) | 5185 | void cgroup_exit(struct task_struct *tsk, int run_callbacks) |
| 4888 | { | 5186 | { |
| 4889 | struct css_set *cg; | 5187 | struct cgroup_subsys *ss; |
| 5188 | struct css_set *cset; | ||
| 4890 | int i; | 5189 | int i; |
| 4891 | 5190 | ||
| 4892 | /* | 5191 | /* |
| @@ -4903,36 +5202,32 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks) | |||
| 4903 | 5202 | ||
| 4904 | /* Reassign the task to the init_css_set. */ | 5203 | /* Reassign the task to the init_css_set. */ |
| 4905 | task_lock(tsk); | 5204 | task_lock(tsk); |
| 4906 | cg = tsk->cgroups; | 5205 | cset = task_css_set(tsk); |
| 4907 | tsk->cgroups = &init_css_set; | 5206 | RCU_INIT_POINTER(tsk->cgroups, &init_css_set); |
| 4908 | 5207 | ||
| 4909 | if (run_callbacks && need_forkexit_callback) { | 5208 | if (run_callbacks && need_forkexit_callback) { |
| 4910 | /* | 5209 | /* |
| 4911 | * fork/exit callbacks are supported only for builtin | 5210 | * fork/exit callbacks are supported only for builtin |
| 4912 | * subsystems, see cgroup_post_fork() for details. | 5211 | * subsystems, see cgroup_post_fork() for details. |
| 4913 | */ | 5212 | */ |
| 4914 | for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) { | 5213 | for_each_builtin_subsys(ss, i) { |
| 4915 | struct cgroup_subsys *ss = subsys[i]; | ||
| 4916 | |||
| 4917 | if (ss->exit) { | 5214 | if (ss->exit) { |
| 4918 | struct cgroup *old_cgrp = | 5215 | struct cgroup *old_cgrp = cset->subsys[i]->cgroup; |
| 4919 | rcu_dereference_raw(cg->subsys[i])->cgroup; | ||
| 4920 | struct cgroup *cgrp = task_cgroup(tsk, i); | 5216 | struct cgroup *cgrp = task_cgroup(tsk, i); |
| 5217 | |||
| 4921 | ss->exit(cgrp, old_cgrp, tsk); | 5218 | ss->exit(cgrp, old_cgrp, tsk); |
| 4922 | } | 5219 | } |
| 4923 | } | 5220 | } |
| 4924 | } | 5221 | } |
| 4925 | task_unlock(tsk); | 5222 | task_unlock(tsk); |
| 4926 | 5223 | ||
| 4927 | put_css_set_taskexit(cg); | 5224 | put_css_set_taskexit(cset); |
| 4928 | } | 5225 | } |
| 4929 | 5226 | ||
| 4930 | static void check_for_release(struct cgroup *cgrp) | 5227 | static void check_for_release(struct cgroup *cgrp) |
| 4931 | { | 5228 | { |
| 4932 | /* All of these checks rely on RCU to keep the cgroup | ||
| 4933 | * structure alive */ | ||
| 4934 | if (cgroup_is_releasable(cgrp) && | 5229 | if (cgroup_is_releasable(cgrp) && |
| 4935 | !atomic_read(&cgrp->count) && list_empty(&cgrp->children)) { | 5230 | list_empty(&cgrp->cset_links) && list_empty(&cgrp->children)) { |
| 4936 | /* | 5231 | /* |
| 4937 | * Control Group is currently removeable. If it's not | 5232 | * Control Group is currently removeable. If it's not |
| 4938 | * already queued for a userspace notification, queue | 5233 | * already queued for a userspace notification, queue |
| @@ -4941,7 +5236,7 @@ static void check_for_release(struct cgroup *cgrp) | |||
| 4941 | int need_schedule_work = 0; | 5236 | int need_schedule_work = 0; |
| 4942 | 5237 | ||
| 4943 | raw_spin_lock(&release_list_lock); | 5238 | raw_spin_lock(&release_list_lock); |
| 4944 | if (!cgroup_is_removed(cgrp) && | 5239 | if (!cgroup_is_dead(cgrp) && |
| 4945 | list_empty(&cgrp->release_list)) { | 5240 | list_empty(&cgrp->release_list)) { |
| 4946 | list_add(&cgrp->release_list, &release_list); | 5241 | list_add(&cgrp->release_list, &release_list); |
| 4947 | need_schedule_work = 1; | 5242 | need_schedule_work = 1; |
| @@ -4952,34 +5247,6 @@ static void check_for_release(struct cgroup *cgrp) | |||
| 4952 | } | 5247 | } |
| 4953 | } | 5248 | } |
| 4954 | 5249 | ||
| 4955 | /* Caller must verify that the css is not for root cgroup */ | ||
| 4956 | bool __css_tryget(struct cgroup_subsys_state *css) | ||
| 4957 | { | ||
| 4958 | while (true) { | ||
| 4959 | int t, v; | ||
| 4960 | |||
| 4961 | v = css_refcnt(css); | ||
| 4962 | t = atomic_cmpxchg(&css->refcnt, v, v + 1); | ||
| 4963 | if (likely(t == v)) | ||
| 4964 | return true; | ||
| 4965 | else if (t < 0) | ||
| 4966 | return false; | ||
| 4967 | cpu_relax(); | ||
| 4968 | } | ||
| 4969 | } | ||
| 4970 | EXPORT_SYMBOL_GPL(__css_tryget); | ||
| 4971 | |||
| 4972 | /* Caller must verify that the css is not for root cgroup */ | ||
| 4973 | void __css_put(struct cgroup_subsys_state *css) | ||
| 4974 | { | ||
| 4975 | int v; | ||
| 4976 | |||
| 4977 | v = css_unbias_refcnt(atomic_dec_return(&css->refcnt)); | ||
| 4978 | if (v == 0) | ||
| 4979 | schedule_work(&css->dput_work); | ||
| 4980 | } | ||
| 4981 | EXPORT_SYMBOL_GPL(__css_put); | ||
| 4982 | |||
| 4983 | /* | 5250 | /* |
| 4984 | * Notify userspace when a cgroup is released, by running the | 5251 | * Notify userspace when a cgroup is released, by running the |
| 4985 | * configured release agent with the name of the cgroup (path | 5252 | * configured release agent with the name of the cgroup (path |
| @@ -5054,23 +5321,19 @@ static void cgroup_release_agent(struct work_struct *work) | |||
| 5054 | 5321 | ||
| 5055 | static int __init cgroup_disable(char *str) | 5322 | static int __init cgroup_disable(char *str) |
| 5056 | { | 5323 | { |
| 5057 | int i; | 5324 | struct cgroup_subsys *ss; |
| 5058 | char *token; | 5325 | char *token; |
| 5326 | int i; | ||
| 5059 | 5327 | ||
| 5060 | while ((token = strsep(&str, ",")) != NULL) { | 5328 | while ((token = strsep(&str, ",")) != NULL) { |
| 5061 | if (!*token) | 5329 | if (!*token) |
| 5062 | continue; | 5330 | continue; |
| 5063 | for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { | ||
| 5064 | struct cgroup_subsys *ss = subsys[i]; | ||
| 5065 | |||
| 5066 | /* | ||
| 5067 | * cgroup_disable, being at boot time, can't | ||
| 5068 | * know about module subsystems, so we don't | ||
| 5069 | * worry about them. | ||
| 5070 | */ | ||
| 5071 | if (!ss || ss->module) | ||
| 5072 | continue; | ||
| 5073 | 5331 | ||
| 5332 | /* | ||
| 5333 | * cgroup_disable, being at boot time, can't know about | ||
| 5334 | * module subsystems, so we don't worry about them. | ||
| 5335 | */ | ||
| 5336 | for_each_builtin_subsys(ss, i) { | ||
| 5074 | if (!strcmp(token, ss->name)) { | 5337 | if (!strcmp(token, ss->name)) { |
| 5075 | ss->disabled = 1; | 5338 | ss->disabled = 1; |
| 5076 | printk(KERN_INFO "Disabling %s control group" | 5339 | printk(KERN_INFO "Disabling %s control group" |
| @@ -5087,9 +5350,7 @@ __setup("cgroup_disable=", cgroup_disable); | |||
| 5087 | * Functons for CSS ID. | 5350 | * Functons for CSS ID. |
| 5088 | */ | 5351 | */ |
| 5089 | 5352 | ||
| 5090 | /* | 5353 | /* to get ID other than 0, this should be called when !cgroup_is_dead() */ |
| 5091 | *To get ID other than 0, this should be called when !cgroup_is_removed(). | ||
| 5092 | */ | ||
| 5093 | unsigned short css_id(struct cgroup_subsys_state *css) | 5354 | unsigned short css_id(struct cgroup_subsys_state *css) |
| 5094 | { | 5355 | { |
| 5095 | struct css_id *cssid; | 5356 | struct css_id *cssid; |
| @@ -5099,7 +5360,7 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
| 5099 | * on this or this is under rcu_read_lock(). Once css->id is allocated, | 5360 | * on this or this is under rcu_read_lock(). Once css->id is allocated, |
| 5100 | * it's unchanged until freed. | 5361 | * it's unchanged until freed. |
| 5101 | */ | 5362 | */ |
| 5102 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); | 5363 | cssid = rcu_dereference_raw(css->id); |
| 5103 | 5364 | ||
| 5104 | if (cssid) | 5365 | if (cssid) |
| 5105 | return cssid->id; | 5366 | return cssid->id; |
| @@ -5107,18 +5368,6 @@ unsigned short css_id(struct cgroup_subsys_state *css) | |||
| 5107 | } | 5368 | } |
| 5108 | EXPORT_SYMBOL_GPL(css_id); | 5369 | EXPORT_SYMBOL_GPL(css_id); |
| 5109 | 5370 | ||
| 5110 | unsigned short css_depth(struct cgroup_subsys_state *css) | ||
| 5111 | { | ||
| 5112 | struct css_id *cssid; | ||
| 5113 | |||
| 5114 | cssid = rcu_dereference_check(css->id, css_refcnt(css)); | ||
| 5115 | |||
| 5116 | if (cssid) | ||
| 5117 | return cssid->depth; | ||
| 5118 | return 0; | ||
| 5119 | } | ||
| 5120 | EXPORT_SYMBOL_GPL(css_depth); | ||
| 5121 | |||
| 5122 | /** | 5371 | /** |
| 5123 | * css_is_ancestor - test "root" css is an ancestor of "child" | 5372 | * css_is_ancestor - test "root" css is an ancestor of "child" |
| 5124 | * @child: the css to be tested. | 5373 | * @child: the css to be tested. |
| @@ -5153,7 +5402,8 @@ bool css_is_ancestor(struct cgroup_subsys_state *child, | |||
| 5153 | 5402 | ||
| 5154 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) | 5403 | void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css) |
| 5155 | { | 5404 | { |
| 5156 | struct css_id *id = css->id; | 5405 | struct css_id *id = rcu_dereference_protected(css->id, true); |
| 5406 | |||
| 5157 | /* When this is called before css_id initialization, id can be NULL */ | 5407 | /* When this is called before css_id initialization, id can be NULL */ |
| 5158 | if (!id) | 5408 | if (!id) |
| 5159 | return; | 5409 | return; |
| @@ -5219,8 +5469,8 @@ static int __init_or_module cgroup_init_idr(struct cgroup_subsys *ss, | |||
| 5219 | return PTR_ERR(newid); | 5469 | return PTR_ERR(newid); |
| 5220 | 5470 | ||
| 5221 | newid->stack[0] = newid->id; | 5471 | newid->stack[0] = newid->id; |
| 5222 | newid->css = rootcss; | 5472 | RCU_INIT_POINTER(newid->css, rootcss); |
| 5223 | rootcss->id = newid; | 5473 | RCU_INIT_POINTER(rootcss->id, newid); |
| 5224 | return 0; | 5474 | return 0; |
| 5225 | } | 5475 | } |
| 5226 | 5476 | ||
| @@ -5234,7 +5484,7 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent, | |||
| 5234 | subsys_id = ss->subsys_id; | 5484 | subsys_id = ss->subsys_id; |
| 5235 | parent_css = parent->subsys[subsys_id]; | 5485 | parent_css = parent->subsys[subsys_id]; |
| 5236 | child_css = child->subsys[subsys_id]; | 5486 | child_css = child->subsys[subsys_id]; |
| 5237 | parent_id = parent_css->id; | 5487 | parent_id = rcu_dereference_protected(parent_css->id, true); |
| 5238 | depth = parent_id->depth + 1; | 5488 | depth = parent_id->depth + 1; |
| 5239 | 5489 | ||
| 5240 | child_id = get_new_cssid(ss, depth); | 5490 | child_id = get_new_cssid(ss, depth); |
| @@ -5299,7 +5549,7 @@ struct cgroup_subsys_state *cgroup_css_from_dir(struct file *f, int id) | |||
| 5299 | } | 5549 | } |
| 5300 | 5550 | ||
| 5301 | #ifdef CONFIG_CGROUP_DEBUG | 5551 | #ifdef CONFIG_CGROUP_DEBUG |
| 5302 | static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont) | 5552 | static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cgrp) |
| 5303 | { | 5553 | { |
| 5304 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); | 5554 | struct cgroup_subsys_state *css = kzalloc(sizeof(*css), GFP_KERNEL); |
| 5305 | 5555 | ||
| @@ -5309,48 +5559,43 @@ static struct cgroup_subsys_state *debug_css_alloc(struct cgroup *cont) | |||
| 5309 | return css; | 5559 | return css; |
| 5310 | } | 5560 | } |
| 5311 | 5561 | ||
| 5312 | static void debug_css_free(struct cgroup *cont) | 5562 | static void debug_css_free(struct cgroup *cgrp) |
| 5313 | { | ||
| 5314 | kfree(cont->subsys[debug_subsys_id]); | ||
| 5315 | } | ||
| 5316 | |||
| 5317 | static u64 cgroup_refcount_read(struct cgroup *cont, struct cftype *cft) | ||
| 5318 | { | 5563 | { |
| 5319 | return atomic_read(&cont->count); | 5564 | kfree(cgrp->subsys[debug_subsys_id]); |
| 5320 | } | 5565 | } |
| 5321 | 5566 | ||
| 5322 | static u64 debug_taskcount_read(struct cgroup *cont, struct cftype *cft) | 5567 | static u64 debug_taskcount_read(struct cgroup *cgrp, struct cftype *cft) |
| 5323 | { | 5568 | { |
| 5324 | return cgroup_task_count(cont); | 5569 | return cgroup_task_count(cgrp); |
| 5325 | } | 5570 | } |
| 5326 | 5571 | ||
| 5327 | static u64 current_css_set_read(struct cgroup *cont, struct cftype *cft) | 5572 | static u64 current_css_set_read(struct cgroup *cgrp, struct cftype *cft) |
| 5328 | { | 5573 | { |
| 5329 | return (u64)(unsigned long)current->cgroups; | 5574 | return (u64)(unsigned long)current->cgroups; |
| 5330 | } | 5575 | } |
| 5331 | 5576 | ||
| 5332 | static u64 current_css_set_refcount_read(struct cgroup *cont, | 5577 | static u64 current_css_set_refcount_read(struct cgroup *cgrp, |
| 5333 | struct cftype *cft) | 5578 | struct cftype *cft) |
| 5334 | { | 5579 | { |
| 5335 | u64 count; | 5580 | u64 count; |
| 5336 | 5581 | ||
| 5337 | rcu_read_lock(); | 5582 | rcu_read_lock(); |
| 5338 | count = atomic_read(¤t->cgroups->refcount); | 5583 | count = atomic_read(&task_css_set(current)->refcount); |
| 5339 | rcu_read_unlock(); | 5584 | rcu_read_unlock(); |
| 5340 | return count; | 5585 | return count; |
| 5341 | } | 5586 | } |
| 5342 | 5587 | ||
| 5343 | static int current_css_set_cg_links_read(struct cgroup *cont, | 5588 | static int current_css_set_cg_links_read(struct cgroup *cgrp, |
| 5344 | struct cftype *cft, | 5589 | struct cftype *cft, |
| 5345 | struct seq_file *seq) | 5590 | struct seq_file *seq) |
| 5346 | { | 5591 | { |
| 5347 | struct cg_cgroup_link *link; | 5592 | struct cgrp_cset_link *link; |
| 5348 | struct css_set *cg; | 5593 | struct css_set *cset; |
| 5349 | 5594 | ||
| 5350 | read_lock(&css_set_lock); | 5595 | read_lock(&css_set_lock); |
| 5351 | rcu_read_lock(); | 5596 | rcu_read_lock(); |
| 5352 | cg = rcu_dereference(current->cgroups); | 5597 | cset = rcu_dereference(current->cgroups); |
| 5353 | list_for_each_entry(link, &cg->cg_links, cg_link_list) { | 5598 | list_for_each_entry(link, &cset->cgrp_links, cgrp_link) { |
| 5354 | struct cgroup *c = link->cgrp; | 5599 | struct cgroup *c = link->cgrp; |
| 5355 | const char *name; | 5600 | const char *name; |
| 5356 | 5601 | ||
| @@ -5367,19 +5612,19 @@ static int current_css_set_cg_links_read(struct cgroup *cont, | |||
| 5367 | } | 5612 | } |
| 5368 | 5613 | ||
| 5369 | #define MAX_TASKS_SHOWN_PER_CSS 25 | 5614 | #define MAX_TASKS_SHOWN_PER_CSS 25 |
| 5370 | static int cgroup_css_links_read(struct cgroup *cont, | 5615 | static int cgroup_css_links_read(struct cgroup *cgrp, |
| 5371 | struct cftype *cft, | 5616 | struct cftype *cft, |
| 5372 | struct seq_file *seq) | 5617 | struct seq_file *seq) |
| 5373 | { | 5618 | { |
| 5374 | struct cg_cgroup_link *link; | 5619 | struct cgrp_cset_link *link; |
| 5375 | 5620 | ||
| 5376 | read_lock(&css_set_lock); | 5621 | read_lock(&css_set_lock); |
| 5377 | list_for_each_entry(link, &cont->css_sets, cgrp_link_list) { | 5622 | list_for_each_entry(link, &cgrp->cset_links, cset_link) { |
| 5378 | struct css_set *cg = link->cg; | 5623 | struct css_set *cset = link->cset; |
| 5379 | struct task_struct *task; | 5624 | struct task_struct *task; |
| 5380 | int count = 0; | 5625 | int count = 0; |
| 5381 | seq_printf(seq, "css_set %p\n", cg); | 5626 | seq_printf(seq, "css_set %p\n", cset); |
| 5382 | list_for_each_entry(task, &cg->tasks, cg_list) { | 5627 | list_for_each_entry(task, &cset->tasks, cg_list) { |
| 5383 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) { | 5628 | if (count++ > MAX_TASKS_SHOWN_PER_CSS) { |
| 5384 | seq_puts(seq, " ...\n"); | 5629 | seq_puts(seq, " ...\n"); |
| 5385 | break; | 5630 | break; |
| @@ -5400,10 +5645,6 @@ static u64 releasable_read(struct cgroup *cgrp, struct cftype *cft) | |||
| 5400 | 5645 | ||
| 5401 | static struct cftype debug_files[] = { | 5646 | static struct cftype debug_files[] = { |
| 5402 | { | 5647 | { |
| 5403 | .name = "cgroup_refcount", | ||
| 5404 | .read_u64 = cgroup_refcount_read, | ||
| 5405 | }, | ||
| 5406 | { | ||
| 5407 | .name = "taskcount", | 5648 | .name = "taskcount", |
| 5408 | .read_u64 = debug_taskcount_read, | 5649 | .read_u64 = debug_taskcount_read, |
| 5409 | }, | 5650 | }, |
