aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/cgroups/resource_counter.txt4
-rw-r--r--Documentation/feature-removal-schedule.txt11
-rw-r--r--block/blk-cgroup.c45
-rw-r--r--include/linux/cgroup.h81
-rw-r--r--include/linux/res_counter.h2
-rw-r--r--include/net/sock.h12
-rw-r--r--include/net/tcp_memcontrol.h4
-rw-r--r--kernel/cgroup.c564
-rw-r--r--kernel/cgroup_freezer.c11
-rw-r--r--kernel/cpuset.c31
-rw-r--r--kernel/res_counter.c71
-rw-r--r--kernel/sched/core.c16
-rw-r--r--mm/memcontrol.c115
-rw-r--r--net/core/netprio_cgroup.c30
-rw-r--r--net/core/sock.c10
-rw-r--r--net/ipv4/tcp_memcontrol.c77
-rw-r--r--net/sched/cls_cgroup.c31
-rw-r--r--security/device_cgroup.c10
18 files changed, 687 insertions, 438 deletions
diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt
index 95b24d766eab..f3c4ec3626a2 100644
--- a/Documentation/cgroups/resource_counter.txt
+++ b/Documentation/cgroups/resource_counter.txt
@@ -77,11 +77,11 @@ to work with it.
77 where the charging failed. 77 where the charging failed.
78 78
79 d. int res_counter_charge_locked 79 d. int res_counter_charge_locked
80 (struct res_counter *rc, unsigned long val) 80 (struct res_counter *rc, unsigned long val, bool force)
81 81
82 The same as res_counter_charge(), but it must not acquire/release the 82 The same as res_counter_charge(), but it must not acquire/release the
83 res_counter->lock internally (it must be called with res_counter->lock 83 res_counter->lock internally (it must be called with res_counter->lock
84 held). 84 held). The force parameter indicates whether we can bypass the limit.
85 85
86 e. void res_counter_uncharge[_locked] 86 e. void res_counter_uncharge[_locked]
87 (struct res_counter *rc, unsigned long val) 87 (struct res_counter *rc, unsigned long val)
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index e9abede594e1..1e69a81e99d4 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -556,3 +556,14 @@ Why: The V4L2_CID_VCENTER, V4L2_CID_HCENTER controls have been deprecated
556 There are newer controls (V4L2_CID_PAN*, V4L2_CID_TILT*) that provide 556 There are newer controls (V4L2_CID_PAN*, V4L2_CID_TILT*) that provide
557 similar functionality. 557 similar functionality.
558Who: Sylwester Nawrocki <sylvester.nawrocki@gmail.com> 558Who: Sylwester Nawrocki <sylvester.nawrocki@gmail.com>
559
560----------------------------
561
562What: cgroup option updates via remount
563When: March 2013
564Why: Remount currently allows changing bound subsystems and
565 release_agent. Rebinding is hardly useful as it only works
566 when the hierarchy is empty and release_agent itself should be
567 replaced with conventional fsnotify.
568
569----------------------------
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ea84a23d5e68..126c341955de 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -28,34 +28,12 @@ static LIST_HEAD(blkio_list);
28struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; 28struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
29EXPORT_SYMBOL_GPL(blkio_root_cgroup); 29EXPORT_SYMBOL_GPL(blkio_root_cgroup);
30 30
31static struct cgroup_subsys_state *blkiocg_create(struct cgroup *);
32static int blkiocg_can_attach(struct cgroup *, struct cgroup_taskset *);
33static void blkiocg_attach(struct cgroup *, struct cgroup_taskset *);
34static void blkiocg_destroy(struct cgroup *);
35static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
36
37/* for encoding cft->private value on file */ 31/* for encoding cft->private value on file */
38#define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val)) 32#define BLKIOFILE_PRIVATE(x, val) (((x) << 16) | (val))
39/* What policy owns the file, proportional or throttle */ 33/* What policy owns the file, proportional or throttle */
40#define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff) 34#define BLKIOFILE_POLICY(val) (((val) >> 16) & 0xffff)
41#define BLKIOFILE_ATTR(val) ((val) & 0xffff) 35#define BLKIOFILE_ATTR(val) ((val) & 0xffff)
42 36
43struct cgroup_subsys blkio_subsys = {
44 .name = "blkio",
45 .create = blkiocg_create,
46 .can_attach = blkiocg_can_attach,
47 .attach = blkiocg_attach,
48 .destroy = blkiocg_destroy,
49 .populate = blkiocg_populate,
50#ifdef CONFIG_BLK_CGROUP
51 /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
52 .subsys_id = blkio_subsys_id,
53#endif
54 .use_id = 1,
55 .module = THIS_MODULE,
56};
57EXPORT_SYMBOL_GPL(blkio_subsys);
58
59static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg, 37static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
60 struct blkio_policy_node *pn) 38 struct blkio_policy_node *pn)
61{ 39{
@@ -1537,14 +1515,9 @@ struct cftype blkio_files[] = {
1537 .read_map = blkiocg_file_read_map, 1515 .read_map = blkiocg_file_read_map,
1538 }, 1516 },
1539#endif 1517#endif
1518 { } /* terminate */
1540}; 1519};
1541 1520
1542static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1543{
1544 return cgroup_add_files(cgroup, subsys, blkio_files,
1545 ARRAY_SIZE(blkio_files));
1546}
1547
1548static void blkiocg_destroy(struct cgroup *cgroup) 1521static void blkiocg_destroy(struct cgroup *cgroup)
1549{ 1522{
1550 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup); 1523 struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
@@ -1658,6 +1631,22 @@ static void blkiocg_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
1658 } 1631 }
1659} 1632}
1660 1633
1634struct cgroup_subsys blkio_subsys = {
1635 .name = "blkio",
1636 .create = blkiocg_create,
1637 .can_attach = blkiocg_can_attach,
1638 .attach = blkiocg_attach,
1639 .destroy = blkiocg_destroy,
1640#ifdef CONFIG_BLK_CGROUP
1641 /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
1642 .subsys_id = blkio_subsys_id,
1643#endif
1644 .base_cftypes = blkio_files,
1645 .use_id = 1,
1646 .module = THIS_MODULE,
1647};
1648EXPORT_SYMBOL_GPL(blkio_subsys);
1649
1661void blkio_policy_register(struct blkio_policy_type *blkiop) 1650void blkio_policy_register(struct blkio_policy_type *blkiop)
1662{ 1651{
1663 spin_lock(&blkio_list_lock); 1652 spin_lock(&blkio_list_lock);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 5a85b3415c1b..d3f5fba2c159 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -16,6 +16,7 @@
16#include <linux/prio_heap.h> 16#include <linux/prio_heap.h>
17#include <linux/rwsem.h> 17#include <linux/rwsem.h>
18#include <linux/idr.h> 18#include <linux/idr.h>
19#include <linux/workqueue.h>
19 20
20#ifdef CONFIG_CGROUPS 21#ifdef CONFIG_CGROUPS
21 22
@@ -76,12 +77,16 @@ struct cgroup_subsys_state {
76 unsigned long flags; 77 unsigned long flags;
77 /* ID for this css, if possible */ 78 /* ID for this css, if possible */
78 struct css_id __rcu *id; 79 struct css_id __rcu *id;
80
81 /* Used to put @cgroup->dentry on the last css_put() */
82 struct work_struct dput_work;
79}; 83};
80 84
81/* bits in struct cgroup_subsys_state flags field */ 85/* bits in struct cgroup_subsys_state flags field */
82enum { 86enum {
83 CSS_ROOT, /* This CSS is the root of the subsystem */ 87 CSS_ROOT, /* This CSS is the root of the subsystem */
84 CSS_REMOVED, /* This CSS is dead */ 88 CSS_REMOVED, /* This CSS is dead */
89 CSS_CLEAR_CSS_REFS, /* @ss->__DEPRECATED_clear_css_refs */
85}; 90};
86 91
87/* Caller must verify that the css is not for root cgroup */ 92/* Caller must verify that the css is not for root cgroup */
@@ -115,16 +120,12 @@ static inline bool css_is_removed(struct cgroup_subsys_state *css)
115 * the css has been destroyed. 120 * the css has been destroyed.
116 */ 121 */
117 122
123extern bool __css_tryget(struct cgroup_subsys_state *css);
118static inline bool css_tryget(struct cgroup_subsys_state *css) 124static inline bool css_tryget(struct cgroup_subsys_state *css)
119{ 125{
120 if (test_bit(CSS_ROOT, &css->flags)) 126 if (test_bit(CSS_ROOT, &css->flags))
121 return true; 127 return true;
122 while (!atomic_inc_not_zero(&css->refcnt)) { 128 return __css_tryget(css);
123 if (test_bit(CSS_REMOVED, &css->flags))
124 return false;
125 cpu_relax();
126 }
127 return true;
128} 129}
129 130
130/* 131/*
@@ -132,11 +133,11 @@ static inline bool css_tryget(struct cgroup_subsys_state *css)
132 * css_get() or css_tryget() 133 * css_get() or css_tryget()
133 */ 134 */
134 135
135extern void __css_put(struct cgroup_subsys_state *css, int count); 136extern void __css_put(struct cgroup_subsys_state *css);
136static inline void css_put(struct cgroup_subsys_state *css) 137static inline void css_put(struct cgroup_subsys_state *css)
137{ 138{
138 if (!test_bit(CSS_ROOT, &css->flags)) 139 if (!test_bit(CSS_ROOT, &css->flags))
139 __css_put(css, 1); 140 __css_put(css);
140} 141}
141 142
142/* bits in struct cgroup flags field */ 143/* bits in struct cgroup flags field */
@@ -175,6 +176,7 @@ struct cgroup {
175 */ 176 */
176 struct list_head sibling; /* my parent's children */ 177 struct list_head sibling; /* my parent's children */
177 struct list_head children; /* my children */ 178 struct list_head children; /* my children */
179 struct list_head files; /* my files */
178 180
179 struct cgroup *parent; /* my parent */ 181 struct cgroup *parent; /* my parent */
180 struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */ 182 struct dentry __rcu *dentry; /* cgroup fs entry, RCU protected */
@@ -191,6 +193,9 @@ struct cgroup {
191 */ 193 */
192 struct list_head css_sets; 194 struct list_head css_sets;
193 195
196 struct list_head allcg_node; /* cgroupfs_root->allcg_list */
197 struct list_head cft_q_node; /* used during cftype add/rm */
198
194 /* 199 /*
195 * Linked list running through all cgroups that can 200 * Linked list running through all cgroups that can
196 * potentially be reaped by the release agent. Protected by 201 * potentially be reaped by the release agent. Protected by
@@ -275,11 +280,17 @@ struct cgroup_map_cb {
275 * - the 'cftype' of the file is file->f_dentry->d_fsdata 280 * - the 'cftype' of the file is file->f_dentry->d_fsdata
276 */ 281 */
277 282
278#define MAX_CFTYPE_NAME 64 283/* cftype->flags */
284#define CFTYPE_ONLY_ON_ROOT (1U << 0) /* only create on root cg */
285#define CFTYPE_NOT_ON_ROOT (1U << 1) /* don't create on root cg */
286
287#define MAX_CFTYPE_NAME 64
288
279struct cftype { 289struct cftype {
280 /* 290 /*
281 * By convention, the name should begin with the name of the 291 * By convention, the name should begin with the name of the
282 * subsystem, followed by a period 292 * subsystem, followed by a period. Zero length string indicates
293 * end of cftype array.
283 */ 294 */
284 char name[MAX_CFTYPE_NAME]; 295 char name[MAX_CFTYPE_NAME];
285 int private; 296 int private;
@@ -295,6 +306,9 @@ struct cftype {
295 */ 306 */
296 size_t max_write_len; 307 size_t max_write_len;
297 308
309 /* CFTYPE_* flags */
310 unsigned int flags;
311
298 int (*open)(struct inode *inode, struct file *file); 312 int (*open)(struct inode *inode, struct file *file);
299 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft, 313 ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
300 struct file *file, 314 struct file *file,
@@ -373,6 +387,16 @@ struct cftype {
373 struct eventfd_ctx *eventfd); 387 struct eventfd_ctx *eventfd);
374}; 388};
375 389
390/*
391 * cftype_sets describe cftypes belonging to a subsystem and are chained at
392 * cgroup_subsys->cftsets. Each cftset points to an array of cftypes
393 * terminated by zero length name.
394 */
395struct cftype_set {
396 struct list_head node; /* chained at subsys->cftsets */
397 const struct cftype *cfts;
398};
399
376struct cgroup_scanner { 400struct cgroup_scanner {
377 struct cgroup *cg; 401 struct cgroup *cg;
378 int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan); 402 int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
@@ -382,21 +406,8 @@ struct cgroup_scanner {
382 void *data; 406 void *data;
383}; 407};
384 408
385/* 409int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
386 * Add a new file to the given cgroup directory. Should only be 410int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts);
387 * called by subsystems from within a populate() method
388 */
389int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
390 const struct cftype *cft);
391
392/*
393 * Add a set of new files to the given cgroup directory. Should
394 * only be called by subsystems from within a populate() method
395 */
396int cgroup_add_files(struct cgroup *cgrp,
397 struct cgroup_subsys *subsys,
398 const struct cftype cft[],
399 int count);
400 411
401int cgroup_is_removed(const struct cgroup *cgrp); 412int cgroup_is_removed(const struct cgroup *cgrp);
402 413
@@ -461,7 +472,6 @@ struct cgroup_subsys {
461 void (*fork)(struct task_struct *task); 472 void (*fork)(struct task_struct *task);
462 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp, 473 void (*exit)(struct cgroup *cgrp, struct cgroup *old_cgrp,
463 struct task_struct *task); 474 struct task_struct *task);
464 int (*populate)(struct cgroup_subsys *ss, struct cgroup *cgrp);
465 void (*post_clone)(struct cgroup *cgrp); 475 void (*post_clone)(struct cgroup *cgrp);
466 void (*bind)(struct cgroup *root); 476 void (*bind)(struct cgroup *root);
467 477
@@ -474,6 +484,18 @@ struct cgroup_subsys {
474 * (not available in early_init time.) 484 * (not available in early_init time.)
475 */ 485 */
476 bool use_id; 486 bool use_id;
487
488 /*
489 * If %true, cgroup removal will try to clear css refs by retrying
490 * ss->pre_destroy() until there's no css ref left. This behavior
491 * is strictly for backward compatibility and will be removed as
492 * soon as the current user (memcg) is updated.
493 *
494 * If %false, ss->pre_destroy() can't fail and cgroup removal won't
495 * wait for css refs to drop to zero before proceeding.
496 */
497 bool __DEPRECATED_clear_css_refs;
498
477#define MAX_CGROUP_TYPE_NAMELEN 32 499#define MAX_CGROUP_TYPE_NAMELEN 32
478 const char *name; 500 const char *name;
479 501
@@ -500,6 +522,13 @@ struct cgroup_subsys {
500 struct idr idr; 522 struct idr idr;
501 spinlock_t id_lock; 523 spinlock_t id_lock;
502 524
525 /* list of cftype_sets */
526 struct list_head cftsets;
527
528 /* base cftypes, automatically [de]registered with subsys itself */
529 struct cftype *base_cftypes;
530 struct cftype_set base_cftset;
531
503 /* should be defined only by modular subsystems */ 532 /* should be defined only by modular subsystems */
504 struct module *module; 533 struct module *module;
505}; 534};
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h
index da81af086eaf..fb201896a8b0 100644
--- a/include/linux/res_counter.h
+++ b/include/linux/res_counter.h
@@ -116,7 +116,7 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent);
116 */ 116 */
117 117
118int __must_check res_counter_charge_locked(struct res_counter *counter, 118int __must_check res_counter_charge_locked(struct res_counter *counter,
119 unsigned long val); 119 unsigned long val, bool force);
120int __must_check res_counter_charge(struct res_counter *counter, 120int __must_check res_counter_charge(struct res_counter *counter,
121 unsigned long val, struct res_counter **limit_fail_at); 121 unsigned long val, struct res_counter **limit_fail_at);
122int __must_check res_counter_charge_nofail(struct res_counter *counter, 122int __must_check res_counter_charge_nofail(struct res_counter *counter,
diff --git a/include/net/sock.h b/include/net/sock.h
index da931555e000..d89f0582b6b6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -70,16 +70,16 @@
70struct cgroup; 70struct cgroup;
71struct cgroup_subsys; 71struct cgroup_subsys;
72#ifdef CONFIG_NET 72#ifdef CONFIG_NET
73int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss); 73int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
74void mem_cgroup_sockets_destroy(struct cgroup *cgrp); 74void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg);
75#else 75#else
76static inline 76static inline
77int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) 77int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
78{ 78{
79 return 0; 79 return 0;
80} 80}
81static inline 81static inline
82void mem_cgroup_sockets_destroy(struct cgroup *cgrp) 82void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
83{ 83{
84} 84}
85#endif 85#endif
@@ -914,9 +914,9 @@ struct proto {
914 * This function has to setup any files the protocol want to 914 * This function has to setup any files the protocol want to
915 * appear in the kmem cgroup filesystem. 915 * appear in the kmem cgroup filesystem.
916 */ 916 */
917 int (*init_cgroup)(struct cgroup *cgrp, 917 int (*init_cgroup)(struct mem_cgroup *memcg,
918 struct cgroup_subsys *ss); 918 struct cgroup_subsys *ss);
919 void (*destroy_cgroup)(struct cgroup *cgrp); 919 void (*destroy_cgroup)(struct mem_cgroup *memcg);
920 struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg); 920 struct cg_proto *(*proto_cgroup)(struct mem_cgroup *memcg);
921#endif 921#endif
922}; 922};
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
index 48410ff25c9e..7df18bc43a97 100644
--- a/include/net/tcp_memcontrol.h
+++ b/include/net/tcp_memcontrol.h
@@ -12,8 +12,8 @@ struct tcp_memcontrol {
12}; 12};
13 13
14struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); 14struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
15int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); 15int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss);
16void tcp_destroy_cgroup(struct cgroup *cgrp); 16void tcp_destroy_cgroup(struct mem_cgroup *memcg);
17unsigned long long tcp_max_memory(const struct mem_cgroup *memcg); 17unsigned long long tcp_max_memory(const struct mem_cgroup *memcg);
18void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx); 18void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx);
19#endif /* _TCP_MEMCG_H */ 19#endif /* _TCP_MEMCG_H */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ed64ccac67c9..ad8eae5bb801 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -60,9 +60,13 @@
60#include <linux/eventfd.h> 60#include <linux/eventfd.h>
61#include <linux/poll.h> 61#include <linux/poll.h>
62#include <linux/flex_array.h> /* used in cgroup_attach_proc */ 62#include <linux/flex_array.h> /* used in cgroup_attach_proc */
63#include <linux/kthread.h>
63 64
64#include <linux/atomic.h> 65#include <linux/atomic.h>
65 66
67/* css deactivation bias, makes css->refcnt negative to deny new trygets */
68#define CSS_DEACT_BIAS INT_MIN
69
66/* 70/*
67 * cgroup_mutex is the master lock. Any modification to cgroup or its 71 * cgroup_mutex is the master lock. Any modification to cgroup or its
68 * hierarchy must be performed while holding it. 72 * hierarchy must be performed while holding it.
@@ -127,6 +131,9 @@ struct cgroupfs_root {
127 /* A list running through the active hierarchies */ 131 /* A list running through the active hierarchies */
128 struct list_head root_list; 132 struct list_head root_list;
129 133
134 /* All cgroups on this root, cgroup_mutex protected */
135 struct list_head allcg_list;
136
130 /* Hierarchy-specific flags */ 137 /* Hierarchy-specific flags */
131 unsigned long flags; 138 unsigned long flags;
132 139
@@ -145,6 +152,15 @@ struct cgroupfs_root {
145static struct cgroupfs_root rootnode; 152static struct cgroupfs_root rootnode;
146 153
147/* 154/*
155 * cgroupfs file entry, pointed to from leaf dentry->d_fsdata.
156 */
157struct cfent {
158 struct list_head node;
159 struct dentry *dentry;
160 struct cftype *type;
161};
162
163/*
148 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when 164 * CSS ID -- ID per subsys's Cgroup Subsys State(CSS). used only when
149 * cgroup_subsys->use_id != 0. 165 * cgroup_subsys->use_id != 0.
150 */ 166 */
@@ -239,6 +255,14 @@ int cgroup_lock_is_held(void)
239 255
240EXPORT_SYMBOL_GPL(cgroup_lock_is_held); 256EXPORT_SYMBOL_GPL(cgroup_lock_is_held);
241 257
258/* the current nr of refs, always >= 0 whether @css is deactivated or not */
259static int css_refcnt(struct cgroup_subsys_state *css)
260{
261 int v = atomic_read(&css->refcnt);
262
263 return v >= 0 ? v : v - CSS_DEACT_BIAS;
264}
265
242/* convenient tests for these bits */ 266/* convenient tests for these bits */
243inline int cgroup_is_removed(const struct cgroup *cgrp) 267inline int cgroup_is_removed(const struct cgroup *cgrp)
244{ 268{
@@ -279,6 +303,21 @@ list_for_each_entry(_ss, &_root->subsys_list, sibling)
279#define for_each_active_root(_root) \ 303#define for_each_active_root(_root) \
280list_for_each_entry(_root, &roots, root_list) 304list_for_each_entry(_root, &roots, root_list)
281 305
306static inline struct cgroup *__d_cgrp(struct dentry *dentry)
307{
308 return dentry->d_fsdata;
309}
310
311static inline struct cfent *__d_cfe(struct dentry *dentry)
312{
313 return dentry->d_fsdata;
314}
315
316static inline struct cftype *__d_cft(struct dentry *dentry)
317{
318 return __d_cfe(dentry)->type;
319}
320
282/* the list of cgroups eligible for automatic release. Protected by 321/* the list of cgroups eligible for automatic release. Protected by
283 * release_list_lock */ 322 * release_list_lock */
284static LIST_HEAD(release_list); 323static LIST_HEAD(release_list);
@@ -816,12 +855,17 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp)
816 struct cgroup_subsys *ss; 855 struct cgroup_subsys *ss;
817 int ret = 0; 856 int ret = 0;
818 857
819 for_each_subsys(cgrp->root, ss) 858 for_each_subsys(cgrp->root, ss) {
820 if (ss->pre_destroy) { 859 if (!ss->pre_destroy)
821 ret = ss->pre_destroy(cgrp); 860 continue;
822 if (ret) 861
823 break; 862 ret = ss->pre_destroy(cgrp);
863 if (ret) {
864 /* ->pre_destroy() failure is being deprecated */
865 WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs);
866 break;
824 } 867 }
868 }
825 869
826 return ret; 870 return ret;
827} 871}
@@ -864,6 +908,14 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
864 BUG_ON(!list_empty(&cgrp->pidlists)); 908 BUG_ON(!list_empty(&cgrp->pidlists));
865 909
866 kfree_rcu(cgrp, rcu_head); 910 kfree_rcu(cgrp, rcu_head);
911 } else {
912 struct cfent *cfe = __d_cfe(dentry);
913 struct cgroup *cgrp = dentry->d_parent->d_fsdata;
914
915 WARN_ONCE(!list_empty(&cfe->node) &&
916 cgrp != &cgrp->root->top_cgroup,
917 "cfe still linked for %s\n", cfe->type->name);
918 kfree(cfe);
867 } 919 }
868 iput(inode); 920 iput(inode);
869} 921}
@@ -882,34 +934,36 @@ static void remove_dir(struct dentry *d)
882 dput(parent); 934 dput(parent);
883} 935}
884 936
885static void cgroup_clear_directory(struct dentry *dentry) 937static int cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
886{ 938{
887 struct list_head *node; 939 struct cfent *cfe;
888 940
889 BUG_ON(!mutex_is_locked(&dentry->d_inode->i_mutex)); 941 lockdep_assert_held(&cgrp->dentry->d_inode->i_mutex);
890 spin_lock(&dentry->d_lock); 942 lockdep_assert_held(&cgroup_mutex);
891 node = dentry->d_subdirs.next; 943
892 while (node != &dentry->d_subdirs) { 944 list_for_each_entry(cfe, &cgrp->files, node) {
893 struct dentry *d = list_entry(node, struct dentry, d_u.d_child); 945 struct dentry *d = cfe->dentry;
894 946
895 spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED); 947 if (cft && cfe->type != cft)
896 list_del_init(node); 948 continue;
897 if (d->d_inode) { 949
898 /* This should never be called on a cgroup 950 dget(d);
899 * directory with child cgroups */ 951 d_delete(d);
900 BUG_ON(d->d_inode->i_mode & S_IFDIR); 952 simple_unlink(d->d_inode, d);
901 dget_dlock(d); 953 list_del_init(&cfe->node);
902 spin_unlock(&d->d_lock); 954 dput(d);
903 spin_unlock(&dentry->d_lock); 955
904 d_delete(d); 956 return 0;
905 simple_unlink(dentry->d_inode, d);
906 dput(d);
907 spin_lock(&dentry->d_lock);
908 } else
909 spin_unlock(&d->d_lock);
910 node = dentry->d_subdirs.next;
911 } 957 }
912 spin_unlock(&dentry->d_lock); 958 return -ENOENT;
959}
960
961static void cgroup_clear_directory(struct dentry *dir)
962{
963 struct cgroup *cgrp = __d_cgrp(dir);
964
965 while (!list_empty(&cgrp->files))
966 cgroup_rm_file(cgrp, NULL);
913} 967}
914 968
915/* 969/*
@@ -1294,6 +1348,11 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1294 if (ret) 1348 if (ret)
1295 goto out_unlock; 1349 goto out_unlock;
1296 1350
1351 /* See feature-removal-schedule.txt */
1352 if (opts.subsys_bits != root->actual_subsys_bits || opts.release_agent)
1353 pr_warning("cgroup: option changes via remount are deprecated (pid=%d comm=%s)\n",
1354 task_tgid_nr(current), current->comm);
1355
1297 /* Don't allow flags or name to change at remount */ 1356 /* Don't allow flags or name to change at remount */
1298 if (opts.flags != root->flags || 1357 if (opts.flags != root->flags ||
1299 (opts.name && strcmp(opts.name, root->name))) { 1358 (opts.name && strcmp(opts.name, root->name))) {
@@ -1308,7 +1367,8 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data)
1308 goto out_unlock; 1367 goto out_unlock;
1309 } 1368 }
1310 1369
1311 /* (re)populate subsystem files */ 1370 /* clear out any existing files and repopulate subsystem files */
1371 cgroup_clear_directory(cgrp->dentry);
1312 cgroup_populate_dir(cgrp); 1372 cgroup_populate_dir(cgrp);
1313 1373
1314 if (opts.release_agent) 1374 if (opts.release_agent)
@@ -1333,6 +1393,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1333{ 1393{
1334 INIT_LIST_HEAD(&cgrp->sibling); 1394 INIT_LIST_HEAD(&cgrp->sibling);
1335 INIT_LIST_HEAD(&cgrp->children); 1395 INIT_LIST_HEAD(&cgrp->children);
1396 INIT_LIST_HEAD(&cgrp->files);
1336 INIT_LIST_HEAD(&cgrp->css_sets); 1397 INIT_LIST_HEAD(&cgrp->css_sets);
1337 INIT_LIST_HEAD(&cgrp->release_list); 1398 INIT_LIST_HEAD(&cgrp->release_list);
1338 INIT_LIST_HEAD(&cgrp->pidlists); 1399 INIT_LIST_HEAD(&cgrp->pidlists);
@@ -1344,11 +1405,14 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
1344static void init_cgroup_root(struct cgroupfs_root *root) 1405static void init_cgroup_root(struct cgroupfs_root *root)
1345{ 1406{
1346 struct cgroup *cgrp = &root->top_cgroup; 1407 struct cgroup *cgrp = &root->top_cgroup;
1408
1347 INIT_LIST_HEAD(&root->subsys_list); 1409 INIT_LIST_HEAD(&root->subsys_list);
1348 INIT_LIST_HEAD(&root->root_list); 1410 INIT_LIST_HEAD(&root->root_list);
1411 INIT_LIST_HEAD(&root->allcg_list);
1349 root->number_of_cgroups = 1; 1412 root->number_of_cgroups = 1;
1350 cgrp->root = root; 1413 cgrp->root = root;
1351 cgrp->top_cgroup = cgrp; 1414 cgrp->top_cgroup = cgrp;
1415 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
1352 init_cgroup_housekeeping(cgrp); 1416 init_cgroup_housekeeping(cgrp);
1353} 1417}
1354 1418
@@ -1692,16 +1756,6 @@ static struct file_system_type cgroup_fs_type = {
1692 1756
1693static struct kobject *cgroup_kobj; 1757static struct kobject *cgroup_kobj;
1694 1758
1695static inline struct cgroup *__d_cgrp(struct dentry *dentry)
1696{
1697 return dentry->d_fsdata;
1698}
1699
1700static inline struct cftype *__d_cft(struct dentry *dentry)
1701{
1702 return dentry->d_fsdata;
1703}
1704
1705/** 1759/**
1706 * cgroup_path - generate the path of a cgroup 1760 * cgroup_path - generate the path of a cgroup
1707 * @cgrp: the cgroup in question 1761 * @cgrp: the cgroup in question
@@ -2172,6 +2226,18 @@ retry_find_task:
2172 2226
2173 if (threadgroup) 2227 if (threadgroup)
2174 tsk = tsk->group_leader; 2228 tsk = tsk->group_leader;
2229
2230 /*
2231 * Workqueue threads may acquire PF_THREAD_BOUND and become
2232 * trapped in a cpuset, or RT worker may be born in a cgroup
2233 * with no rt_runtime allocated. Just say no.
2234 */
2235 if (tsk == kthreadd_task || (tsk->flags & PF_THREAD_BOUND)) {
2236 ret = -EINVAL;
2237 rcu_read_unlock();
2238 goto out_unlock_cgroup;
2239 }
2240
2175 get_task_struct(tsk); 2241 get_task_struct(tsk);
2176 rcu_read_unlock(); 2242 rcu_read_unlock();
2177 2243
@@ -2603,50 +2669,191 @@ static umode_t cgroup_file_mode(const struct cftype *cft)
2603 return mode; 2669 return mode;
2604} 2670}
2605 2671
2606int cgroup_add_file(struct cgroup *cgrp, 2672static int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2607 struct cgroup_subsys *subsys, 2673 const struct cftype *cft)
2608 const struct cftype *cft)
2609{ 2674{
2610 struct dentry *dir = cgrp->dentry; 2675 struct dentry *dir = cgrp->dentry;
2676 struct cgroup *parent = __d_cgrp(dir);
2611 struct dentry *dentry; 2677 struct dentry *dentry;
2678 struct cfent *cfe;
2612 int error; 2679 int error;
2613 umode_t mode; 2680 umode_t mode;
2614
2615 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 }; 2681 char name[MAX_CGROUP_TYPE_NAMELEN + MAX_CFTYPE_NAME + 2] = { 0 };
2682
2683 /* does @cft->flags tell us to skip creation on @cgrp? */
2684 if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgrp->parent)
2685 return 0;
2686 if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgrp->parent)
2687 return 0;
2688
2616 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) { 2689 if (subsys && !test_bit(ROOT_NOPREFIX, &cgrp->root->flags)) {
2617 strcpy(name, subsys->name); 2690 strcpy(name, subsys->name);
2618 strcat(name, "."); 2691 strcat(name, ".");
2619 } 2692 }
2620 strcat(name, cft->name); 2693 strcat(name, cft->name);
2694
2621 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex)); 2695 BUG_ON(!mutex_is_locked(&dir->d_inode->i_mutex));
2696
2697 cfe = kzalloc(sizeof(*cfe), GFP_KERNEL);
2698 if (!cfe)
2699 return -ENOMEM;
2700
2622 dentry = lookup_one_len(name, dir, strlen(name)); 2701 dentry = lookup_one_len(name, dir, strlen(name));
2623 if (!IS_ERR(dentry)) { 2702 if (IS_ERR(dentry)) {
2624 mode = cgroup_file_mode(cft);
2625 error = cgroup_create_file(dentry, mode | S_IFREG,
2626 cgrp->root->sb);
2627 if (!error)
2628 dentry->d_fsdata = (void *)cft;
2629 dput(dentry);
2630 } else
2631 error = PTR_ERR(dentry); 2703 error = PTR_ERR(dentry);
2704 goto out;
2705 }
2706
2707 mode = cgroup_file_mode(cft);
2708 error = cgroup_create_file(dentry, mode | S_IFREG, cgrp->root->sb);
2709 if (!error) {
2710 cfe->type = (void *)cft;
2711 cfe->dentry = dentry;
2712 dentry->d_fsdata = cfe;
2713 list_add_tail(&cfe->node, &parent->files);
2714 cfe = NULL;
2715 }
2716 dput(dentry);
2717out:
2718 kfree(cfe);
2632 return error; 2719 return error;
2633} 2720}
2634EXPORT_SYMBOL_GPL(cgroup_add_file);
2635 2721
2636int cgroup_add_files(struct cgroup *cgrp, 2722static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys,
2637 struct cgroup_subsys *subsys, 2723 const struct cftype cfts[], bool is_add)
2638 const struct cftype cft[],
2639 int count)
2640{ 2724{
2641 int i, err; 2725 const struct cftype *cft;
2642 for (i = 0; i < count; i++) { 2726 int err, ret = 0;
2643 err = cgroup_add_file(cgrp, subsys, &cft[i]); 2727
2644 if (err) 2728 for (cft = cfts; cft->name[0] != '\0'; cft++) {
2645 return err; 2729 if (is_add)
2730 err = cgroup_add_file(cgrp, subsys, cft);
2731 else
2732 err = cgroup_rm_file(cgrp, cft);
2733 if (err) {
2734 pr_warning("cgroup_addrm_files: failed to %s %s, err=%d\n",
2735 is_add ? "add" : "remove", cft->name, err);
2736 ret = err;
2737 }
2738 }
2739 return ret;
2740}
2741
2742static DEFINE_MUTEX(cgroup_cft_mutex);
2743
2744static void cgroup_cfts_prepare(void)
2745 __acquires(&cgroup_cft_mutex) __acquires(&cgroup_mutex)
2746{
2747 /*
2748 * Thanks to the entanglement with vfs inode locking, we can't walk
2749 * the existing cgroups under cgroup_mutex and create files.
2750 * Instead, we increment reference on all cgroups and build list of
2751 * them using @cgrp->cft_q_node. Grab cgroup_cft_mutex to ensure
2752 * exclusive access to the field.
2753 */
2754 mutex_lock(&cgroup_cft_mutex);
2755 mutex_lock(&cgroup_mutex);
2756}
2757
2758static void cgroup_cfts_commit(struct cgroup_subsys *ss,
2759 const struct cftype *cfts, bool is_add)
2760 __releases(&cgroup_mutex) __releases(&cgroup_cft_mutex)
2761{
2762 LIST_HEAD(pending);
2763 struct cgroup *cgrp, *n;
2764
2765 /* %NULL @cfts indicates abort and don't bother if @ss isn't attached */
2766 if (cfts && ss->root != &rootnode) {
2767 list_for_each_entry(cgrp, &ss->root->allcg_list, allcg_node) {
2768 dget(cgrp->dentry);
2769 list_add_tail(&cgrp->cft_q_node, &pending);
2770 }
2771 }
2772
2773 mutex_unlock(&cgroup_mutex);
2774
2775 /*
2776 * All new cgroups will see @cfts update on @ss->cftsets. Add/rm
2777 * files for all cgroups which were created before.
2778 */
2779 list_for_each_entry_safe(cgrp, n, &pending, cft_q_node) {
2780 struct inode *inode = cgrp->dentry->d_inode;
2781
2782 mutex_lock(&inode->i_mutex);
2783 mutex_lock(&cgroup_mutex);
2784 if (!cgroup_is_removed(cgrp))
2785 cgroup_addrm_files(cgrp, ss, cfts, is_add);
2786 mutex_unlock(&cgroup_mutex);
2787 mutex_unlock(&inode->i_mutex);
2788
2789 list_del_init(&cgrp->cft_q_node);
2790 dput(cgrp->dentry);
2646 } 2791 }
2792
2793 mutex_unlock(&cgroup_cft_mutex);
2794}
2795
2796/**
2797 * cgroup_add_cftypes - add an array of cftypes to a subsystem
2798 * @ss: target cgroup subsystem
2799 * @cfts: zero-length name terminated array of cftypes
2800 *
2801 * Register @cfts to @ss. Files described by @cfts are created for all
2802 * existing cgroups to which @ss is attached and all future cgroups will
2803 * have them too. This function can be called anytime whether @ss is
2804 * attached or not.
2805 *
2806 * Returns 0 on successful registration, -errno on failure. Note that this
2807 * function currently returns 0 as long as @cfts registration is successful
2808 * even if some file creation attempts on existing cgroups fail.
2809 */
2810int cgroup_add_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts)
2811{
2812 struct cftype_set *set;
2813
2814 set = kzalloc(sizeof(*set), GFP_KERNEL);
2815 if (!set)
2816 return -ENOMEM;
2817
2818 cgroup_cfts_prepare();
2819 set->cfts = cfts;
2820 list_add_tail(&set->node, &ss->cftsets);
2821 cgroup_cfts_commit(ss, cfts, true);
2822
2647 return 0; 2823 return 0;
2648} 2824}
2649EXPORT_SYMBOL_GPL(cgroup_add_files); 2825EXPORT_SYMBOL_GPL(cgroup_add_cftypes);
2826
2827/**
2828 * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
2829 * @ss: target cgroup subsystem
2830 * @cfts: zero-length name terminated array of cftypes
2831 *
2832 * Unregister @cfts from @ss. Files described by @cfts are removed from
2833 * all existing cgroups to which @ss is attached and all future cgroups
2834 * won't have them either. This function can be called anytime whether @ss
2835 * is attached or not.
2836 *
2837 * Returns 0 on successful unregistration, -ENOENT if @cfts is not
2838 * registered with @ss.
2839 */
2840int cgroup_rm_cftypes(struct cgroup_subsys *ss, const struct cftype *cfts)
2841{
2842 struct cftype_set *set;
2843
2844 cgroup_cfts_prepare();
2845
2846 list_for_each_entry(set, &ss->cftsets, node) {
2847 if (set->cfts == cfts) {
2848 list_del_init(&set->node);
2849 cgroup_cfts_commit(ss, cfts, false);
2850 return 0;
2851 }
2852 }
2853
2854 cgroup_cfts_commit(ss, NULL, false);
2855 return -ENOENT;
2856}
2650 2857
2651/** 2858/**
2652 * cgroup_task_count - count the number of tasks in a cgroup. 2859 * cgroup_task_count - count the number of tasks in a cgroup.
@@ -3625,13 +3832,14 @@ static struct cftype files[] = {
3625 .read_u64 = cgroup_clone_children_read, 3832 .read_u64 = cgroup_clone_children_read,
3626 .write_u64 = cgroup_clone_children_write, 3833 .write_u64 = cgroup_clone_children_write,
3627 }, 3834 },
3628}; 3835 {
3629 3836 .name = "release_agent",
3630static struct cftype cft_release_agent = { 3837 .flags = CFTYPE_ONLY_ON_ROOT,
3631 .name = "release_agent", 3838 .read_seq_string = cgroup_release_agent_show,
3632 .read_seq_string = cgroup_release_agent_show, 3839 .write_string = cgroup_release_agent_write,
3633 .write_string = cgroup_release_agent_write, 3840 .max_write_len = PATH_MAX,
3634 .max_write_len = PATH_MAX, 3841 },
3842 { } /* terminate */
3635}; 3843};
3636 3844
3637static int cgroup_populate_dir(struct cgroup *cgrp) 3845static int cgroup_populate_dir(struct cgroup *cgrp)
@@ -3639,22 +3847,18 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
3639 int err; 3847 int err;
3640 struct cgroup_subsys *ss; 3848 struct cgroup_subsys *ss;
3641 3849
3642 /* First clear out any existing files */ 3850 err = cgroup_addrm_files(cgrp, NULL, files, true);
3643 cgroup_clear_directory(cgrp->dentry);
3644
3645 err = cgroup_add_files(cgrp, NULL, files, ARRAY_SIZE(files));
3646 if (err < 0) 3851 if (err < 0)
3647 return err; 3852 return err;
3648 3853
3649 if (cgrp == cgrp->top_cgroup) { 3854 /* process cftsets of each subsystem */
3650 if ((err = cgroup_add_file(cgrp, NULL, &cft_release_agent)) < 0)
3651 return err;
3652 }
3653
3654 for_each_subsys(cgrp->root, ss) { 3855 for_each_subsys(cgrp->root, ss) {
3655 if (ss->populate && (err = ss->populate(ss, cgrp)) < 0) 3856 struct cftype_set *set;
3656 return err; 3857
3858 list_for_each_entry(set, &ss->cftsets, node)
3859 cgroup_addrm_files(cgrp, ss, set->cfts, true);
3657 } 3860 }
3861
3658 /* This cgroup is ready now */ 3862 /* This cgroup is ready now */
3659 for_each_subsys(cgrp->root, ss) { 3863 for_each_subsys(cgrp->root, ss) {
3660 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 3864 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
@@ -3670,6 +3874,14 @@ static int cgroup_populate_dir(struct cgroup *cgrp)
3670 return 0; 3874 return 0;
3671} 3875}
3672 3876
3877static void css_dput_fn(struct work_struct *work)
3878{
3879 struct cgroup_subsys_state *css =
3880 container_of(work, struct cgroup_subsys_state, dput_work);
3881
3882 dput(css->cgroup->dentry);
3883}
3884
3673static void init_cgroup_css(struct cgroup_subsys_state *css, 3885static void init_cgroup_css(struct cgroup_subsys_state *css,
3674 struct cgroup_subsys *ss, 3886 struct cgroup_subsys *ss,
3675 struct cgroup *cgrp) 3887 struct cgroup *cgrp)
@@ -3682,6 +3894,16 @@ static void init_cgroup_css(struct cgroup_subsys_state *css,
3682 set_bit(CSS_ROOT, &css->flags); 3894 set_bit(CSS_ROOT, &css->flags);
3683 BUG_ON(cgrp->subsys[ss->subsys_id]); 3895 BUG_ON(cgrp->subsys[ss->subsys_id]);
3684 cgrp->subsys[ss->subsys_id] = css; 3896 cgrp->subsys[ss->subsys_id] = css;
3897
3898 /*
3899 * If !clear_css_refs, css holds an extra ref to @cgrp->dentry
3900 * which is put on the last css_put(). dput() requires process
3901 * context, which css_put() may be called without. @css->dput_work
3902 * will be used to invoke dput() asynchronously from css_put().
3903 */
3904 INIT_WORK(&css->dput_work, css_dput_fn);
3905 if (ss->__DEPRECATED_clear_css_refs)
3906 set_bit(CSS_CLEAR_CSS_REFS, &css->flags);
3685} 3907}
3686 3908
3687static void cgroup_lock_hierarchy(struct cgroupfs_root *root) 3909static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
@@ -3784,9 +4006,16 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
3784 if (err < 0) 4006 if (err < 0)
3785 goto err_remove; 4007 goto err_remove;
3786 4008
4009 /* If !clear_css_refs, each css holds a ref to the cgroup's dentry */
4010 for_each_subsys(root, ss)
4011 if (!ss->__DEPRECATED_clear_css_refs)
4012 dget(dentry);
4013
3787 /* The cgroup directory was pre-locked for us */ 4014 /* The cgroup directory was pre-locked for us */
3788 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex)); 4015 BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
3789 4016
4017 list_add_tail(&cgrp->allcg_node, &root->allcg_list);
4018
3790 err = cgroup_populate_dir(cgrp); 4019 err = cgroup_populate_dir(cgrp);
3791 /* If err < 0, we have a half-filled directory - oh well ;) */ 4020 /* If err < 0, we have a half-filled directory - oh well ;) */
3792 4021
@@ -3826,18 +4055,19 @@ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3826 return cgroup_create(c_parent, dentry, mode | S_IFDIR); 4055 return cgroup_create(c_parent, dentry, mode | S_IFDIR);
3827} 4056}
3828 4057
4058/*
4059 * Check the reference count on each subsystem. Since we already
4060 * established that there are no tasks in the cgroup, if the css refcount
4061 * is also 1, then there should be no outstanding references, so the
4062 * subsystem is safe to destroy. We scan across all subsystems rather than
4063 * using the per-hierarchy linked list of mounted subsystems since we can
4064 * be called via check_for_release() with no synchronization other than
4065 * RCU, and the subsystem linked list isn't RCU-safe.
4066 */
3829static int cgroup_has_css_refs(struct cgroup *cgrp) 4067static int cgroup_has_css_refs(struct cgroup *cgrp)
3830{ 4068{
3831 /* Check the reference count on each subsystem. Since we
3832 * already established that there are no tasks in the
3833 * cgroup, if the css refcount is also 1, then there should
3834 * be no outstanding references, so the subsystem is safe to
3835 * destroy. We scan across all subsystems rather than using
3836 * the per-hierarchy linked list of mounted subsystems since
3837 * we can be called via check_for_release() with no
3838 * synchronization other than RCU, and the subsystem linked
3839 * list isn't RCU-safe */
3840 int i; 4069 int i;
4070
3841 /* 4071 /*
3842 * We won't need to lock the subsys array, because the subsystems 4072 * We won't need to lock the subsys array, because the subsystems
3843 * we're concerned about aren't going anywhere since our cgroup root 4073 * we're concerned about aren't going anywhere since our cgroup root
@@ -3846,17 +4076,21 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3846 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { 4076 for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
3847 struct cgroup_subsys *ss = subsys[i]; 4077 struct cgroup_subsys *ss = subsys[i];
3848 struct cgroup_subsys_state *css; 4078 struct cgroup_subsys_state *css;
4079
3849 /* Skip subsystems not present or not in this hierarchy */ 4080 /* Skip subsystems not present or not in this hierarchy */
3850 if (ss == NULL || ss->root != cgrp->root) 4081 if (ss == NULL || ss->root != cgrp->root)
3851 continue; 4082 continue;
4083
3852 css = cgrp->subsys[ss->subsys_id]; 4084 css = cgrp->subsys[ss->subsys_id];
3853 /* When called from check_for_release() it's possible 4085 /*
4086 * When called from check_for_release() it's possible
3854 * that by this point the cgroup has been removed 4087 * that by this point the cgroup has been removed
3855 * and the css deleted. But a false-positive doesn't 4088 * and the css deleted. But a false-positive doesn't
3856 * matter, since it can only happen if the cgroup 4089 * matter, since it can only happen if the cgroup
3857 * has been deleted and hence no longer needs the 4090 * has been deleted and hence no longer needs the
3858 * release agent to be called anyway. */ 4091 * release agent to be called anyway.
3859 if (css && (atomic_read(&css->refcnt) > 1)) 4092 */
4093 if (css && css_refcnt(css) > 1)
3860 return 1; 4094 return 1;
3861 } 4095 }
3862 return 0; 4096 return 0;
@@ -3866,51 +4100,63 @@ static int cgroup_has_css_refs(struct cgroup *cgrp)
3866 * Atomically mark all (or else none) of the cgroup's CSS objects as 4100 * Atomically mark all (or else none) of the cgroup's CSS objects as
3867 * CSS_REMOVED. Return true on success, or false if the cgroup has 4101 * CSS_REMOVED. Return true on success, or false if the cgroup has
3868 * busy subsystems. Call with cgroup_mutex held 4102 * busy subsystems. Call with cgroup_mutex held
4103 *
4104 * Depending on whether a subsys has __DEPRECATED_clear_css_refs set or
4105 * not, cgroup removal behaves differently.
4106 *
4107 * If clear is set, css refcnt for the subsystem should be zero before
4108 * cgroup removal can be committed. This is implemented by
4109 * CGRP_WAIT_ON_RMDIR and retry logic around ->pre_destroy(), which may be
4110 * called multiple times until all css refcnts reach zero and is allowed to
4111 * veto removal on any invocation. This behavior is deprecated and will be
4112 * removed as soon as the existing user (memcg) is updated.
4113 *
4114 * If clear is not set, each css holds an extra reference to the cgroup's
4115 * dentry and cgroup removal proceeds regardless of css refs.
4116 * ->pre_destroy() will be called at least once and is not allowed to fail.
4117 * On the last put of each css, whenever that may be, the extra dentry ref
4118 * is put so that dentry destruction happens only after all css's are
4119 * released.
3869 */ 4120 */
3870
3871static int cgroup_clear_css_refs(struct cgroup *cgrp) 4121static int cgroup_clear_css_refs(struct cgroup *cgrp)
3872{ 4122{
3873 struct cgroup_subsys *ss; 4123 struct cgroup_subsys *ss;
3874 unsigned long flags; 4124 unsigned long flags;
3875 bool failed = false; 4125 bool failed = false;
4126
3876 local_irq_save(flags); 4127 local_irq_save(flags);
4128
4129 /*
4130 * Block new css_tryget() by deactivating refcnt. If all refcnts
4131 * for subsystems w/ clear_css_refs set were 1 at the moment of
4132 * deactivation, we succeeded.
4133 */
3877 for_each_subsys(cgrp->root, ss) { 4134 for_each_subsys(cgrp->root, ss) {
3878 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4135 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3879 int refcnt; 4136
3880 while (1) { 4137 WARN_ON(atomic_read(&css->refcnt) < 0);
3881 /* We can only remove a CSS with a refcnt==1 */ 4138 atomic_add(CSS_DEACT_BIAS, &css->refcnt);
3882 refcnt = atomic_read(&css->refcnt); 4139
3883 if (refcnt > 1) { 4140 if (ss->__DEPRECATED_clear_css_refs)
3884 failed = true; 4141 failed |= css_refcnt(css) != 1;
3885 goto done;
3886 }
3887 BUG_ON(!refcnt);
3888 /*
3889 * Drop the refcnt to 0 while we check other
3890 * subsystems. This will cause any racing
3891 * css_tryget() to spin until we set the
3892 * CSS_REMOVED bits or abort
3893 */
3894 if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
3895 break;
3896 cpu_relax();
3897 }
3898 } 4142 }
3899 done: 4143
4144 /*
4145 * If succeeded, set REMOVED and put all the base refs; otherwise,
4146 * restore refcnts to positive values. Either way, all in-progress
4147 * css_tryget() will be released.
4148 */
3900 for_each_subsys(cgrp->root, ss) { 4149 for_each_subsys(cgrp->root, ss) {
3901 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; 4150 struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
3902 if (failed) { 4151
3903 /* 4152 if (!failed) {
3904 * Restore old refcnt if we previously managed
3905 * to clear it from 1 to 0
3906 */
3907 if (!atomic_read(&css->refcnt))
3908 atomic_set(&css->refcnt, 1);
3909 } else {
3910 /* Commit the fact that the CSS is removed */
3911 set_bit(CSS_REMOVED, &css->flags); 4153 set_bit(CSS_REMOVED, &css->flags);
4154 css_put(css);
4155 } else {
4156 atomic_sub(CSS_DEACT_BIAS, &css->refcnt);
3912 } 4157 }
3913 } 4158 }
4159
3914 local_irq_restore(flags); 4160 local_irq_restore(flags);
3915 return !failed; 4161 return !failed;
3916} 4162}
@@ -3995,6 +4241,8 @@ again:
3995 list_del_init(&cgrp->sibling); 4241 list_del_init(&cgrp->sibling);
3996 cgroup_unlock_hierarchy(cgrp->root); 4242 cgroup_unlock_hierarchy(cgrp->root);
3997 4243
4244 list_del_init(&cgrp->allcg_node);
4245
3998 d = dget(cgrp->dentry); 4246 d = dget(cgrp->dentry);
3999 4247
4000 cgroup_d_remove_dir(d); 4248 cgroup_d_remove_dir(d);
@@ -4021,12 +4269,29 @@ again:
4021 return 0; 4269 return 0;
4022} 4270}
4023 4271
4272static void __init_or_module cgroup_init_cftsets(struct cgroup_subsys *ss)
4273{
4274 INIT_LIST_HEAD(&ss->cftsets);
4275
4276 /*
4277 * base_cftset is embedded in subsys itself, no need to worry about
4278 * deregistration.
4279 */
4280 if (ss->base_cftypes) {
4281 ss->base_cftset.cfts = ss->base_cftypes;
4282 list_add_tail(&ss->base_cftset.node, &ss->cftsets);
4283 }
4284}
4285
4024static void __init cgroup_init_subsys(struct cgroup_subsys *ss) 4286static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
4025{ 4287{
4026 struct cgroup_subsys_state *css; 4288 struct cgroup_subsys_state *css;
4027 4289
4028 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name); 4290 printk(KERN_INFO "Initializing cgroup subsys %s\n", ss->name);
4029 4291
4292 /* init base cftset */
4293 cgroup_init_cftsets(ss);
4294
4030 /* Create the top cgroup state for this subsystem */ 4295 /* Create the top cgroup state for this subsystem */
4031 list_add(&ss->sibling, &rootnode.subsys_list); 4296 list_add(&ss->sibling, &rootnode.subsys_list);
4032 ss->root = &rootnode; 4297 ss->root = &rootnode;
@@ -4096,6 +4361,9 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
4096 return 0; 4361 return 0;
4097 } 4362 }
4098 4363
4364 /* init base cftset */
4365 cgroup_init_cftsets(ss);
4366
4099 /* 4367 /*
4100 * need to register a subsys id before anything else - for example, 4368 * need to register a subsys id before anything else - for example,
4101 * init_cgroup_css needs it. 4369 * init_cgroup_css needs it.
@@ -4685,21 +4953,41 @@ static void check_for_release(struct cgroup *cgrp)
4685} 4953}
4686 4954
4687/* Caller must verify that the css is not for root cgroup */ 4955/* Caller must verify that the css is not for root cgroup */
4688void __css_put(struct cgroup_subsys_state *css, int count) 4956bool __css_tryget(struct cgroup_subsys_state *css)
4957{
4958 do {
4959 int v = css_refcnt(css);
4960
4961 if (atomic_cmpxchg(&css->refcnt, v, v + 1) == v)
4962 return true;
4963 cpu_relax();
4964 } while (!test_bit(CSS_REMOVED, &css->flags));
4965
4966 return false;
4967}
4968EXPORT_SYMBOL_GPL(__css_tryget);
4969
4970/* Caller must verify that the css is not for root cgroup */
4971void __css_put(struct cgroup_subsys_state *css)
4689{ 4972{
4690 struct cgroup *cgrp = css->cgroup; 4973 struct cgroup *cgrp = css->cgroup;
4691 int val; 4974
4692 rcu_read_lock(); 4975 rcu_read_lock();
4693 val = atomic_sub_return(count, &css->refcnt); 4976 atomic_dec(&css->refcnt);
4694 if (val == 1) { 4977 switch (css_refcnt(css)) {
4978 case 1:
4695 if (notify_on_release(cgrp)) { 4979 if (notify_on_release(cgrp)) {
4696 set_bit(CGRP_RELEASABLE, &cgrp->flags); 4980 set_bit(CGRP_RELEASABLE, &cgrp->flags);
4697 check_for_release(cgrp); 4981 check_for_release(cgrp);
4698 } 4982 }
4699 cgroup_wakeup_rmdir_waiter(cgrp); 4983 cgroup_wakeup_rmdir_waiter(cgrp);
4984 break;
4985 case 0:
4986 if (!test_bit(CSS_CLEAR_CSS_REFS, &css->flags))
4987 schedule_work(&css->dput_work);
4988 break;
4700 } 4989 }
4701 rcu_read_unlock(); 4990 rcu_read_unlock();
4702 WARN_ON_ONCE(val < 1);
4703} 4991}
4704EXPORT_SYMBOL_GPL(__css_put); 4992EXPORT_SYMBOL_GPL(__css_put);
4705 4993
@@ -4818,7 +5106,7 @@ unsigned short css_id(struct cgroup_subsys_state *css)
4818 * on this or this is under rcu_read_lock(). Once css->id is allocated, 5106 * on this or this is under rcu_read_lock(). Once css->id is allocated,
4819 * it's unchanged until freed. 5107 * it's unchanged until freed.
4820 */ 5108 */
4821 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 5109 cssid = rcu_dereference_check(css->id, css_refcnt(css));
4822 5110
4823 if (cssid) 5111 if (cssid)
4824 return cssid->id; 5112 return cssid->id;
@@ -4830,7 +5118,7 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
4830{ 5118{
4831 struct css_id *cssid; 5119 struct css_id *cssid;
4832 5120
4833 cssid = rcu_dereference_check(css->id, atomic_read(&css->refcnt)); 5121 cssid = rcu_dereference_check(css->id, css_refcnt(css));
4834 5122
4835 if (cssid) 5123 if (cssid)
4836 return cssid->depth; 5124 return cssid->depth;
@@ -5211,19 +5499,15 @@ static struct cftype debug_files[] = {
5211 .name = "releasable", 5499 .name = "releasable",
5212 .read_u64 = releasable_read, 5500 .read_u64 = releasable_read,
5213 }, 5501 },
5214};
5215 5502
5216static int debug_populate(struct cgroup_subsys *ss, struct cgroup *cont) 5503 { } /* terminate */
5217{ 5504};
5218 return cgroup_add_files(cont, ss, debug_files,
5219 ARRAY_SIZE(debug_files));
5220}
5221 5505
5222struct cgroup_subsys debug_subsys = { 5506struct cgroup_subsys debug_subsys = {
5223 .name = "debug", 5507 .name = "debug",
5224 .create = debug_create, 5508 .create = debug_create,
5225 .destroy = debug_destroy, 5509 .destroy = debug_destroy,
5226 .populate = debug_populate,
5227 .subsys_id = debug_subsys_id, 5510 .subsys_id = debug_subsys_id,
5511 .base_cftypes = debug_files,
5228}; 5512};
5229#endif /* CONFIG_CGROUP_DEBUG */ 5513#endif /* CONFIG_CGROUP_DEBUG */
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c
index f86e93920b62..3649fc6b3eaa 100644
--- a/kernel/cgroup_freezer.c
+++ b/kernel/cgroup_freezer.c
@@ -358,24 +358,19 @@ static int freezer_write(struct cgroup *cgroup,
358static struct cftype files[] = { 358static struct cftype files[] = {
359 { 359 {
360 .name = "state", 360 .name = "state",
361 .flags = CFTYPE_NOT_ON_ROOT,
361 .read_seq_string = freezer_read, 362 .read_seq_string = freezer_read,
362 .write_string = freezer_write, 363 .write_string = freezer_write,
363 }, 364 },
365 { } /* terminate */
364}; 366};
365 367
366static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup)
367{
368 if (!cgroup->parent)
369 return 0;
370 return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files));
371}
372
373struct cgroup_subsys freezer_subsys = { 368struct cgroup_subsys freezer_subsys = {
374 .name = "freezer", 369 .name = "freezer",
375 .create = freezer_create, 370 .create = freezer_create,
376 .destroy = freezer_destroy, 371 .destroy = freezer_destroy,
377 .populate = freezer_populate,
378 .subsys_id = freezer_subsys_id, 372 .subsys_id = freezer_subsys_id,
379 .can_attach = freezer_can_attach, 373 .can_attach = freezer_can_attach,
380 .fork = freezer_fork, 374 .fork = freezer_fork,
375 .base_cftypes = files,
381}; 376};
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 14f7070b4ba2..8c8bd652dd12 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1765,28 +1765,17 @@ static struct cftype files[] = {
1765 .write_u64 = cpuset_write_u64, 1765 .write_u64 = cpuset_write_u64,
1766 .private = FILE_SPREAD_SLAB, 1766 .private = FILE_SPREAD_SLAB,
1767 }, 1767 },
1768};
1769
1770static struct cftype cft_memory_pressure_enabled = {
1771 .name = "memory_pressure_enabled",
1772 .read_u64 = cpuset_read_u64,
1773 .write_u64 = cpuset_write_u64,
1774 .private = FILE_MEMORY_PRESSURE_ENABLED,
1775};
1776 1768
1777static int cpuset_populate(struct cgroup_subsys *ss, struct cgroup *cont) 1769 {
1778{ 1770 .name = "memory_pressure_enabled",
1779 int err; 1771 .flags = CFTYPE_ONLY_ON_ROOT,
1772 .read_u64 = cpuset_read_u64,
1773 .write_u64 = cpuset_write_u64,
1774 .private = FILE_MEMORY_PRESSURE_ENABLED,
1775 },
1780 1776
1781 err = cgroup_add_files(cont, ss, files, ARRAY_SIZE(files)); 1777 { } /* terminate */
1782 if (err) 1778};
1783 return err;
1784 /* memory_pressure_enabled is in root cpuset only */
1785 if (!cont->parent)
1786 err = cgroup_add_file(cont, ss,
1787 &cft_memory_pressure_enabled);
1788 return err;
1789}
1790 1779
1791/* 1780/*
1792 * post_clone() is called during cgroup_create() when the 1781 * post_clone() is called during cgroup_create() when the
@@ -1887,9 +1876,9 @@ struct cgroup_subsys cpuset_subsys = {
1887 .destroy = cpuset_destroy, 1876 .destroy = cpuset_destroy,
1888 .can_attach = cpuset_can_attach, 1877 .can_attach = cpuset_can_attach,
1889 .attach = cpuset_attach, 1878 .attach = cpuset_attach,
1890 .populate = cpuset_populate,
1891 .post_clone = cpuset_post_clone, 1879 .post_clone = cpuset_post_clone,
1892 .subsys_id = cpuset_subsys_id, 1880 .subsys_id = cpuset_subsys_id,
1881 .base_cftypes = files,
1893 .early_init = 1, 1882 .early_init = 1,
1894}; 1883};
1895 1884
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index d508363858b3..bebe2b170d49 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -22,75 +22,70 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent)
22 counter->parent = parent; 22 counter->parent = parent;
23} 23}
24 24
25int res_counter_charge_locked(struct res_counter *counter, unsigned long val) 25int res_counter_charge_locked(struct res_counter *counter, unsigned long val,
26 bool force)
26{ 27{
28 int ret = 0;
29
27 if (counter->usage + val > counter->limit) { 30 if (counter->usage + val > counter->limit) {
28 counter->failcnt++; 31 counter->failcnt++;
29 return -ENOMEM; 32 ret = -ENOMEM;
33 if (!force)
34 return ret;
30 } 35 }
31 36
32 counter->usage += val; 37 counter->usage += val;
33 if (counter->usage > counter->max_usage) 38 if (counter->usage > counter->max_usage)
34 counter->max_usage = counter->usage; 39 counter->max_usage = counter->usage;
35 return 0; 40 return ret;
36} 41}
37 42
38int res_counter_charge(struct res_counter *counter, unsigned long val, 43static int __res_counter_charge(struct res_counter *counter, unsigned long val,
39 struct res_counter **limit_fail_at) 44 struct res_counter **limit_fail_at, bool force)
40{ 45{
41 int ret; 46 int ret, r;
42 unsigned long flags; 47 unsigned long flags;
43 struct res_counter *c, *u; 48 struct res_counter *c, *u;
44 49
50 r = ret = 0;
45 *limit_fail_at = NULL; 51 *limit_fail_at = NULL;
46 local_irq_save(flags); 52 local_irq_save(flags);
47 for (c = counter; c != NULL; c = c->parent) { 53 for (c = counter; c != NULL; c = c->parent) {
48 spin_lock(&c->lock); 54 spin_lock(&c->lock);
49 ret = res_counter_charge_locked(c, val); 55 r = res_counter_charge_locked(c, val, force);
50 spin_unlock(&c->lock); 56 spin_unlock(&c->lock);
51 if (ret < 0) { 57 if (r < 0 && !ret) {
58 ret = r;
52 *limit_fail_at = c; 59 *limit_fail_at = c;
53 goto undo; 60 if (!force)
61 break;
54 } 62 }
55 } 63 }
56 ret = 0; 64
57 goto done; 65 if (ret < 0 && !force) {
58undo: 66 for (u = counter; u != c; u = u->parent) {
59 for (u = counter; u != c; u = u->parent) { 67 spin_lock(&u->lock);
60 spin_lock(&u->lock); 68 res_counter_uncharge_locked(u, val);
61 res_counter_uncharge_locked(u, val); 69 spin_unlock(&u->lock);
62 spin_unlock(&u->lock); 70 }
63 } 71 }
64done:
65 local_irq_restore(flags); 72 local_irq_restore(flags);
73
66 return ret; 74 return ret;
67} 75}
68 76
77int res_counter_charge(struct res_counter *counter, unsigned long val,
78 struct res_counter **limit_fail_at)
79{
80 return __res_counter_charge(counter, val, limit_fail_at, false);
81}
82
69int res_counter_charge_nofail(struct res_counter *counter, unsigned long val, 83int res_counter_charge_nofail(struct res_counter *counter, unsigned long val,
70 struct res_counter **limit_fail_at) 84 struct res_counter **limit_fail_at)
71{ 85{
72 int ret, r; 86 return __res_counter_charge(counter, val, limit_fail_at, true);
73 unsigned long flags;
74 struct res_counter *c;
75
76 r = ret = 0;
77 *limit_fail_at = NULL;
78 local_irq_save(flags);
79 for (c = counter; c != NULL; c = c->parent) {
80 spin_lock(&c->lock);
81 r = res_counter_charge_locked(c, val);
82 if (r)
83 c->usage += val;
84 spin_unlock(&c->lock);
85 if (r < 0 && ret == 0) {
86 *limit_fail_at = c;
87 ret = r;
88 }
89 }
90 local_irq_restore(flags);
91
92 return ret;
93} 87}
88
94void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) 89void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val)
95{ 90{
96 if (WARN_ON(counter->usage < val)) 91 if (WARN_ON(counter->usage < val))
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ea8a4769fea5..03667c3fdb33 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7985,13 +7985,9 @@ static struct cftype cpu_files[] = {
7985 .write_u64 = cpu_rt_period_write_uint, 7985 .write_u64 = cpu_rt_period_write_uint,
7986 }, 7986 },
7987#endif 7987#endif
7988 { } /* terminate */
7988}; 7989};
7989 7990
7990static int cpu_cgroup_populate(struct cgroup_subsys *ss, struct cgroup *cont)
7991{
7992 return cgroup_add_files(cont, ss, cpu_files, ARRAY_SIZE(cpu_files));
7993}
7994
7995struct cgroup_subsys cpu_cgroup_subsys = { 7991struct cgroup_subsys cpu_cgroup_subsys = {
7996 .name = "cpu", 7992 .name = "cpu",
7997 .create = cpu_cgroup_create, 7993 .create = cpu_cgroup_create,
@@ -7999,8 +7995,8 @@ struct cgroup_subsys cpu_cgroup_subsys = {
7999 .can_attach = cpu_cgroup_can_attach, 7995 .can_attach = cpu_cgroup_can_attach,
8000 .attach = cpu_cgroup_attach, 7996 .attach = cpu_cgroup_attach,
8001 .exit = cpu_cgroup_exit, 7997 .exit = cpu_cgroup_exit,
8002 .populate = cpu_cgroup_populate,
8003 .subsys_id = cpu_cgroup_subsys_id, 7998 .subsys_id = cpu_cgroup_subsys_id,
7999 .base_cftypes = cpu_files,
8004 .early_init = 1, 8000 .early_init = 1,
8005}; 8001};
8006 8002
@@ -8185,13 +8181,9 @@ static struct cftype files[] = {
8185 .name = "stat", 8181 .name = "stat",
8186 .read_map = cpuacct_stats_show, 8182 .read_map = cpuacct_stats_show,
8187 }, 8183 },
8184 { } /* terminate */
8188}; 8185};
8189 8186
8190static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
8191{
8192 return cgroup_add_files(cgrp, ss, files, ARRAY_SIZE(files));
8193}
8194
8195/* 8187/*
8196 * charge this task's execution time to its accounting group. 8188 * charge this task's execution time to its accounting group.
8197 * 8189 *
@@ -8223,7 +8215,7 @@ struct cgroup_subsys cpuacct_subsys = {
8223 .name = "cpuacct", 8215 .name = "cpuacct",
8224 .create = cpuacct_create, 8216 .create = cpuacct_create,
8225 .destroy = cpuacct_destroy, 8217 .destroy = cpuacct_destroy,
8226 .populate = cpuacct_populate,
8227 .subsys_id = cpuacct_subsys_id, 8218 .subsys_id = cpuacct_subsys_id,
8219 .base_cftypes = files,
8228}; 8220};
8229#endif /* CONFIG_CGROUP_CPUACCT */ 8221#endif /* CONFIG_CGROUP_CPUACCT */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7685d4a0b3ce..f342778a0c0a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3873,14 +3873,21 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
3873 return val << PAGE_SHIFT; 3873 return val << PAGE_SHIFT;
3874} 3874}
3875 3875
3876static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft) 3876static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
3877 struct file *file, char __user *buf,
3878 size_t nbytes, loff_t *ppos)
3877{ 3879{
3878 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 3880 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3881 char str[64];
3879 u64 val; 3882 u64 val;
3880 int type, name; 3883 int type, name, len;
3881 3884
3882 type = MEMFILE_TYPE(cft->private); 3885 type = MEMFILE_TYPE(cft->private);
3883 name = MEMFILE_ATTR(cft->private); 3886 name = MEMFILE_ATTR(cft->private);
3887
3888 if (!do_swap_account && type == _MEMSWAP)
3889 return -EOPNOTSUPP;
3890
3884 switch (type) { 3891 switch (type) {
3885 case _MEM: 3892 case _MEM:
3886 if (name == RES_USAGE) 3893 if (name == RES_USAGE)
@@ -3897,7 +3904,9 @@ static u64 mem_cgroup_read(struct cgroup *cont, struct cftype *cft)
3897 default: 3904 default:
3898 BUG(); 3905 BUG();
3899 } 3906 }
3900 return val; 3907
3908 len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
3909 return simple_read_from_buffer(buf, nbytes, ppos, str, len);
3901} 3910}
3902/* 3911/*
3903 * The user of this function is... 3912 * The user of this function is...
@@ -3913,6 +3922,10 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
3913 3922
3914 type = MEMFILE_TYPE(cft->private); 3923 type = MEMFILE_TYPE(cft->private);
3915 name = MEMFILE_ATTR(cft->private); 3924 name = MEMFILE_ATTR(cft->private);
3925
3926 if (!do_swap_account && type == _MEMSWAP)
3927 return -EOPNOTSUPP;
3928
3916 switch (name) { 3929 switch (name) {
3917 case RES_LIMIT: 3930 case RES_LIMIT:
3918 if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */ 3931 if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
@@ -3978,12 +3991,15 @@ out:
3978 3991
3979static int mem_cgroup_reset(struct cgroup *cont, unsigned int event) 3992static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
3980{ 3993{
3981 struct mem_cgroup *memcg; 3994 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
3982 int type, name; 3995 int type, name;
3983 3996
3984 memcg = mem_cgroup_from_cont(cont);
3985 type = MEMFILE_TYPE(event); 3997 type = MEMFILE_TYPE(event);
3986 name = MEMFILE_ATTR(event); 3998 name = MEMFILE_ATTR(event);
3999
4000 if (!do_swap_account && type == _MEMSWAP)
4001 return -EOPNOTSUPP;
4002
3987 switch (name) { 4003 switch (name) {
3988 case RES_MAX_USAGE: 4004 case RES_MAX_USAGE:
3989 if (type == _MEM) 4005 if (type == _MEM)
@@ -4624,29 +4640,22 @@ static int mem_control_numa_stat_open(struct inode *unused, struct file *file)
4624#endif /* CONFIG_NUMA */ 4640#endif /* CONFIG_NUMA */
4625 4641
4626#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 4642#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
4627static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4643static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4628{ 4644{
4629 /* 4645 return mem_cgroup_sockets_init(memcg, ss);
4630 * Part of this would be better living in a separate allocation
4631 * function, leaving us with just the cgroup tree population work.
4632 * We, however, depend on state such as network's proto_list that
4633 * is only initialized after cgroup creation. I found the less
4634 * cumbersome way to deal with it to defer it all to populate time
4635 */
4636 return mem_cgroup_sockets_init(cont, ss);
4637}; 4646};
4638 4647
4639static void kmem_cgroup_destroy(struct cgroup *cont) 4648static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
4640{ 4649{
4641 mem_cgroup_sockets_destroy(cont); 4650 mem_cgroup_sockets_destroy(memcg);
4642} 4651}
4643#else 4652#else
4644static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4653static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4645{ 4654{
4646 return 0; 4655 return 0;
4647} 4656}
4648 4657
4649static void kmem_cgroup_destroy(struct cgroup *cont) 4658static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
4650{ 4659{
4651} 4660}
4652#endif 4661#endif
@@ -4655,7 +4664,7 @@ static struct cftype mem_cgroup_files[] = {
4655 { 4664 {
4656 .name = "usage_in_bytes", 4665 .name = "usage_in_bytes",
4657 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE), 4666 .private = MEMFILE_PRIVATE(_MEM, RES_USAGE),
4658 .read_u64 = mem_cgroup_read, 4667 .read = mem_cgroup_read,
4659 .register_event = mem_cgroup_usage_register_event, 4668 .register_event = mem_cgroup_usage_register_event,
4660 .unregister_event = mem_cgroup_usage_unregister_event, 4669 .unregister_event = mem_cgroup_usage_unregister_event,
4661 }, 4670 },
@@ -4663,25 +4672,25 @@ static struct cftype mem_cgroup_files[] = {
4663 .name = "max_usage_in_bytes", 4672 .name = "max_usage_in_bytes",
4664 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE), 4673 .private = MEMFILE_PRIVATE(_MEM, RES_MAX_USAGE),
4665 .trigger = mem_cgroup_reset, 4674 .trigger = mem_cgroup_reset,
4666 .read_u64 = mem_cgroup_read, 4675 .read = mem_cgroup_read,
4667 }, 4676 },
4668 { 4677 {
4669 .name = "limit_in_bytes", 4678 .name = "limit_in_bytes",
4670 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT), 4679 .private = MEMFILE_PRIVATE(_MEM, RES_LIMIT),
4671 .write_string = mem_cgroup_write, 4680 .write_string = mem_cgroup_write,
4672 .read_u64 = mem_cgroup_read, 4681 .read = mem_cgroup_read,
4673 }, 4682 },
4674 { 4683 {
4675 .name = "soft_limit_in_bytes", 4684 .name = "soft_limit_in_bytes",
4676 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT), 4685 .private = MEMFILE_PRIVATE(_MEM, RES_SOFT_LIMIT),
4677 .write_string = mem_cgroup_write, 4686 .write_string = mem_cgroup_write,
4678 .read_u64 = mem_cgroup_read, 4687 .read = mem_cgroup_read,
4679 }, 4688 },
4680 { 4689 {
4681 .name = "failcnt", 4690 .name = "failcnt",
4682 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT), 4691 .private = MEMFILE_PRIVATE(_MEM, RES_FAILCNT),
4683 .trigger = mem_cgroup_reset, 4692 .trigger = mem_cgroup_reset,
4684 .read_u64 = mem_cgroup_read, 4693 .read = mem_cgroup_read,
4685 }, 4694 },
4686 { 4695 {
4687 .name = "stat", 4696 .name = "stat",
@@ -4721,14 +4730,11 @@ static struct cftype mem_cgroup_files[] = {
4721 .mode = S_IRUGO, 4730 .mode = S_IRUGO,
4722 }, 4731 },
4723#endif 4732#endif
4724};
4725
4726#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4733#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
4727static struct cftype memsw_cgroup_files[] = {
4728 { 4734 {
4729 .name = "memsw.usage_in_bytes", 4735 .name = "memsw.usage_in_bytes",
4730 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), 4736 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE),
4731 .read_u64 = mem_cgroup_read, 4737 .read = mem_cgroup_read,
4732 .register_event = mem_cgroup_usage_register_event, 4738 .register_event = mem_cgroup_usage_register_event,
4733 .unregister_event = mem_cgroup_usage_unregister_event, 4739 .unregister_event = mem_cgroup_usage_unregister_event,
4734 }, 4740 },
@@ -4736,35 +4742,23 @@ static struct cftype memsw_cgroup_files[] = {
4736 .name = "memsw.max_usage_in_bytes", 4742 .name = "memsw.max_usage_in_bytes",
4737 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE), 4743 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_MAX_USAGE),
4738 .trigger = mem_cgroup_reset, 4744 .trigger = mem_cgroup_reset,
4739 .read_u64 = mem_cgroup_read, 4745 .read = mem_cgroup_read,
4740 }, 4746 },
4741 { 4747 {
4742 .name = "memsw.limit_in_bytes", 4748 .name = "memsw.limit_in_bytes",
4743 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT), 4749 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_LIMIT),
4744 .write_string = mem_cgroup_write, 4750 .write_string = mem_cgroup_write,
4745 .read_u64 = mem_cgroup_read, 4751 .read = mem_cgroup_read,
4746 }, 4752 },
4747 { 4753 {
4748 .name = "memsw.failcnt", 4754 .name = "memsw.failcnt",
4749 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT), 4755 .private = MEMFILE_PRIVATE(_MEMSWAP, RES_FAILCNT),
4750 .trigger = mem_cgroup_reset, 4756 .trigger = mem_cgroup_reset,
4751 .read_u64 = mem_cgroup_read, 4757 .read = mem_cgroup_read,
4752 }, 4758 },
4753};
4754
4755static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
4756{
4757 if (!do_swap_account)
4758 return 0;
4759 return cgroup_add_files(cont, ss, memsw_cgroup_files,
4760 ARRAY_SIZE(memsw_cgroup_files));
4761};
4762#else
4763static int register_memsw_files(struct cgroup *cont, struct cgroup_subsys *ss)
4764{
4765 return 0;
4766}
4767#endif 4759#endif
4760 { }, /* terminate */
4761};
4768 4762
4769static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) 4763static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4770{ 4764{
@@ -5016,6 +5010,17 @@ mem_cgroup_create(struct cgroup *cont)
5016 memcg->move_charge_at_immigrate = 0; 5010 memcg->move_charge_at_immigrate = 0;
5017 mutex_init(&memcg->thresholds_lock); 5011 mutex_init(&memcg->thresholds_lock);
5018 spin_lock_init(&memcg->move_lock); 5012 spin_lock_init(&memcg->move_lock);
5013
5014 error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
5015 if (error) {
5016 /*
5017 * We call put now because our (and parent's) refcnts
5018 * are already in place. mem_cgroup_put() will internally
5019 * call __mem_cgroup_free, so return directly
5020 */
5021 mem_cgroup_put(memcg);
5022 return ERR_PTR(error);
5023 }
5019 return &memcg->css; 5024 return &memcg->css;
5020free_out: 5025free_out:
5021 __mem_cgroup_free(memcg); 5026 __mem_cgroup_free(memcg);
@@ -5033,28 +5038,11 @@ static void mem_cgroup_destroy(struct cgroup *cont)
5033{ 5038{
5034 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 5039 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5035 5040
5036 kmem_cgroup_destroy(cont); 5041 kmem_cgroup_destroy(memcg);
5037 5042
5038 mem_cgroup_put(memcg); 5043 mem_cgroup_put(memcg);
5039} 5044}
5040 5045
5041static int mem_cgroup_populate(struct cgroup_subsys *ss,
5042 struct cgroup *cont)
5043{
5044 int ret;
5045
5046 ret = cgroup_add_files(cont, ss, mem_cgroup_files,
5047 ARRAY_SIZE(mem_cgroup_files));
5048
5049 if (!ret)
5050 ret = register_memsw_files(cont, ss);
5051
5052 if (!ret)
5053 ret = register_kmem_files(cont, ss);
5054
5055 return ret;
5056}
5057
5058#ifdef CONFIG_MMU 5046#ifdef CONFIG_MMU
5059/* Handlers for move charge at task migration. */ 5047/* Handlers for move charge at task migration. */
5060#define PRECHARGE_COUNT_AT_ONCE 256 5048#define PRECHARGE_COUNT_AT_ONCE 256
@@ -5638,12 +5626,13 @@ struct cgroup_subsys mem_cgroup_subsys = {
5638 .create = mem_cgroup_create, 5626 .create = mem_cgroup_create,
5639 .pre_destroy = mem_cgroup_pre_destroy, 5627 .pre_destroy = mem_cgroup_pre_destroy,
5640 .destroy = mem_cgroup_destroy, 5628 .destroy = mem_cgroup_destroy,
5641 .populate = mem_cgroup_populate,
5642 .can_attach = mem_cgroup_can_attach, 5629 .can_attach = mem_cgroup_can_attach,
5643 .cancel_attach = mem_cgroup_cancel_attach, 5630 .cancel_attach = mem_cgroup_cancel_attach,
5644 .attach = mem_cgroup_move_task, 5631 .attach = mem_cgroup_move_task,
5632 .base_cftypes = mem_cgroup_files,
5645 .early_init = 0, 5633 .early_init = 0,
5646 .use_id = 1, 5634 .use_id = 1,
5635 .__DEPRECATED_clear_css_refs = true,
5647}; 5636};
5648 5637
5649#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 5638#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 09eda68b6763..5b8aa2fae48b 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -25,21 +25,6 @@
25#include <net/sock.h> 25#include <net/sock.h>
26#include <net/netprio_cgroup.h> 26#include <net/netprio_cgroup.h>
27 27
28static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp);
29static void cgrp_destroy(struct cgroup *cgrp);
30static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
31
32struct cgroup_subsys net_prio_subsys = {
33 .name = "net_prio",
34 .create = cgrp_create,
35 .destroy = cgrp_destroy,
36 .populate = cgrp_populate,
37#ifdef CONFIG_NETPRIO_CGROUP
38 .subsys_id = net_prio_subsys_id,
39#endif
40 .module = THIS_MODULE
41};
42
43#define PRIOIDX_SZ 128 28#define PRIOIDX_SZ 128
44 29
45static unsigned long prioidx_map[PRIOIDX_SZ]; 30static unsigned long prioidx_map[PRIOIDX_SZ];
@@ -259,12 +244,19 @@ static struct cftype ss_files[] = {
259 .read_map = read_priomap, 244 .read_map = read_priomap,
260 .write_string = write_priomap, 245 .write_string = write_priomap,
261 }, 246 },
247 { } /* terminate */
262}; 248};
263 249
264static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 250struct cgroup_subsys net_prio_subsys = {
265{ 251 .name = "net_prio",
266 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 252 .create = cgrp_create,
267} 253 .destroy = cgrp_destroy,
254#ifdef CONFIG_NETPRIO_CGROUP
255 .subsys_id = net_prio_subsys_id,
256#endif
257 .base_cftypes = ss_files,
258 .module = THIS_MODULE
259};
268 260
269static int netprio_device_event(struct notifier_block *unused, 261static int netprio_device_event(struct notifier_block *unused,
270 unsigned long event, void *ptr) 262 unsigned long event, void *ptr)
diff --git a/net/core/sock.c b/net/core/sock.c
index 5efcd6307fa7..f372d9bf4976 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -143,7 +143,7 @@ static DEFINE_MUTEX(proto_list_mutex);
143static LIST_HEAD(proto_list); 143static LIST_HEAD(proto_list);
144 144
145#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 145#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
146int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss) 146int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
147{ 147{
148 struct proto *proto; 148 struct proto *proto;
149 int ret = 0; 149 int ret = 0;
@@ -151,7 +151,7 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
151 mutex_lock(&proto_list_mutex); 151 mutex_lock(&proto_list_mutex);
152 list_for_each_entry(proto, &proto_list, node) { 152 list_for_each_entry(proto, &proto_list, node) {
153 if (proto->init_cgroup) { 153 if (proto->init_cgroup) {
154 ret = proto->init_cgroup(cgrp, ss); 154 ret = proto->init_cgroup(memcg, ss);
155 if (ret) 155 if (ret)
156 goto out; 156 goto out;
157 } 157 }
@@ -162,19 +162,19 @@ int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
162out: 162out:
163 list_for_each_entry_continue_reverse(proto, &proto_list, node) 163 list_for_each_entry_continue_reverse(proto, &proto_list, node)
164 if (proto->destroy_cgroup) 164 if (proto->destroy_cgroup)
165 proto->destroy_cgroup(cgrp); 165 proto->destroy_cgroup(memcg);
166 mutex_unlock(&proto_list_mutex); 166 mutex_unlock(&proto_list_mutex);
167 return ret; 167 return ret;
168} 168}
169 169
170void mem_cgroup_sockets_destroy(struct cgroup *cgrp) 170void mem_cgroup_sockets_destroy(struct mem_cgroup *memcg)
171{ 171{
172 struct proto *proto; 172 struct proto *proto;
173 173
174 mutex_lock(&proto_list_mutex); 174 mutex_lock(&proto_list_mutex);
175 list_for_each_entry_reverse(proto, &proto_list, node) 175 list_for_each_entry_reverse(proto, &proto_list, node)
176 if (proto->destroy_cgroup) 176 if (proto->destroy_cgroup)
177 proto->destroy_cgroup(cgrp); 177 proto->destroy_cgroup(memcg);
178 mutex_unlock(&proto_list_mutex); 178 mutex_unlock(&proto_list_mutex);
179} 179}
180#endif 180#endif
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index e795272fbe9e..151703791bb0 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -6,37 +6,6 @@
6#include <linux/memcontrol.h> 6#include <linux/memcontrol.h>
7#include <linux/module.h> 7#include <linux/module.h>
8 8
9static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft);
10static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft,
11 const char *buffer);
12static int tcp_cgroup_reset(struct cgroup *cont, unsigned int event);
13
14static struct cftype tcp_files[] = {
15 {
16 .name = "kmem.tcp.limit_in_bytes",
17 .write_string = tcp_cgroup_write,
18 .read_u64 = tcp_cgroup_read,
19 .private = RES_LIMIT,
20 },
21 {
22 .name = "kmem.tcp.usage_in_bytes",
23 .read_u64 = tcp_cgroup_read,
24 .private = RES_USAGE,
25 },
26 {
27 .name = "kmem.tcp.failcnt",
28 .private = RES_FAILCNT,
29 .trigger = tcp_cgroup_reset,
30 .read_u64 = tcp_cgroup_read,
31 },
32 {
33 .name = "kmem.tcp.max_usage_in_bytes",
34 .private = RES_MAX_USAGE,
35 .trigger = tcp_cgroup_reset,
36 .read_u64 = tcp_cgroup_read,
37 },
38};
39
40static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) 9static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
41{ 10{
42 return container_of(cg_proto, struct tcp_memcontrol, cg_proto); 11 return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
@@ -49,7 +18,7 @@ static void memcg_tcp_enter_memory_pressure(struct sock *sk)
49} 18}
50EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure); 19EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
51 20
52int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) 21int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
53{ 22{
54 /* 23 /*
55 * The root cgroup does not use res_counters, but rather, 24 * The root cgroup does not use res_counters, but rather,
@@ -59,13 +28,12 @@ int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
59 struct res_counter *res_parent = NULL; 28 struct res_counter *res_parent = NULL;
60 struct cg_proto *cg_proto, *parent_cg; 29 struct cg_proto *cg_proto, *parent_cg;
61 struct tcp_memcontrol *tcp; 30 struct tcp_memcontrol *tcp;
62 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
63 struct mem_cgroup *parent = parent_mem_cgroup(memcg); 31 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
64 struct net *net = current->nsproxy->net_ns; 32 struct net *net = current->nsproxy->net_ns;
65 33
66 cg_proto = tcp_prot.proto_cgroup(memcg); 34 cg_proto = tcp_prot.proto_cgroup(memcg);
67 if (!cg_proto) 35 if (!cg_proto)
68 goto create_files; 36 return 0;
69 37
70 tcp = tcp_from_cgproto(cg_proto); 38 tcp = tcp_from_cgproto(cg_proto);
71 39
@@ -88,15 +56,12 @@ int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
88 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; 56 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
89 cg_proto->memcg = memcg; 57 cg_proto->memcg = memcg;
90 58
91create_files: 59 return 0;
92 return cgroup_add_files(cgrp, ss, tcp_files,
93 ARRAY_SIZE(tcp_files));
94} 60}
95EXPORT_SYMBOL(tcp_init_cgroup); 61EXPORT_SYMBOL(tcp_init_cgroup);
96 62
97void tcp_destroy_cgroup(struct cgroup *cgrp) 63void tcp_destroy_cgroup(struct mem_cgroup *memcg)
98{ 64{
99 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
100 struct cg_proto *cg_proto; 65 struct cg_proto *cg_proto;
101 struct tcp_memcontrol *tcp; 66 struct tcp_memcontrol *tcp;
102 u64 val; 67 u64 val;
@@ -270,3 +235,37 @@ void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
270 235
271 tcp->tcp_prot_mem[idx] = val; 236 tcp->tcp_prot_mem[idx] = val;
272} 237}
238
239static struct cftype tcp_files[] = {
240 {
241 .name = "kmem.tcp.limit_in_bytes",
242 .write_string = tcp_cgroup_write,
243 .read_u64 = tcp_cgroup_read,
244 .private = RES_LIMIT,
245 },
246 {
247 .name = "kmem.tcp.usage_in_bytes",
248 .read_u64 = tcp_cgroup_read,
249 .private = RES_USAGE,
250 },
251 {
252 .name = "kmem.tcp.failcnt",
253 .private = RES_FAILCNT,
254 .trigger = tcp_cgroup_reset,
255 .read_u64 = tcp_cgroup_read,
256 },
257 {
258 .name = "kmem.tcp.max_usage_in_bytes",
259 .private = RES_MAX_USAGE,
260 .trigger = tcp_cgroup_reset,
261 .read_u64 = tcp_cgroup_read,
262 },
263 { } /* terminate */
264};
265
266static int __init tcp_memcontrol_init(void)
267{
268 WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
269 return 0;
270}
271__initcall(tcp_memcontrol_init);
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 1afaa284fcd7..7743ea8d1d38 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -22,22 +22,6 @@
22#include <net/sock.h> 22#include <net/sock.h>
23#include <net/cls_cgroup.h> 23#include <net/cls_cgroup.h>
24 24
25static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp);
26static void cgrp_destroy(struct cgroup *cgrp);
27static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
28
29struct cgroup_subsys net_cls_subsys = {
30 .name = "net_cls",
31 .create = cgrp_create,
32 .destroy = cgrp_destroy,
33 .populate = cgrp_populate,
34#ifdef CONFIG_NET_CLS_CGROUP
35 .subsys_id = net_cls_subsys_id,
36#endif
37 .module = THIS_MODULE,
38};
39
40
41static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp) 25static inline struct cgroup_cls_state *cgrp_cls_state(struct cgroup *cgrp)
42{ 26{
43 return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id), 27 return container_of(cgroup_subsys_state(cgrp, net_cls_subsys_id),
@@ -86,12 +70,19 @@ static struct cftype ss_files[] = {
86 .read_u64 = read_classid, 70 .read_u64 = read_classid,
87 .write_u64 = write_classid, 71 .write_u64 = write_classid,
88 }, 72 },
73 { } /* terminate */
89}; 74};
90 75
91static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) 76struct cgroup_subsys net_cls_subsys = {
92{ 77 .name = "net_cls",
93 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 78 .create = cgrp_create,
94} 79 .destroy = cgrp_destroy,
80#ifdef CONFIG_NET_CLS_CGROUP
81 .subsys_id = net_cls_subsys_id,
82#endif
83 .base_cftypes = ss_files,
84 .module = THIS_MODULE,
85};
95 86
96struct cls_cgroup_head { 87struct cls_cgroup_head {
97 u32 handle; 88 u32 handle;
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index c43a3323feea..442204cc22d9 100644
--- a/security/device_cgroup.c
+++ b/security/device_cgroup.c
@@ -447,22 +447,16 @@ static struct cftype dev_cgroup_files[] = {
447 .read_seq_string = devcgroup_seq_read, 447 .read_seq_string = devcgroup_seq_read,
448 .private = DEVCG_LIST, 448 .private = DEVCG_LIST,
449 }, 449 },
450 { } /* terminate */
450}; 451};
451 452
452static int devcgroup_populate(struct cgroup_subsys *ss,
453 struct cgroup *cgroup)
454{
455 return cgroup_add_files(cgroup, ss, dev_cgroup_files,
456 ARRAY_SIZE(dev_cgroup_files));
457}
458
459struct cgroup_subsys devices_subsys = { 453struct cgroup_subsys devices_subsys = {
460 .name = "devices", 454 .name = "devices",
461 .can_attach = devcgroup_can_attach, 455 .can_attach = devcgroup_can_attach,
462 .create = devcgroup_create, 456 .create = devcgroup_create,
463 .destroy = devcgroup_destroy, 457 .destroy = devcgroup_destroy,
464 .populate = devcgroup_populate,
465 .subsys_id = devices_subsys_id, 458 .subsys_id = devices_subsys_id,
459 .base_cftypes = dev_cgroup_files,
466}; 460};
467 461
468int __devcgroup_inode_permission(struct inode *inode, int mask) 462int __devcgroup_inode_permission(struct inode *inode, int mask)