aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 11:18:24 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-12 11:18:24 -0500
commitd206e09036d6201f90b2719484c8a59526c46125 (patch)
tree84b9057919bcb8cfd1cff47baa5fc74457e77d6d /net
parentfef3ff2eb777e76cfa5ae67591982d902c17139c (diff)
parent15ef4ffaa797034d5ff82844daf8f595d7c6d53c (diff)
Merge branch 'for-3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup changes from Tejun Heo: "A lot of activities on cgroup side. The big changes are focused on making cgroup hierarchy handling saner. - cgroup_rmdir() had peculiar semantics - it allowed cgroup destruction to be vetoed by individual controllers and tried to drain refcnt synchronously. The vetoing never worked properly and caused a good deal of contortions in cgroup. memcg was the last remaining user. Michal Hocko removed the usage and cgroup_rmdir() path has been simplified significantly. This was done in a separate branch so that the memcg people can base further memcg changes on top. - The above allowed cleaning up cgroup lifecycle management and implementation of generic cgroup iterators which are used to improve hierarchy support. - cgroup_freezer updated to allow migration in and out of a frozen cgroup and handle hierarchy. If a cgroup is frozen, all descendant cgroups are frozen. - netcls_cgroup and netprio_cgroup updated to handle hierarchy properly. - Various fixes and cleanups. - Two merge commits. One to pull in memcg and rmdir cleanups (needed to build iterators). The other pulled in cgroup/for-3.7-fixes for device_cgroup fixes so that further device_cgroup patches can be stacked on top." 
Fixed up a trivial conflict in mm/memcontrol.c as per Tejun (due to commit bea8c150a7 ("memcg: fix hotplugged memory zone oops") in master touching code close to commit 2ef37d3fe4 ("memcg: Simplify mem_cgroup_force_empty_list error handling") in for-3.8) * 'for-3.8' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (65 commits) cgroup: update Documentation/cgroups/00-INDEX cgroup_rm_file: don't delete the uncreated files cgroup: remove subsystem files when remounting cgroup cgroup: use cgroup_addrm_files() in cgroup_clear_directory() cgroup: warn about broken hierarchies only after css_online cgroup: list_del_init() on removed events cgroup: fix lockdep warning for event_control cgroup: move list add after list head initilization netprio_cgroup: allow nesting and inherit config on cgroup creation netprio_cgroup: implement netprio[_set]_prio() helpers netprio_cgroup: use cgroup->id instead of cgroup_netprio_state->prioidx netprio_cgroup: reimplement priomap expansion netprio_cgroup: shorten variable names in extend_netdev_table() netprio_cgroup: simplify write_priomap() netcls_cgroup: move config inheritance to ->css_online() and remove .broken_hierarchy marking cgroup: remove obsolete guarantee from cgroup_task_migrate. cgroup: add cgroup->id cgroup, cpuset: remove cgroup_subsys->post_clone() cgroup: s/CGRP_CLONE_CHILDREN/CGRP_CPUSET_CLONE_CHILDREN/ cgroup: rename ->create/post_create/pre_destroy/destroy() to ->css_alloc/online/offline/free() ...
Diffstat (limited to 'net')
-rw-r--r--net/core/netprio_cgroup.c260
-rw-r--r--net/sched/cls_cgroup.c28
2 files changed, 125 insertions, 163 deletions
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 79285a36035f..bde53da9cd86 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -27,11 +27,7 @@
27 27
28#include <linux/fdtable.h> 28#include <linux/fdtable.h>
29 29
30#define PRIOIDX_SZ 128 30#define PRIOMAP_MIN_SZ 128
31
32static unsigned long prioidx_map[PRIOIDX_SZ];
33static DEFINE_SPINLOCK(prioidx_map_lock);
34static atomic_t max_prioidx = ATOMIC_INIT(0);
35 31
36static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) 32static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
37{ 33{
@@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr
39 struct cgroup_netprio_state, css); 35 struct cgroup_netprio_state, css);
40} 36}
41 37
42static int get_prioidx(u32 *prio) 38/*
43{ 39 * Extend @dev->priomap so that it's large enough to accomodate
44 unsigned long flags; 40 * @target_idx. @dev->priomap.priomap_len > @target_idx after successful
45 u32 prioidx; 41 * return. Must be called under rtnl lock.
46 42 */
47 spin_lock_irqsave(&prioidx_map_lock, flags); 43static int extend_netdev_table(struct net_device *dev, u32 target_idx)
48 prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
49 if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) {
50 spin_unlock_irqrestore(&prioidx_map_lock, flags);
51 return -ENOSPC;
52 }
53 set_bit(prioidx, prioidx_map);
54 if (atomic_read(&max_prioidx) < prioidx)
55 atomic_set(&max_prioidx, prioidx);
56 spin_unlock_irqrestore(&prioidx_map_lock, flags);
57 *prio = prioidx;
58 return 0;
59}
60
61static void put_prioidx(u32 idx)
62{ 44{
63 unsigned long flags; 45 struct netprio_map *old, *new;
64 46 size_t new_sz, new_len;
65 spin_lock_irqsave(&prioidx_map_lock, flags);
66 clear_bit(idx, prioidx_map);
67 spin_unlock_irqrestore(&prioidx_map_lock, flags);
68}
69 47
70static int extend_netdev_table(struct net_device *dev, u32 new_len) 48 /* is the existing priomap large enough? */
71{ 49 old = rtnl_dereference(dev->priomap);
72 size_t new_size = sizeof(struct netprio_map) + 50 if (old && old->priomap_len > target_idx)
73 ((sizeof(u32) * new_len)); 51 return 0;
74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
75 struct netprio_map *old_priomap;
76 52
77 old_priomap = rtnl_dereference(dev->priomap); 53 /*
54 * Determine the new size. Let's keep it power-of-two. We start
55 * from PRIOMAP_MIN_SZ and double it until it's large enough to
56 * accommodate @target_idx.
57 */
58 new_sz = PRIOMAP_MIN_SZ;
59 while (true) {
60 new_len = (new_sz - offsetof(struct netprio_map, priomap)) /
61 sizeof(new->priomap[0]);
62 if (new_len > target_idx)
63 break;
64 new_sz *= 2;
65 /* overflowed? */
66 if (WARN_ON(new_sz < PRIOMAP_MIN_SZ))
67 return -ENOSPC;
68 }
78 69
79 if (!new_priomap) { 70 /* allocate & copy */
71 new = kzalloc(new_sz, GFP_KERNEL);
72 if (!new) {
80 pr_warn("Unable to alloc new priomap!\n"); 73 pr_warn("Unable to alloc new priomap!\n");
81 return -ENOMEM; 74 return -ENOMEM;
82 } 75 }
83 76
84 if (old_priomap) 77 if (old)
85 memcpy(new_priomap->priomap, old_priomap->priomap, 78 memcpy(new->priomap, old->priomap,
86 old_priomap->priomap_len * 79 old->priomap_len * sizeof(old->priomap[0]));
87 sizeof(old_priomap->priomap[0]));
88 80
89 new_priomap->priomap_len = new_len; 81 new->priomap_len = new_len;
90 82
91 rcu_assign_pointer(dev->priomap, new_priomap); 83 /* install the new priomap */
92 if (old_priomap) 84 rcu_assign_pointer(dev->priomap, new);
93 kfree_rcu(old_priomap, rcu); 85 if (old)
86 kfree_rcu(old, rcu);
94 return 0; 87 return 0;
95} 88}
96 89
97static int write_update_netdev_table(struct net_device *dev) 90/**
91 * netprio_prio - return the effective netprio of a cgroup-net_device pair
92 * @cgrp: cgroup part of the target pair
93 * @dev: net_device part of the target pair
94 *
95 * Should be called under RCU read or rtnl lock.
96 */
97static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev)
98{
99 struct netprio_map *map = rcu_dereference_rtnl(dev->priomap);
100
101 if (map && cgrp->id < map->priomap_len)
102 return map->priomap[cgrp->id];
103 return 0;
104}
105
106/**
107 * netprio_set_prio - set netprio on a cgroup-net_device pair
108 * @cgrp: cgroup part of the target pair
109 * @dev: net_device part of the target pair
110 * @prio: prio to set
111 *
112 * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl
113 * lock and may fail under memory pressure for non-zero @prio.
114 */
115static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev,
116 u32 prio)
98{ 117{
99 int ret = 0;
100 u32 max_len;
101 struct netprio_map *map; 118 struct netprio_map *map;
119 int ret;
102 120
103 max_len = atomic_read(&max_prioidx) + 1; 121 /* avoid extending priomap for zero writes */
104 map = rtnl_dereference(dev->priomap); 122 map = rtnl_dereference(dev->priomap);
105 if (!map || map->priomap_len < max_len) 123 if (!prio && (!map || map->priomap_len <= cgrp->id))
106 ret = extend_netdev_table(dev, max_len); 124 return 0;
107 125
108 return ret; 126 ret = extend_netdev_table(dev, cgrp->id);
127 if (ret)
128 return ret;
129
130 map = rtnl_dereference(dev->priomap);
131 map->priomap[cgrp->id] = prio;
132 return 0;
109} 133}
110 134
111static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 135static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
112{ 136{
113 struct cgroup_netprio_state *cs; 137 struct cgroup_netprio_state *cs;
114 int ret = -EINVAL;
115 138
116 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 139 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
117 if (!cs) 140 if (!cs)
118 return ERR_PTR(-ENOMEM); 141 return ERR_PTR(-ENOMEM);
119 142
120 if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx)
121 goto out;
122
123 ret = get_prioidx(&cs->prioidx);
124 if (ret < 0) {
125 pr_warn("No space in priority index array\n");
126 goto out;
127 }
128
129 return &cs->css; 143 return &cs->css;
130out:
131 kfree(cs);
132 return ERR_PTR(ret);
133} 144}
134 145
135static void cgrp_destroy(struct cgroup *cgrp) 146static int cgrp_css_online(struct cgroup *cgrp)
136{ 147{
137 struct cgroup_netprio_state *cs; 148 struct cgroup *parent = cgrp->parent;
138 struct net_device *dev; 149 struct net_device *dev;
139 struct netprio_map *map; 150 int ret = 0;
151
152 if (!parent)
153 return 0;
140 154
141 cs = cgrp_netprio_state(cgrp);
142 rtnl_lock(); 155 rtnl_lock();
156 /*
157 * Inherit prios from the parent. As all prios are set during
158 * onlining, there is no need to clear them on offline.
159 */
143 for_each_netdev(&init_net, dev) { 160 for_each_netdev(&init_net, dev) {
144 map = rtnl_dereference(dev->priomap); 161 u32 prio = netprio_prio(parent, dev);
145 if (map && cs->prioidx < map->priomap_len) 162
146 map->priomap[cs->prioidx] = 0; 163 ret = netprio_set_prio(cgrp, dev, prio);
164 if (ret)
165 break;
147 } 166 }
148 rtnl_unlock(); 167 rtnl_unlock();
149 put_prioidx(cs->prioidx); 168 return ret;
150 kfree(cs); 169}
170
171static void cgrp_css_free(struct cgroup *cgrp)
172{
173 kfree(cgrp_netprio_state(cgrp));
151} 174}
152 175
153static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) 176static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
154{ 177{
155 return (u64)cgrp_netprio_state(cgrp)->prioidx; 178 return cgrp->id;
156} 179}
157 180
158static int read_priomap(struct cgroup *cont, struct cftype *cft, 181static int read_priomap(struct cgroup *cont, struct cftype *cft,
159 struct cgroup_map_cb *cb) 182 struct cgroup_map_cb *cb)
160{ 183{
161 struct net_device *dev; 184 struct net_device *dev;
162 u32 prioidx = cgrp_netprio_state(cont)->prioidx;
163 u32 priority;
164 struct netprio_map *map;
165 185
166 rcu_read_lock(); 186 rcu_read_lock();
167 for_each_netdev_rcu(&init_net, dev) { 187 for_each_netdev_rcu(&init_net, dev)
168 map = rcu_dereference(dev->priomap); 188 cb->fill(cb, dev->name, netprio_prio(cont, dev));
169 priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0;
170 cb->fill(cb, dev->name, priority);
171 }
172 rcu_read_unlock(); 189 rcu_read_unlock();
173 return 0; 190 return 0;
174} 191}
@@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft,
176static int write_priomap(struct cgroup *cgrp, struct cftype *cft, 193static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
177 const char *buffer) 194 const char *buffer)
178{ 195{
179 char *devname = kstrdup(buffer, GFP_KERNEL); 196 char devname[IFNAMSIZ + 1];
180 int ret = -EINVAL;
181 u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
182 unsigned long priority;
183 char *priostr;
184 struct net_device *dev; 197 struct net_device *dev;
185 struct netprio_map *map; 198 u32 prio;
186 199 int ret;
187 if (!devname)
188 return -ENOMEM;
189
190 /*
191 * Minimally sized valid priomap string
192 */
193 if (strlen(devname) < 3)
194 goto out_free_devname;
195
196 priostr = strstr(devname, " ");
197 if (!priostr)
198 goto out_free_devname;
199
200 /*
201 *Separate the devname from the associated priority
202 *and advance the priostr pointer to the priority value
203 */
204 *priostr = '\0';
205 priostr++;
206
207 /*
208 * If the priostr points to NULL, we're at the end of the passed
209 * in string, and its not a valid write
210 */
211 if (*priostr == '\0')
212 goto out_free_devname;
213
214 ret = kstrtoul(priostr, 10, &priority);
215 if (ret < 0)
216 goto out_free_devname;
217 200
218 ret = -ENODEV; 201 if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2)
202 return -EINVAL;
219 203
220 dev = dev_get_by_name(&init_net, devname); 204 dev = dev_get_by_name(&init_net, devname);
221 if (!dev) 205 if (!dev)
222 goto out_free_devname; 206 return -ENODEV;
223 207
224 rtnl_lock(); 208 rtnl_lock();
225 ret = write_update_netdev_table(dev);
226 if (ret < 0)
227 goto out_put_dev;
228 209
229 map = rtnl_dereference(dev->priomap); 210 ret = netprio_set_prio(cgrp, dev, prio);
230 if (map)
231 map->priomap[prioidx] = priority;
232 211
233out_put_dev:
234 rtnl_unlock(); 212 rtnl_unlock();
235 dev_put(dev); 213 dev_put(dev);
236
237out_free_devname:
238 kfree(devname);
239 return ret; 214 return ret;
240} 215}
241 216
@@ -276,22 +251,13 @@ static struct cftype ss_files[] = {
276 251
277struct cgroup_subsys net_prio_subsys = { 252struct cgroup_subsys net_prio_subsys = {
278 .name = "net_prio", 253 .name = "net_prio",
279 .create = cgrp_create, 254 .css_alloc = cgrp_css_alloc,
280 .destroy = cgrp_destroy, 255 .css_online = cgrp_css_online,
256 .css_free = cgrp_css_free,
281 .attach = net_prio_attach, 257 .attach = net_prio_attach,
282 .subsys_id = net_prio_subsys_id, 258 .subsys_id = net_prio_subsys_id,
283 .base_cftypes = ss_files, 259 .base_cftypes = ss_files,
284 .module = THIS_MODULE, 260 .module = THIS_MODULE,
285
286 /*
287 * net_prio has artificial limit on the number of cgroups and
288 * disallows nesting making it impossible to co-mount it with other
289 * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ
290 * limit and properly nest configuration such that children follow
291 * their parents' configurations by default and are allowed to
292 * override and remove the following.
293 */
294 .broken_hierarchy = true,
295}; 261};
296 262
297static int netprio_device_event(struct notifier_block *unused, 263static int netprio_device_event(struct notifier_block *unused,
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 2ecde225ae60..31f06b633574 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,21 +34,25 @@ static inline struct cgroup_cls_state *task_cls_state(struct task_struct *p)
34 struct cgroup_cls_state, css); 34 struct cgroup_cls_state, css);
35} 35}
36 36
37static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 37static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp)
38{ 38{
39 struct cgroup_cls_state *cs; 39 struct cgroup_cls_state *cs;
40 40
41 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 41 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
42 if (!cs) 42 if (!cs)
43 return ERR_PTR(-ENOMEM); 43 return ERR_PTR(-ENOMEM);
44 return &cs->css;
45}
44 46
47static int cgrp_css_online(struct cgroup *cgrp)
48{
45 if (cgrp->parent) 49 if (cgrp->parent)
46 cs->classid = cgrp_cls_state(cgrp->parent)->classid; 50 cgrp_cls_state(cgrp)->classid =
47 51 cgrp_cls_state(cgrp->parent)->classid;
48 return &cs->css; 52 return 0;
49} 53}
50 54
51static void cgrp_destroy(struct cgroup *cgrp) 55static void cgrp_css_free(struct cgroup *cgrp)
52{ 56{
53 kfree(cgrp_cls_state(cgrp)); 57 kfree(cgrp_cls_state(cgrp));
54} 58}
@@ -75,20 +79,12 @@ static struct cftype ss_files[] = {
75 79
76struct cgroup_subsys net_cls_subsys = { 80struct cgroup_subsys net_cls_subsys = {
77 .name = "net_cls", 81 .name = "net_cls",
78 .create = cgrp_create, 82 .css_alloc = cgrp_css_alloc,
79 .destroy = cgrp_destroy, 83 .css_online = cgrp_css_online,
84 .css_free = cgrp_css_free,
80 .subsys_id = net_cls_subsys_id, 85 .subsys_id = net_cls_subsys_id,
81 .base_cftypes = ss_files, 86 .base_cftypes = ss_files,
82 .module = THIS_MODULE, 87 .module = THIS_MODULE,
83
84 /*
85 * While net_cls cgroup has the rudimentary hierarchy support of
86 * inheriting the parent's classid on cgroup creation, it doesn't
87 * properly propagates config changes in ancestors to their
88 * descendents. A child should follow the parent's configuration
89 * but be allowed to override it. Fix it and remove the following.
90 */
91 .broken_hierarchy = true,
92}; 88};
93 89
94struct cls_cgroup_head { 90struct cls_cgroup_head {