diff options
-rw-r--r-- | include/linux/cgroup_rdma.h | 53 | ||||
-rw-r--r-- | include/linux/cgroup_subsys.h | 4 | ||||
-rw-r--r-- | init/Kconfig | 10 | ||||
-rw-r--r-- | kernel/cgroup/Makefile | 1 | ||||
-rw-r--r-- | kernel/cgroup/rdma.c | 617 |
5 files changed, 685 insertions, 0 deletions
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h new file mode 100644 index 000000000000..e94290b29e99 --- /dev/null +++ b/include/linux/cgroup_rdma.h | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> | ||
3 | * | ||
4 | * This file is subject to the terms and conditions of version 2 of the GNU | ||
5 | * General Public License. See the file COPYING in the main directory of the | ||
6 | * Linux distribution for more details. | ||
7 | */ | ||
8 | |||
9 | #ifndef _CGROUP_RDMA_H | ||
10 | #define _CGROUP_RDMA_H | ||
11 | |||
12 | #include <linux/cgroup.h> | ||
13 | |||
/*
 * Resource classes tracked by the RDMA cgroup controller.  The values
 * are used as indices into per-pool resource arrays.
 */
enum rdmacg_resource_type {
	RDMACG_RESOURCE_HCA_HANDLE = 0,
	RDMACG_RESOURCE_HCA_OBJECT,
	/* number of resource types, not a resource itself */
	RDMACG_RESOURCE_MAX,
};
19 | |||
20 | #ifdef CONFIG_CGROUP_RDMA | ||
21 | |||
22 | struct rdma_cgroup { | ||
23 | struct cgroup_subsys_state css; | ||
24 | |||
25 | /* | ||
26 | * head to keep track of all resource pools | ||
27 | * that belongs to this cgroup. | ||
28 | */ | ||
29 | struct list_head rpools; | ||
30 | }; | ||
31 | |||
32 | struct rdmacg_device { | ||
33 | struct list_head dev_node; | ||
34 | struct list_head rpools; | ||
35 | char *name; | ||
36 | }; | ||
37 | |||
/*
 * Device registration: called by the RDMA/IB stack for every device
 * that should take part in resource accounting.
 */
int rdmacg_register_device(struct rdmacg_device *device);
void rdmacg_unregister_device(struct rdmacg_device *device);

/*
 * Charging: called by the RDMA/IB stack to charge one resource of type
 * @index on @device to the current task's cgroup, and to give it back.
 */
int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
		      struct rdmacg_device *device,
		      enum rdmacg_resource_type index);
void rdmacg_uncharge(struct rdma_cgroup *cg,
		     struct rdmacg_device *device,
		     enum rdmacg_resource_type index);
52 | #endif /* CONFIG_CGROUP_RDMA */ | ||
53 | #endif /* _CGROUP_RDMA_H */ | ||
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0df0336acee9..d0e597c44585 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h | |||
@@ -56,6 +56,10 @@ SUBSYS(hugetlb) | |||
56 | SUBSYS(pids) | 56 | SUBSYS(pids) |
57 | #endif | 57 | #endif |
58 | 58 | ||
59 | #if IS_ENABLED(CONFIG_CGROUP_RDMA) | ||
60 | SUBSYS(rdma) | ||
61 | #endif | ||
62 | |||
59 | /* | 63 | /* |
60 | * The following subsystems are not supported on the default hierarchy. | 64 | * The following subsystems are not supported on the default hierarchy. |
61 | */ | 65 | */ |
diff --git a/init/Kconfig b/init/Kconfig index 223b734abccd..ef80d46a32b6 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -1090,6 +1090,16 @@ config CGROUP_PIDS | |||
1090 | since the PIDs limit only affects a process's ability to fork, not to | 1090 | since the PIDs limit only affects a process's ability to fork, not to |
1091 | attach to a cgroup. | 1091 | attach to a cgroup. |
1092 | 1092 | ||
1093 | config CGROUP_RDMA | ||
1094 | bool "RDMA controller" | ||
1095 | help | ||
1096 | Provides enforcement of RDMA resources defined by IB stack. | ||
1097 | It is fairly easy for consumers to exhaust RDMA resources, which | ||
1098 | can result in resource unavailability to other consumers. | ||
1099 | RDMA controller is designed to stop this from happening. | ||
1100 | Attaching processes with active RDMA resources to the cgroup | ||
1101 | hierarchy is allowed even if it can cross the hierarchy's limit. | ||
1102 | |||
1093 | config CGROUP_FREEZER | 1103 | config CGROUP_FREEZER |
1094 | bool "Freezer controller" | 1104 | bool "Freezer controller" |
1095 | help | 1105 | help |
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile index 6d42a3211164..387348a40c64 100644 --- a/kernel/cgroup/Makefile +++ b/kernel/cgroup/Makefile | |||
@@ -2,4 +2,5 @@ obj-y := cgroup.o namespace.o cgroup-v1.o | |||
2 | 2 | ||
3 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o | 3 | obj-$(CONFIG_CGROUP_FREEZER) += freezer.o |
4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o | 4 | obj-$(CONFIG_CGROUP_PIDS) += pids.o |
5 | obj-$(CONFIG_CGROUP_RDMA) += rdma.o | ||
5 | obj-$(CONFIG_CPUSETS) += cpuset.o | 6 | obj-$(CONFIG_CPUSETS) += cpuset.o |
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c new file mode 100644 index 000000000000..021bee7a9692 --- /dev/null +++ b/kernel/cgroup/rdma.c | |||
@@ -0,0 +1,617 @@ | |||
1 | /* | ||
2 | * RDMA resource limiting controller for cgroups. | ||
3 | * | ||
4 | * Used to allow a cgroup hierarchy to stop processes from consuming | ||
5 | * additional RDMA resources after a certain limit is reached. | ||
6 | * | ||
7 | * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> | ||
8 | * | ||
9 | * This file is subject to the terms and conditions of version 2 of the GNU | ||
10 | * General Public License. See the file COPYING in the main directory of the | ||
11 | * Linux distribution for more details. | ||
12 | */ | ||
13 | |||
14 | #include <linux/bitops.h> | ||
15 | #include <linux/slab.h> | ||
16 | #include <linux/seq_file.h> | ||
17 | #include <linux/cgroup.h> | ||
18 | #include <linux/parser.h> | ||
19 | #include <linux/cgroup_rdma.h> | ||
20 | |||
/* Spelling of the "no limit" value in the cgroup interface files. */
#define RDMACG_MAX_STR "max"

/*
 * rdmacg_mutex serializes access to the list of registered rdma devices
 * (rdmacg_devices) and to the resource pool lists kept per cgroup.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);
29 | |||
/* Which interface file is being shown; stored in cftype->private. */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX = 0,	/* the "max" (limit) file */
	RDMACG_RESOURCE_TYPE_STAT,	/* the "current" (usage) file */
};
34 | |||
35 | /* | ||
36 | * resource table definition as to be seen by the user. | ||
37 | * Need to add entries to it when more resources are | ||
38 | * added/defined at IB verb/core layer. | ||
39 | */ | ||
40 | static char const *rdmacg_resource_names[] = { | ||
41 | [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle", | ||
42 | [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object", | ||
43 | }; | ||
44 | |||
/* Limit and current charge of one resource type within a resource pool. */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX stands for "max" */
	int usage;	/* number of charges currently held */
};
50 | |||
51 | /* | ||
52 | * resource pool object which represents per cgroup, per device | ||
53 | * resources. There are multiple instances of this object per cgroup, | ||
54 | * therefore it cannot be embedded within rdma_cgroup structure. It | ||
55 | * is maintained as list. | ||
56 | */ | ||
57 | struct rdmacg_resource_pool { | ||
58 | struct rdmacg_device *device; | ||
59 | struct rdmacg_resource resources[RDMACG_RESOURCE_MAX]; | ||
60 | |||
61 | struct list_head cg_node; | ||
62 | struct list_head dev_node; | ||
63 | |||
64 | /* count active user tasks of this pool */ | ||
65 | u64 usage_sum; | ||
66 | /* total number counts which are set to max */ | ||
67 | int num_max_cnt; | ||
68 | }; | ||
69 | |||
70 | static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) | ||
71 | { | ||
72 | return container_of(css, struct rdma_cgroup, css); | ||
73 | } | ||
74 | |||
75 | static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) | ||
76 | { | ||
77 | return css_rdmacg(cg->css.parent); | ||
78 | } | ||
79 | |||
80 | static inline struct rdma_cgroup *get_current_rdmacg(void) | ||
81 | { | ||
82 | return css_rdmacg(task_get_css(current, rdma_cgrp_id)); | ||
83 | } | ||
84 | |||
85 | static void set_resource_limit(struct rdmacg_resource_pool *rpool, | ||
86 | int index, int new_max) | ||
87 | { | ||
88 | if (new_max == S32_MAX) { | ||
89 | if (rpool->resources[index].max != S32_MAX) | ||
90 | rpool->num_max_cnt++; | ||
91 | } else { | ||
92 | if (rpool->resources[index].max == S32_MAX) | ||
93 | rpool->num_max_cnt--; | ||
94 | } | ||
95 | rpool->resources[index].max = new_max; | ||
96 | } | ||
97 | |||
98 | static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) | ||
99 | { | ||
100 | int i; | ||
101 | |||
102 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) | ||
103 | set_resource_limit(rpool, i, S32_MAX); | ||
104 | } | ||
105 | |||
106 | static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) | ||
107 | { | ||
108 | lockdep_assert_held(&rdmacg_mutex); | ||
109 | |||
110 | list_del(&rpool->cg_node); | ||
111 | list_del(&rpool->dev_node); | ||
112 | kfree(rpool); | ||
113 | } | ||
114 | |||
115 | static struct rdmacg_resource_pool * | ||
116 | find_cg_rpool_locked(struct rdma_cgroup *cg, | ||
117 | struct rdmacg_device *device) | ||
118 | |||
119 | { | ||
120 | struct rdmacg_resource_pool *pool; | ||
121 | |||
122 | lockdep_assert_held(&rdmacg_mutex); | ||
123 | |||
124 | list_for_each_entry(pool, &cg->rpools, cg_node) | ||
125 | if (pool->device == device) | ||
126 | return pool; | ||
127 | |||
128 | return NULL; | ||
129 | } | ||
130 | |||
131 | static struct rdmacg_resource_pool * | ||
132 | get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) | ||
133 | { | ||
134 | struct rdmacg_resource_pool *rpool; | ||
135 | |||
136 | rpool = find_cg_rpool_locked(cg, device); | ||
137 | if (rpool) | ||
138 | return rpool; | ||
139 | |||
140 | rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); | ||
141 | if (!rpool) | ||
142 | return ERR_PTR(-ENOMEM); | ||
143 | |||
144 | rpool->device = device; | ||
145 | set_all_resource_max_limit(rpool); | ||
146 | |||
147 | INIT_LIST_HEAD(&rpool->cg_node); | ||
148 | INIT_LIST_HEAD(&rpool->dev_node); | ||
149 | list_add_tail(&rpool->cg_node, &cg->rpools); | ||
150 | list_add_tail(&rpool->dev_node, &device->rpools); | ||
151 | return rpool; | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * uncharge_cg_locked - uncharge resource for rdma cgroup | ||
156 | * @cg: pointer to cg to uncharge and all parents in hierarchy | ||
157 | * @device: pointer to rdmacg device | ||
158 | * @index: index of the resource to uncharge in cg (resource pool) | ||
159 | * | ||
160 | * It also frees the resource pool which was created as part of | ||
161 | * charging operation when there are no resources attached to | ||
162 | * resource pool. | ||
163 | */ | ||
164 | static void | ||
165 | uncharge_cg_locked(struct rdma_cgroup *cg, | ||
166 | struct rdmacg_device *device, | ||
167 | enum rdmacg_resource_type index) | ||
168 | { | ||
169 | struct rdmacg_resource_pool *rpool; | ||
170 | |||
171 | rpool = find_cg_rpool_locked(cg, device); | ||
172 | |||
173 | /* | ||
174 | * rpool cannot be null at this stage. Let kernel operate in case | ||
175 | * if there a bug in IB stack or rdma controller, instead of crashing | ||
176 | * the system. | ||
177 | */ | ||
178 | if (unlikely(!rpool)) { | ||
179 | pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | rpool->resources[index].usage--; | ||
184 | |||
185 | /* | ||
186 | * A negative count (or overflow) is invalid, | ||
187 | * it indicates a bug in the rdma controller. | ||
188 | */ | ||
189 | WARN_ON_ONCE(rpool->resources[index].usage < 0); | ||
190 | rpool->usage_sum--; | ||
191 | if (rpool->usage_sum == 0 && | ||
192 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { | ||
193 | /* | ||
194 | * No user of the rpool and all entries are set to max, so | ||
195 | * safe to delete this rpool. | ||
196 | */ | ||
197 | free_cg_rpool_locked(rpool); | ||
198 | } | ||
199 | } | ||
200 | |||
201 | /** | ||
202 | * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count | ||
203 | * @device: pointer to rdmacg device | ||
204 | * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup | ||
205 | * stop uncharging | ||
206 | * @index: index of the resource to uncharge in cg in given resource pool | ||
207 | */ | ||
208 | static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, | ||
209 | struct rdmacg_device *device, | ||
210 | struct rdma_cgroup *stop_cg, | ||
211 | enum rdmacg_resource_type index) | ||
212 | { | ||
213 | struct rdma_cgroup *p; | ||
214 | |||
215 | mutex_lock(&rdmacg_mutex); | ||
216 | |||
217 | for (p = cg; p != stop_cg; p = parent_rdmacg(p)) | ||
218 | uncharge_cg_locked(p, device, index); | ||
219 | |||
220 | mutex_unlock(&rdmacg_mutex); | ||
221 | |||
222 | css_put(&cg->css); | ||
223 | } | ||
224 | |||
225 | /** | ||
226 | * rdmacg_uncharge - hierarchically uncharge rdma resource count | ||
227 | * @device: pointer to rdmacg device | ||
228 | * @index: index of the resource to uncharge in cgroup in given resource pool | ||
229 | */ | ||
230 | void rdmacg_uncharge(struct rdma_cgroup *cg, | ||
231 | struct rdmacg_device *device, | ||
232 | enum rdmacg_resource_type index) | ||
233 | { | ||
234 | if (index >= RDMACG_RESOURCE_MAX) | ||
235 | return; | ||
236 | |||
237 | rdmacg_uncharge_hierarchy(cg, device, NULL, index); | ||
238 | } | ||
239 | EXPORT_SYMBOL(rdmacg_uncharge); | ||
240 | |||
241 | /** | ||
242 | * rdmacg_try_charge - hierarchically try to charge the rdma resource | ||
243 | * @rdmacg: pointer to rdma cgroup which will own this resource | ||
244 | * @device: pointer to rdmacg device | ||
245 | * @index: index of the resource to charge in cgroup (resource pool) | ||
246 | * | ||
247 | * This function follows charging resource in hierarchical way. | ||
248 | * It will fail if the charge would cause the new value to exceed the | ||
249 | * hierarchical limit. | ||
250 | * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. | ||
251 | * Returns pointer to rdmacg for this resource when charging is successful. | ||
252 | * | ||
253 | * Charger needs to account resources on two criteria. | ||
254 | * (a) per cgroup & (b) per device resource usage. | ||
255 | * Per cgroup resource usage ensures that tasks of cgroup doesn't cross | ||
256 | * the configured limits. Per device provides granular configuration | ||
257 | * in multi device usage. It allocates resource pool in the hierarchy | ||
258 | * for each parent it come across for first resource. Later on resource | ||
259 | * pool will be available. Therefore it will be much faster thereon | ||
260 | * to charge/uncharge. | ||
261 | */ | ||
262 | int rdmacg_try_charge(struct rdma_cgroup **rdmacg, | ||
263 | struct rdmacg_device *device, | ||
264 | enum rdmacg_resource_type index) | ||
265 | { | ||
266 | struct rdma_cgroup *cg, *p; | ||
267 | struct rdmacg_resource_pool *rpool; | ||
268 | s64 new; | ||
269 | int ret = 0; | ||
270 | |||
271 | if (index >= RDMACG_RESOURCE_MAX) | ||
272 | return -EINVAL; | ||
273 | |||
274 | /* | ||
275 | * hold on to css, as cgroup can be removed but resource | ||
276 | * accounting happens on css. | ||
277 | */ | ||
278 | cg = get_current_rdmacg(); | ||
279 | |||
280 | mutex_lock(&rdmacg_mutex); | ||
281 | for (p = cg; p; p = parent_rdmacg(p)) { | ||
282 | rpool = get_cg_rpool_locked(p, device); | ||
283 | if (IS_ERR(rpool)) { | ||
284 | ret = PTR_ERR(rpool); | ||
285 | goto err; | ||
286 | } else { | ||
287 | new = rpool->resources[index].usage + 1; | ||
288 | if (new > rpool->resources[index].max) { | ||
289 | ret = -EAGAIN; | ||
290 | goto err; | ||
291 | } else { | ||
292 | rpool->resources[index].usage = new; | ||
293 | rpool->usage_sum++; | ||
294 | } | ||
295 | } | ||
296 | } | ||
297 | mutex_unlock(&rdmacg_mutex); | ||
298 | |||
299 | *rdmacg = cg; | ||
300 | return 0; | ||
301 | |||
302 | err: | ||
303 | mutex_unlock(&rdmacg_mutex); | ||
304 | rdmacg_uncharge_hierarchy(cg, device, p, index); | ||
305 | return ret; | ||
306 | } | ||
307 | EXPORT_SYMBOL(rdmacg_try_charge); | ||
308 | |||
309 | /** | ||
310 | * rdmacg_register_device - register rdmacg device to rdma controller. | ||
311 | * @device: pointer to rdmacg device whose resources need to be accounted. | ||
312 | * | ||
313 | * If IB stack wish a device to participate in rdma cgroup resource | ||
314 | * tracking, it must invoke this API to register with rdma cgroup before | ||
315 | * any user space application can start using the RDMA resources. | ||
316 | * Returns 0 on success or EINVAL when table length given is beyond | ||
317 | * supported size. | ||
318 | */ | ||
319 | int rdmacg_register_device(struct rdmacg_device *device) | ||
320 | { | ||
321 | INIT_LIST_HEAD(&device->dev_node); | ||
322 | INIT_LIST_HEAD(&device->rpools); | ||
323 | |||
324 | mutex_lock(&rdmacg_mutex); | ||
325 | list_add_tail(&device->dev_node, &rdmacg_devices); | ||
326 | mutex_unlock(&rdmacg_mutex); | ||
327 | return 0; | ||
328 | } | ||
329 | EXPORT_SYMBOL(rdmacg_register_device); | ||
330 | |||
331 | /** | ||
332 | * rdmacg_unregister_device - unregister rdmacg device from rdma controller. | ||
333 | * @device: pointer to rdmacg device which was previously registered with rdma | ||
334 | * controller using rdmacg_register_device(). | ||
335 | * | ||
336 | * IB stack must invoke this after all the resources of the IB device | ||
337 | * are destroyed and after ensuring that no more resources will be created | ||
338 | * when this API is invoked. | ||
339 | */ | ||
340 | void rdmacg_unregister_device(struct rdmacg_device *device) | ||
341 | { | ||
342 | struct rdmacg_resource_pool *rpool, *tmp; | ||
343 | |||
344 | /* | ||
345 | * Synchronize with any active resource settings, | ||
346 | * usage query happening via configfs. | ||
347 | */ | ||
348 | mutex_lock(&rdmacg_mutex); | ||
349 | list_del_init(&device->dev_node); | ||
350 | |||
351 | /* | ||
352 | * Now that this device is off the cgroup list, its safe to free | ||
353 | * all the rpool resources. | ||
354 | */ | ||
355 | list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) | ||
356 | free_cg_rpool_locked(rpool); | ||
357 | |||
358 | mutex_unlock(&rdmacg_mutex); | ||
359 | } | ||
360 | EXPORT_SYMBOL(rdmacg_unregister_device); | ||
361 | |||
362 | static int parse_resource(char *c, int *intval) | ||
363 | { | ||
364 | substring_t argstr; | ||
365 | const char **table = &rdmacg_resource_names[0]; | ||
366 | char *name, *value = c; | ||
367 | size_t len; | ||
368 | int ret, i = 0; | ||
369 | |||
370 | name = strsep(&value, "="); | ||
371 | if (!name || !value) | ||
372 | return -EINVAL; | ||
373 | |||
374 | len = strlen(value); | ||
375 | |||
376 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { | ||
377 | if (strcmp(table[i], name)) | ||
378 | continue; | ||
379 | |||
380 | argstr.from = value; | ||
381 | argstr.to = value + len; | ||
382 | |||
383 | ret = match_int(&argstr, intval); | ||
384 | if (ret >= 0) { | ||
385 | if (*intval < 0) | ||
386 | break; | ||
387 | return i; | ||
388 | } | ||
389 | if (strncmp(value, RDMACG_MAX_STR, len) == 0) { | ||
390 | *intval = S32_MAX; | ||
391 | return i; | ||
392 | } | ||
393 | break; | ||
394 | } | ||
395 | return -EINVAL; | ||
396 | } | ||
397 | |||
398 | static int rdmacg_parse_limits(char *options, | ||
399 | int *new_limits, unsigned long *enables) | ||
400 | { | ||
401 | char *c; | ||
402 | int err = -EINVAL; | ||
403 | |||
404 | /* parse resource options */ | ||
405 | while ((c = strsep(&options, " ")) != NULL) { | ||
406 | int index, intval; | ||
407 | |||
408 | index = parse_resource(c, &intval); | ||
409 | if (index < 0) | ||
410 | goto err; | ||
411 | |||
412 | new_limits[index] = intval; | ||
413 | *enables |= BIT(index); | ||
414 | } | ||
415 | return 0; | ||
416 | |||
417 | err: | ||
418 | return err; | ||
419 | } | ||
420 | |||
421 | static struct rdmacg_device *rdmacg_get_device_locked(const char *name) | ||
422 | { | ||
423 | struct rdmacg_device *device; | ||
424 | |||
425 | lockdep_assert_held(&rdmacg_mutex); | ||
426 | |||
427 | list_for_each_entry(device, &rdmacg_devices, dev_node) | ||
428 | if (!strcmp(name, device->name)) | ||
429 | return device; | ||
430 | |||
431 | return NULL; | ||
432 | } | ||
433 | |||
434 | static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, | ||
435 | char *buf, size_t nbytes, loff_t off) | ||
436 | { | ||
437 | struct rdma_cgroup *cg = css_rdmacg(of_css(of)); | ||
438 | const char *dev_name; | ||
439 | struct rdmacg_resource_pool *rpool; | ||
440 | struct rdmacg_device *device; | ||
441 | char *options = strstrip(buf); | ||
442 | int *new_limits; | ||
443 | unsigned long enables = 0; | ||
444 | int i = 0, ret = 0; | ||
445 | |||
446 | /* extract the device name first */ | ||
447 | dev_name = strsep(&options, " "); | ||
448 | if (!dev_name) { | ||
449 | ret = -EINVAL; | ||
450 | goto err; | ||
451 | } | ||
452 | |||
453 | new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); | ||
454 | if (!new_limits) { | ||
455 | ret = -ENOMEM; | ||
456 | goto err; | ||
457 | } | ||
458 | |||
459 | ret = rdmacg_parse_limits(options, new_limits, &enables); | ||
460 | if (ret) | ||
461 | goto parse_err; | ||
462 | |||
463 | /* acquire lock to synchronize with hot plug devices */ | ||
464 | mutex_lock(&rdmacg_mutex); | ||
465 | |||
466 | device = rdmacg_get_device_locked(dev_name); | ||
467 | if (!device) { | ||
468 | ret = -ENODEV; | ||
469 | goto dev_err; | ||
470 | } | ||
471 | |||
472 | rpool = get_cg_rpool_locked(cg, device); | ||
473 | if (IS_ERR(rpool)) { | ||
474 | ret = PTR_ERR(rpool); | ||
475 | goto dev_err; | ||
476 | } | ||
477 | |||
478 | /* now set the new limits of the rpool */ | ||
479 | for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) | ||
480 | set_resource_limit(rpool, i, new_limits[i]); | ||
481 | |||
482 | if (rpool->usage_sum == 0 && | ||
483 | rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { | ||
484 | /* | ||
485 | * No user of the rpool and all entries are set to max, so | ||
486 | * safe to delete this rpool. | ||
487 | */ | ||
488 | free_cg_rpool_locked(rpool); | ||
489 | } | ||
490 | |||
491 | dev_err: | ||
492 | mutex_unlock(&rdmacg_mutex); | ||
493 | |||
494 | parse_err: | ||
495 | kfree(new_limits); | ||
496 | |||
497 | err: | ||
498 | return ret ?: nbytes; | ||
499 | } | ||
500 | |||
501 | static void print_rpool_values(struct seq_file *sf, | ||
502 | struct rdmacg_resource_pool *rpool) | ||
503 | { | ||
504 | enum rdmacg_file_type sf_type; | ||
505 | int i; | ||
506 | u32 value; | ||
507 | |||
508 | sf_type = seq_cft(sf)->private; | ||
509 | |||
510 | for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { | ||
511 | seq_puts(sf, rdmacg_resource_names[i]); | ||
512 | seq_putc(sf, '='); | ||
513 | if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { | ||
514 | if (rpool) | ||
515 | value = rpool->resources[i].max; | ||
516 | else | ||
517 | value = S32_MAX; | ||
518 | } else { | ||
519 | if (rpool) | ||
520 | value = rpool->resources[i].usage; | ||
521 | } | ||
522 | |||
523 | if (value == S32_MAX) | ||
524 | seq_puts(sf, RDMACG_MAX_STR); | ||
525 | else | ||
526 | seq_printf(sf, "%d", value); | ||
527 | seq_putc(sf, ' '); | ||
528 | } | ||
529 | } | ||
530 | |||
531 | static int rdmacg_resource_read(struct seq_file *sf, void *v) | ||
532 | { | ||
533 | struct rdmacg_device *device; | ||
534 | struct rdmacg_resource_pool *rpool; | ||
535 | struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); | ||
536 | |||
537 | mutex_lock(&rdmacg_mutex); | ||
538 | |||
539 | list_for_each_entry(device, &rdmacg_devices, dev_node) { | ||
540 | seq_printf(sf, "%s ", device->name); | ||
541 | |||
542 | rpool = find_cg_rpool_locked(cg, device); | ||
543 | print_rpool_values(sf, rpool); | ||
544 | |||
545 | seq_putc(sf, '\n'); | ||
546 | } | ||
547 | |||
548 | mutex_unlock(&rdmacg_mutex); | ||
549 | return 0; | ||
550 | } | ||
551 | |||
552 | static struct cftype rdmacg_files[] = { | ||
553 | { | ||
554 | .name = "max", | ||
555 | .write = rdmacg_resource_set_max, | ||
556 | .seq_show = rdmacg_resource_read, | ||
557 | .private = RDMACG_RESOURCE_TYPE_MAX, | ||
558 | .flags = CFTYPE_NOT_ON_ROOT, | ||
559 | }, | ||
560 | { | ||
561 | .name = "current", | ||
562 | .seq_show = rdmacg_resource_read, | ||
563 | .private = RDMACG_RESOURCE_TYPE_STAT, | ||
564 | .flags = CFTYPE_NOT_ON_ROOT, | ||
565 | }, | ||
566 | { } /* terminate */ | ||
567 | }; | ||
568 | |||
569 | static struct cgroup_subsys_state * | ||
570 | rdmacg_css_alloc(struct cgroup_subsys_state *parent) | ||
571 | { | ||
572 | struct rdma_cgroup *cg; | ||
573 | |||
574 | cg = kzalloc(sizeof(*cg), GFP_KERNEL); | ||
575 | if (!cg) | ||
576 | return ERR_PTR(-ENOMEM); | ||
577 | |||
578 | INIT_LIST_HEAD(&cg->rpools); | ||
579 | return &cg->css; | ||
580 | } | ||
581 | |||
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
588 | |||
589 | /** | ||
590 | * rdmacg_css_offline - cgroup css_offline callback | ||
591 | * @css: css of interest | ||
592 | * | ||
593 | * This function is called when @css is about to go away and responsible | ||
594 | * for shooting down all rdmacg associated with @css. As part of that it | ||
595 | * marks all the resource pool entries to max value, so that when resources are | ||
596 | * uncharged, associated resource pool can be freed as well. | ||
597 | */ | ||
598 | static void rdmacg_css_offline(struct cgroup_subsys_state *css) | ||
599 | { | ||
600 | struct rdma_cgroup *cg = css_rdmacg(css); | ||
601 | struct rdmacg_resource_pool *rpool; | ||
602 | |||
603 | mutex_lock(&rdmacg_mutex); | ||
604 | |||
605 | list_for_each_entry(rpool, &cg->rpools, cg_node) | ||
606 | set_all_resource_max_limit(rpool); | ||
607 | |||
608 | mutex_unlock(&rdmacg_mutex); | ||
609 | } | ||
610 | |||
611 | struct cgroup_subsys rdma_cgrp_subsys = { | ||
612 | .css_alloc = rdmacg_css_alloc, | ||
613 | .css_free = rdmacg_css_free, | ||
614 | .css_offline = rdmacg_css_offline, | ||
615 | .legacy_cftypes = rdmacg_files, | ||
616 | .dfl_cftypes = rdmacg_files, | ||
617 | }; | ||