aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorParav Pandit <pandit.parav@gmail.com>2017-01-09 19:02:13 -0500
committerTejun Heo <tj@kernel.org>2017-01-10 11:14:27 -0500
commit39d3e7584a686541a3295ff1624d341e669e1afc (patch)
tree611c0f26db570d8207fb22a8ca2b96392196f0a3
parent7b4632f048415263669676dda20fd5d811c3d3e4 (diff)
rdmacg: Added rdma cgroup controller
Added rdma cgroup controller that does accounting and limit enforcement of rdma/IB resources. Added rdma cgroup header file which defines its APIs to perform charging/uncharging functionality. It also defines APIs for the RDMA/IB stack for device registration. Devices which are registered will participate in controller functions of accounting and limit enforcement. It defines the rdmacg_device structure to bind the IB stack and the RDMA cgroup controller. RDMA resources are tracked using resource pools. A resource pool is a per-device, per-cgroup entity which allows setting up accounting limits on a per-device basis. Currently resources are defined by the RDMA cgroup. A resource pool is created/destroyed dynamically whenever charging/uncharging occurs respectively and whenever user configuration is done. It's a tradeoff of memory vs. a little more code space that creates resource pool objects whenever necessary, instead of creating them during cgroup creation and device registration time. Signed-off-by: Parav Pandit <pandit.parav@gmail.com> Signed-off-by: Tejun Heo <tj@kernel.org>
-rw-r--r--include/linux/cgroup_rdma.h53
-rw-r--r--include/linux/cgroup_subsys.h4
-rw-r--r--init/Kconfig10
-rw-r--r--kernel/cgroup/Makefile1
-rw-r--r--kernel/cgroup/rdma.c617
5 files changed, 685 insertions, 0 deletions
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h
new file mode 100644
index 000000000000..e94290b29e99
--- /dev/null
+++ b/include/linux/cgroup_rdma.h
@@ -0,0 +1,53 @@
1/*
2 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
3 *
4 * This file is subject to the terms and conditions of version 2 of the GNU
5 * General Public License. See the file COPYING in the main directory of the
6 * Linux distribution for more details.
7 */
8
9#ifndef _CGROUP_RDMA_H
10#define _CGROUP_RDMA_H
11
12#include <linux/cgroup.h>
13
14enum rdmacg_resource_type {
15 RDMACG_RESOURCE_HCA_HANDLE,
16 RDMACG_RESOURCE_HCA_OBJECT,
17 RDMACG_RESOURCE_MAX,
18};
19
20#ifdef CONFIG_CGROUP_RDMA
21
22struct rdma_cgroup {
23 struct cgroup_subsys_state css;
24
25 /*
26 * head to keep track of all resource pools
27 * that belongs to this cgroup.
28 */
29 struct list_head rpools;
30};
31
32struct rdmacg_device {
33 struct list_head dev_node;
34 struct list_head rpools;
35 char *name;
36};
37
38/*
39 * APIs for RDMA/IB stack to publish when a device wants to
40 * participate in resource accounting
41 */
42int rdmacg_register_device(struct rdmacg_device *device);
43void rdmacg_unregister_device(struct rdmacg_device *device);
44
45/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */
46int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
47 struct rdmacg_device *device,
48 enum rdmacg_resource_type index);
49void rdmacg_uncharge(struct rdma_cgroup *cg,
50 struct rdmacg_device *device,
51 enum rdmacg_resource_type index);
52#endif /* CONFIG_CGROUP_RDMA */
53#endif /* _CGROUP_RDMA_H */
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 0df0336acee9..d0e597c44585 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -56,6 +56,10 @@ SUBSYS(hugetlb)
56SUBSYS(pids) 56SUBSYS(pids)
57#endif 57#endif
58 58
59#if IS_ENABLED(CONFIG_CGROUP_RDMA)
60SUBSYS(rdma)
61#endif
62
59/* 63/*
60 * The following subsystems are not supported on the default hierarchy. 64 * The following subsystems are not supported on the default hierarchy.
61 */ 65 */
diff --git a/init/Kconfig b/init/Kconfig
index 223b734abccd..ef80d46a32b6 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1090,6 +1090,16 @@ config CGROUP_PIDS
1090 since the PIDs limit only affects a process's ability to fork, not to 1090 since the PIDs limit only affects a process's ability to fork, not to
1091 attach to a cgroup. 1091 attach to a cgroup.
1092 1092
1093config CGROUP_RDMA
1094 bool "RDMA controller"
1095 help
1096 Provides enforcement of RDMA resources defined by IB stack.
1097 It is fairly easy for consumers to exhaust RDMA resources, which
1098 can result into resource unavailability to other consumers.
1099 RDMA controller is designed to stop this from happening.
1100 Attaching processes with active RDMA resources to the cgroup
1101 hierarchy is allowed even if can cross the hierarchy's limit.
1102
1093config CGROUP_FREEZER 1103config CGROUP_FREEZER
1094 bool "Freezer controller" 1104 bool "Freezer controller"
1095 help 1105 help
diff --git a/kernel/cgroup/Makefile b/kernel/cgroup/Makefile
index 6d42a3211164..387348a40c64 100644
--- a/kernel/cgroup/Makefile
+++ b/kernel/cgroup/Makefile
@@ -2,4 +2,5 @@ obj-y := cgroup.o namespace.o cgroup-v1.o
2 2
3obj-$(CONFIG_CGROUP_FREEZER) += freezer.o 3obj-$(CONFIG_CGROUP_FREEZER) += freezer.o
4obj-$(CONFIG_CGROUP_PIDS) += pids.o 4obj-$(CONFIG_CGROUP_PIDS) += pids.o
5obj-$(CONFIG_CGROUP_RDMA) += rdma.o
5obj-$(CONFIG_CPUSETS) += cpuset.o 6obj-$(CONFIG_CPUSETS) += cpuset.o
diff --git a/kernel/cgroup/rdma.c b/kernel/cgroup/rdma.c
new file mode 100644
index 000000000000..021bee7a9692
--- /dev/null
+++ b/kernel/cgroup/rdma.c
@@ -0,0 +1,617 @@
1/*
2 * RDMA resource limiting controller for cgroups.
3 *
4 * Used to allow a cgroup hierarchy to stop processes from consuming
5 * additional RDMA resources after a certain limit is reached.
6 *
7 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
8 *
9 * This file is subject to the terms and conditions of version 2 of the GNU
10 * General Public License. See the file COPYING in the main directory of the
11 * Linux distribution for more details.
12 */
13
14#include <linux/bitops.h>
15#include <linux/slab.h>
16#include <linux/seq_file.h>
17#include <linux/cgroup.h>
18#include <linux/parser.h>
19#include <linux/cgroup_rdma.h>
20
21#define RDMACG_MAX_STR "max"
22
23/*
24 * Protects list of resource pools maintained on per cgroup basis
25 * and rdma device list.
26 */
27static DEFINE_MUTEX(rdmacg_mutex);
28static LIST_HEAD(rdmacg_devices);
29
30enum rdmacg_file_type {
31 RDMACG_RESOURCE_TYPE_MAX,
32 RDMACG_RESOURCE_TYPE_STAT,
33};
34
35/*
36 * resource table definition as to be seen by the user.
37 * Need to add entries to it when more resources are
38 * added/defined at IB verb/core layer.
39 */
40static char const *rdmacg_resource_names[] = {
41 [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
42 [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
43};
44
45/* resource tracker for each resource of rdma cgroup */
46struct rdmacg_resource {
47 int max;
48 int usage;
49};
50
51/*
52 * resource pool object which represents per cgroup, per device
53 * resources. There are multiple instances of this object per cgroup,
54 * therefore it cannot be embedded within rdma_cgroup structure. It
55 * is maintained as list.
56 */
57struct rdmacg_resource_pool {
58 struct rdmacg_device *device;
59 struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];
60
61 struct list_head cg_node;
62 struct list_head dev_node;
63
64 /* count active user tasks of this pool */
65 u64 usage_sum;
66 /* total number counts which are set to max */
67 int num_max_cnt;
68};
69
70static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
71{
72 return container_of(css, struct rdma_cgroup, css);
73}
74
75static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
76{
77 return css_rdmacg(cg->css.parent);
78}
79
80static inline struct rdma_cgroup *get_current_rdmacg(void)
81{
82 return css_rdmacg(task_get_css(current, rdma_cgrp_id));
83}
84
85static void set_resource_limit(struct rdmacg_resource_pool *rpool,
86 int index, int new_max)
87{
88 if (new_max == S32_MAX) {
89 if (rpool->resources[index].max != S32_MAX)
90 rpool->num_max_cnt++;
91 } else {
92 if (rpool->resources[index].max == S32_MAX)
93 rpool->num_max_cnt--;
94 }
95 rpool->resources[index].max = new_max;
96}
97
98static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
99{
100 int i;
101
102 for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
103 set_resource_limit(rpool, i, S32_MAX);
104}
105
106static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
107{
108 lockdep_assert_held(&rdmacg_mutex);
109
110 list_del(&rpool->cg_node);
111 list_del(&rpool->dev_node);
112 kfree(rpool);
113}
114
115static struct rdmacg_resource_pool *
116find_cg_rpool_locked(struct rdma_cgroup *cg,
117 struct rdmacg_device *device)
118
119{
120 struct rdmacg_resource_pool *pool;
121
122 lockdep_assert_held(&rdmacg_mutex);
123
124 list_for_each_entry(pool, &cg->rpools, cg_node)
125 if (pool->device == device)
126 return pool;
127
128 return NULL;
129}
130
131static struct rdmacg_resource_pool *
132get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
133{
134 struct rdmacg_resource_pool *rpool;
135
136 rpool = find_cg_rpool_locked(cg, device);
137 if (rpool)
138 return rpool;
139
140 rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
141 if (!rpool)
142 return ERR_PTR(-ENOMEM);
143
144 rpool->device = device;
145 set_all_resource_max_limit(rpool);
146
147 INIT_LIST_HEAD(&rpool->cg_node);
148 INIT_LIST_HEAD(&rpool->dev_node);
149 list_add_tail(&rpool->cg_node, &cg->rpools);
150 list_add_tail(&rpool->dev_node, &device->rpools);
151 return rpool;
152}
153
154/**
155 * uncharge_cg_locked - uncharge resource for rdma cgroup
156 * @cg: pointer to cg to uncharge and all parents in hierarchy
157 * @device: pointer to rdmacg device
158 * @index: index of the resource to uncharge in cg (resource pool)
159 *
160 * It also frees the resource pool which was created as part of
161 * charging operation when there are no resources attached to
162 * resource pool.
163 */
164static void
165uncharge_cg_locked(struct rdma_cgroup *cg,
166 struct rdmacg_device *device,
167 enum rdmacg_resource_type index)
168{
169 struct rdmacg_resource_pool *rpool;
170
171 rpool = find_cg_rpool_locked(cg, device);
172
173 /*
174 * rpool cannot be null at this stage. Let kernel operate in case
175 * if there a bug in IB stack or rdma controller, instead of crashing
176 * the system.
177 */
178 if (unlikely(!rpool)) {
179 pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
180 return;
181 }
182
183 rpool->resources[index].usage--;
184
185 /*
186 * A negative count (or overflow) is invalid,
187 * it indicates a bug in the rdma controller.
188 */
189 WARN_ON_ONCE(rpool->resources[index].usage < 0);
190 rpool->usage_sum--;
191 if (rpool->usage_sum == 0 &&
192 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
193 /*
194 * No user of the rpool and all entries are set to max, so
195 * safe to delete this rpool.
196 */
197 free_cg_rpool_locked(rpool);
198 }
199}
200
201/**
202 * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
203 * @device: pointer to rdmacg device
204 * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup
205 * stop uncharging
206 * @index: index of the resource to uncharge in cg in given resource pool
207 */
208static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
209 struct rdmacg_device *device,
210 struct rdma_cgroup *stop_cg,
211 enum rdmacg_resource_type index)
212{
213 struct rdma_cgroup *p;
214
215 mutex_lock(&rdmacg_mutex);
216
217 for (p = cg; p != stop_cg; p = parent_rdmacg(p))
218 uncharge_cg_locked(p, device, index);
219
220 mutex_unlock(&rdmacg_mutex);
221
222 css_put(&cg->css);
223}
224
225/**
226 * rdmacg_uncharge - hierarchically uncharge rdma resource count
227 * @device: pointer to rdmacg device
228 * @index: index of the resource to uncharge in cgroup in given resource pool
229 */
230void rdmacg_uncharge(struct rdma_cgroup *cg,
231 struct rdmacg_device *device,
232 enum rdmacg_resource_type index)
233{
234 if (index >= RDMACG_RESOURCE_MAX)
235 return;
236
237 rdmacg_uncharge_hierarchy(cg, device, NULL, index);
238}
239EXPORT_SYMBOL(rdmacg_uncharge);
240
241/**
242 * rdmacg_try_charge - hierarchically try to charge the rdma resource
243 * @rdmacg: pointer to rdma cgroup which will own this resource
244 * @device: pointer to rdmacg device
245 * @index: index of the resource to charge in cgroup (resource pool)
246 *
247 * This function follows charging resource in hierarchical way.
248 * It will fail if the charge would cause the new value to exceed the
249 * hierarchical limit.
250 * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
251 * Returns pointer to rdmacg for this resource when charging is successful.
252 *
253 * Charger needs to account resources on two criteria.
254 * (a) per cgroup & (b) per device resource usage.
255 * Per cgroup resource usage ensures that tasks of cgroup doesn't cross
256 * the configured limits. Per device provides granular configuration
257 * in multi device usage. It allocates resource pool in the hierarchy
258 * for each parent it come across for first resource. Later on resource
259 * pool will be available. Therefore it will be much faster thereon
260 * to charge/uncharge.
261 */
262int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
263 struct rdmacg_device *device,
264 enum rdmacg_resource_type index)
265{
266 struct rdma_cgroup *cg, *p;
267 struct rdmacg_resource_pool *rpool;
268 s64 new;
269 int ret = 0;
270
271 if (index >= RDMACG_RESOURCE_MAX)
272 return -EINVAL;
273
274 /*
275 * hold on to css, as cgroup can be removed but resource
276 * accounting happens on css.
277 */
278 cg = get_current_rdmacg();
279
280 mutex_lock(&rdmacg_mutex);
281 for (p = cg; p; p = parent_rdmacg(p)) {
282 rpool = get_cg_rpool_locked(p, device);
283 if (IS_ERR(rpool)) {
284 ret = PTR_ERR(rpool);
285 goto err;
286 } else {
287 new = rpool->resources[index].usage + 1;
288 if (new > rpool->resources[index].max) {
289 ret = -EAGAIN;
290 goto err;
291 } else {
292 rpool->resources[index].usage = new;
293 rpool->usage_sum++;
294 }
295 }
296 }
297 mutex_unlock(&rdmacg_mutex);
298
299 *rdmacg = cg;
300 return 0;
301
302err:
303 mutex_unlock(&rdmacg_mutex);
304 rdmacg_uncharge_hierarchy(cg, device, p, index);
305 return ret;
306}
307EXPORT_SYMBOL(rdmacg_try_charge);
308
309/**
310 * rdmacg_register_device - register rdmacg device to rdma controller.
311 * @device: pointer to rdmacg device whose resources need to be accounted.
312 *
313 * If IB stack wish a device to participate in rdma cgroup resource
314 * tracking, it must invoke this API to register with rdma cgroup before
315 * any user space application can start using the RDMA resources.
316 * Returns 0 on success or EINVAL when table length given is beyond
317 * supported size.
318 */
319int rdmacg_register_device(struct rdmacg_device *device)
320{
321 INIT_LIST_HEAD(&device->dev_node);
322 INIT_LIST_HEAD(&device->rpools);
323
324 mutex_lock(&rdmacg_mutex);
325 list_add_tail(&device->dev_node, &rdmacg_devices);
326 mutex_unlock(&rdmacg_mutex);
327 return 0;
328}
329EXPORT_SYMBOL(rdmacg_register_device);
330
331/**
332 * rdmacg_unregister_device - unregister rdmacg device from rdma controller.
333 * @device: pointer to rdmacg device which was previously registered with rdma
334 * controller using rdmacg_register_device().
335 *
336 * IB stack must invoke this after all the resources of the IB device
337 * are destroyed and after ensuring that no more resources will be created
338 * when this API is invoked.
339 */
340void rdmacg_unregister_device(struct rdmacg_device *device)
341{
342 struct rdmacg_resource_pool *rpool, *tmp;
343
344 /*
345 * Synchronize with any active resource settings,
346 * usage query happening via configfs.
347 */
348 mutex_lock(&rdmacg_mutex);
349 list_del_init(&device->dev_node);
350
351 /*
352 * Now that this device is off the cgroup list, its safe to free
353 * all the rpool resources.
354 */
355 list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
356 free_cg_rpool_locked(rpool);
357
358 mutex_unlock(&rdmacg_mutex);
359}
360EXPORT_SYMBOL(rdmacg_unregister_device);
361
362static int parse_resource(char *c, int *intval)
363{
364 substring_t argstr;
365 const char **table = &rdmacg_resource_names[0];
366 char *name, *value = c;
367 size_t len;
368 int ret, i = 0;
369
370 name = strsep(&value, "=");
371 if (!name || !value)
372 return -EINVAL;
373
374 len = strlen(value);
375
376 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
377 if (strcmp(table[i], name))
378 continue;
379
380 argstr.from = value;
381 argstr.to = value + len;
382
383 ret = match_int(&argstr, intval);
384 if (ret >= 0) {
385 if (*intval < 0)
386 break;
387 return i;
388 }
389 if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
390 *intval = S32_MAX;
391 return i;
392 }
393 break;
394 }
395 return -EINVAL;
396}
397
398static int rdmacg_parse_limits(char *options,
399 int *new_limits, unsigned long *enables)
400{
401 char *c;
402 int err = -EINVAL;
403
404 /* parse resource options */
405 while ((c = strsep(&options, " ")) != NULL) {
406 int index, intval;
407
408 index = parse_resource(c, &intval);
409 if (index < 0)
410 goto err;
411
412 new_limits[index] = intval;
413 *enables |= BIT(index);
414 }
415 return 0;
416
417err:
418 return err;
419}
420
421static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
422{
423 struct rdmacg_device *device;
424
425 lockdep_assert_held(&rdmacg_mutex);
426
427 list_for_each_entry(device, &rdmacg_devices, dev_node)
428 if (!strcmp(name, device->name))
429 return device;
430
431 return NULL;
432}
433
434static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
435 char *buf, size_t nbytes, loff_t off)
436{
437 struct rdma_cgroup *cg = css_rdmacg(of_css(of));
438 const char *dev_name;
439 struct rdmacg_resource_pool *rpool;
440 struct rdmacg_device *device;
441 char *options = strstrip(buf);
442 int *new_limits;
443 unsigned long enables = 0;
444 int i = 0, ret = 0;
445
446 /* extract the device name first */
447 dev_name = strsep(&options, " ");
448 if (!dev_name) {
449 ret = -EINVAL;
450 goto err;
451 }
452
453 new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
454 if (!new_limits) {
455 ret = -ENOMEM;
456 goto err;
457 }
458
459 ret = rdmacg_parse_limits(options, new_limits, &enables);
460 if (ret)
461 goto parse_err;
462
463 /* acquire lock to synchronize with hot plug devices */
464 mutex_lock(&rdmacg_mutex);
465
466 device = rdmacg_get_device_locked(dev_name);
467 if (!device) {
468 ret = -ENODEV;
469 goto dev_err;
470 }
471
472 rpool = get_cg_rpool_locked(cg, device);
473 if (IS_ERR(rpool)) {
474 ret = PTR_ERR(rpool);
475 goto dev_err;
476 }
477
478 /* now set the new limits of the rpool */
479 for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
480 set_resource_limit(rpool, i, new_limits[i]);
481
482 if (rpool->usage_sum == 0 &&
483 rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
484 /*
485 * No user of the rpool and all entries are set to max, so
486 * safe to delete this rpool.
487 */
488 free_cg_rpool_locked(rpool);
489 }
490
491dev_err:
492 mutex_unlock(&rdmacg_mutex);
493
494parse_err:
495 kfree(new_limits);
496
497err:
498 return ret ?: nbytes;
499}
500
501static void print_rpool_values(struct seq_file *sf,
502 struct rdmacg_resource_pool *rpool)
503{
504 enum rdmacg_file_type sf_type;
505 int i;
506 u32 value;
507
508 sf_type = seq_cft(sf)->private;
509
510 for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
511 seq_puts(sf, rdmacg_resource_names[i]);
512 seq_putc(sf, '=');
513 if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
514 if (rpool)
515 value = rpool->resources[i].max;
516 else
517 value = S32_MAX;
518 } else {
519 if (rpool)
520 value = rpool->resources[i].usage;
521 }
522
523 if (value == S32_MAX)
524 seq_puts(sf, RDMACG_MAX_STR);
525 else
526 seq_printf(sf, "%d", value);
527 seq_putc(sf, ' ');
528 }
529}
530
531static int rdmacg_resource_read(struct seq_file *sf, void *v)
532{
533 struct rdmacg_device *device;
534 struct rdmacg_resource_pool *rpool;
535 struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
536
537 mutex_lock(&rdmacg_mutex);
538
539 list_for_each_entry(device, &rdmacg_devices, dev_node) {
540 seq_printf(sf, "%s ", device->name);
541
542 rpool = find_cg_rpool_locked(cg, device);
543 print_rpool_values(sf, rpool);
544
545 seq_putc(sf, '\n');
546 }
547
548 mutex_unlock(&rdmacg_mutex);
549 return 0;
550}
551
552static struct cftype rdmacg_files[] = {
553 {
554 .name = "max",
555 .write = rdmacg_resource_set_max,
556 .seq_show = rdmacg_resource_read,
557 .private = RDMACG_RESOURCE_TYPE_MAX,
558 .flags = CFTYPE_NOT_ON_ROOT,
559 },
560 {
561 .name = "current",
562 .seq_show = rdmacg_resource_read,
563 .private = RDMACG_RESOURCE_TYPE_STAT,
564 .flags = CFTYPE_NOT_ON_ROOT,
565 },
566 { } /* terminate */
567};
568
569static struct cgroup_subsys_state *
570rdmacg_css_alloc(struct cgroup_subsys_state *parent)
571{
572 struct rdma_cgroup *cg;
573
574 cg = kzalloc(sizeof(*cg), GFP_KERNEL);
575 if (!cg)
576 return ERR_PTR(-ENOMEM);
577
578 INIT_LIST_HEAD(&cg->rpools);
579 return &cg->css;
580}
581
/* css_free callback: release the rdma cgroup allocated in css_alloc. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
588
589/**
590 * rdmacg_css_offline - cgroup css_offline callback
591 * @css: css of interest
592 *
593 * This function is called when @css is about to go away and responsible
594 * for shooting down all rdmacg associated with @css. As part of that it
595 * marks all the resource pool entries to max value, so that when resources are
596 * uncharged, associated resource pool can be freed as well.
597 */
598static void rdmacg_css_offline(struct cgroup_subsys_state *css)
599{
600 struct rdma_cgroup *cg = css_rdmacg(css);
601 struct rdmacg_resource_pool *rpool;
602
603 mutex_lock(&rdmacg_mutex);
604
605 list_for_each_entry(rpool, &cg->rpools, cg_node)
606 set_all_resource_max_limit(rpool);
607
608 mutex_unlock(&rdmacg_mutex);
609}
610
611struct cgroup_subsys rdma_cgrp_subsys = {
612 .css_alloc = rdmacg_css_alloc,
613 .css_free = rdmacg_css_free,
614 .css_offline = rdmacg_css_offline,
615 .legacy_cftypes = rdmacg_files,
616 .dfl_cftypes = rdmacg_files,
617};