diff options
author | Pavel Emelianov <xemul@openvz.org> | 2008-02-07 03:13:49 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-07 11:42:18 -0500 |
commit | e552b6617067ab785256dcec5ca29eeea981aacb (patch) | |
tree | 672cccc2e21abfa4dcdc1bdb198e748894bbbbc6 | |
parent | 59bd26582de660d4c9c26125747f1b4a5eb40d1e (diff) |
Memory controller: resource counters
With fixes from David Rientjes <rientjes@google.com>
Introduce generic structures and routines for resource accounting.
Each resource-accounting cgroup is expected to embed a res_counter
alongside its cgroup_subsystem_state and its resource-specific members.
Signed-off-by: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Kirill Korotaev <dev@sw.ru>
Cc: Herbert Poetzl <herbert@13thfloor.at>
Cc: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com>
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Pavel Emelianov <xemul@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/res_counter.h | 102 | ||||
-rw-r--r-- | init/Kconfig | 7 | ||||
-rw-r--r-- | kernel/Makefile | 1 | ||||
-rw-r--r-- | kernel/res_counter.c | 120 |
4 files changed, 230 insertions, 0 deletions
diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h new file mode 100644 index 000000000000..eeb3f7749772 --- /dev/null +++ b/include/linux/res_counter.h | |||
@@ -0,0 +1,102 @@ | |||
1 | #ifndef __RES_COUNTER_H__ | ||
2 | #define __RES_COUNTER_H__ | ||
3 | |||
4 | /* | ||
5 | * Resource Counters | ||
6 | * Contain common data types and routines for resource accounting | ||
7 | * | ||
8 | * Copyright 2007 OpenVZ SWsoft Inc | ||
9 | * | ||
10 | * Author: Pavel Emelianov <xemul@openvz.org> | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <linux/cgroup.h> | ||
15 | |||
16 | /* | ||
17 | * The core object: a cgroup controller that wants to account for some | ||
18 | * resource embeds this counter in its own structure and uses the | ||
19 | * helpers declared below. | ||
20 | */ | ||
21 | |||
22 | struct res_counter { | ||
23 | /* | ||
24 | * the current resource consumption level, in controller-defined units | ||
25 | */ | ||
26 | unsigned long usage; | ||
27 | /* | ||
28 | * the limit: a charge that would push usage above it is refused | ||
29 | */ | ||
30 | unsigned long limit; | ||
31 | /* | ||
32 | * the number of charge attempts that were refused because of the limit | ||
33 | */ | ||
34 | unsigned long failcnt; | ||
35 | /* | ||
36 | * the lock that protects all of the fields above. | ||
37 | * the non-_locked helpers take it with interrupts disabled (irqsave) | ||
38 | */ | ||
39 | spinlock_t lock; | ||
40 | }; | ||
41 | |||
42 | /* | ||
43 | * Helpers to interact with userspace | ||
44 | * res_counter_read/_write - copy one field of the res_counter to the | ||
45 | * user as decimal text / parse decimal text from the user into it | ||
46 | * | ||
47 | * @counter: the counter in question | ||
48 | * @member: the field to work with (see RES_xxx below) | ||
49 | * @buf: the userspace buffer to operate on,... | ||
50 | * @nbytes: its size... | ||
51 | * @pos: and the file offset (only _read makes use of it). | ||
52 | */ | ||
53 | |||
54 | ssize_t res_counter_read(struct res_counter *counter, int member, | ||
55 | const char __user *buf, size_t nbytes, loff_t *pos); | ||
56 | ssize_t res_counter_write(struct res_counter *counter, int member, | ||
57 | const char __user *buf, size_t nbytes, loff_t *pos); | ||
58 | |||
59 | /* | ||
60 | * the field descriptors, one for each unsigned long member of res_counter | ||
61 | */ | ||
62 | |||
63 | enum { | ||
64 | RES_USAGE, | ||
65 | RES_LIMIT, | ||
66 | RES_FAILCNT, | ||
67 | }; | ||
68 | |||
69 | /* | ||
70 | * helpers for accounting: init sets up the lock and the initial limit | ||
71 | */ | ||
72 | |||
73 | void res_counter_init(struct res_counter *counter); | ||
74 | |||
75 | /* | ||
76 | * charge - try to consume more resource. | ||
77 | * | ||
78 | * @counter: the counter | ||
79 | * @val: the amount of the resource. each controller defines its own | ||
80 | * units, e.g. numbers, bytes, Kbytes, etc | ||
81 | * | ||
82 | * returns 0 on success and -ENOMEM if the charge would push usage over | ||
83 | * the limit. the _locked call expects counter->lock to be already held | ||
84 | */ | ||
85 | |||
86 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val); | ||
87 | int res_counter_charge(struct res_counter *counter, unsigned long val); | ||
88 | |||
89 | /* | ||
90 | * uncharge - report that some portion of the resource has been released | ||
91 | * | ||
92 | * @counter: the counter | ||
93 | * @val: the amount of the resource being released | ||
94 | * | ||
95 | * on underflow (val > usage) these calls warn and clamp usage to zero. | ||
96 | * the _locked call expects counter->lock to be already held | ||
97 | */ | ||
98 | |||
99 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); | ||
100 | void res_counter_uncharge(struct res_counter *counter, unsigned long val); | ||
101 | |||
102 | #endif | ||
diff --git a/init/Kconfig b/init/Kconfig index 92b23e256614..d372bd616b0c 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -369,6 +369,13 @@ config CGROUP_CPUACCT | |||
369 | Provides a simple Resource Controller for monitoring the | 369 | Provides a simple Resource Controller for monitoring the |
370 | total CPU consumed by the tasks in a cgroup | 370 | total CPU consumed by the tasks in a cgroup |
371 | 371 | ||
372 | config RESOURCE_COUNTERS | ||
373 | bool "Resource counters" | ||
374 | depends on CGROUPS | ||
375 | help | ||
376 | This option enables controller independent resource accounting | ||
377 | infrastructure that works with cgroups | ||
378 | |||
372 | config SYSFS_DEPRECATED | 379 | config SYSFS_DEPRECATED |
373 | bool "Create deprecated sysfs files" | 380 | bool "Create deprecated sysfs files" |
374 | depends on SYSFS | 381 | depends on SYSFS |
diff --git a/kernel/Makefile b/kernel/Makefile index 135a1b943446..685697c0a181 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -43,6 +43,7 @@ obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o | |||
43 | obj-$(CONFIG_CPUSETS) += cpuset.o | 43 | obj-$(CONFIG_CPUSETS) += cpuset.o |
44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o | 44 | obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o |
45 | obj-$(CONFIG_IKCONFIG) += configs.o | 45 | obj-$(CONFIG_IKCONFIG) += configs.o |
46 | obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o | ||
46 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o | 47 | obj-$(CONFIG_STOP_MACHINE) += stop_machine.o |
47 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o | 48 | obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o |
48 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o | 49 | obj-$(CONFIG_AUDIT) += audit.o auditfilter.o |
diff --git a/kernel/res_counter.c b/kernel/res_counter.c new file mode 100644 index 000000000000..722c484b068b --- /dev/null +++ b/kernel/res_counter.c | |||
@@ -0,0 +1,120 @@ | |||
1 | /* | ||
2 | * resource cgroups | ||
3 | * | ||
4 | * Copyright 2007 OpenVZ SWsoft Inc | ||
5 | * | ||
6 | * Author: Pavel Emelianov <xemul@openvz.org> | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/types.h> | ||
11 | #include <linux/parser.h> | ||
12 | #include <linux/fs.h> | ||
13 | #include <linux/res_counter.h> | ||
14 | #include <linux/uaccess.h> | ||
15 | |||
16 | void res_counter_init(struct res_counter *counter) | ||
17 | { | ||
18 | spin_lock_init(&counter->lock); | ||
19 | counter->limit = (unsigned long)LONG_MAX; | ||
20 | } | ||
21 | |||
22 | int res_counter_charge_locked(struct res_counter *counter, unsigned long val) | ||
23 | { | ||
24 | if (counter->usage + val > counter->limit) { | ||
25 | counter->failcnt++; | ||
26 | return -ENOMEM; | ||
27 | } | ||
28 | |||
29 | counter->usage += val; | ||
30 | return 0; | ||
31 | } | ||
32 | |||
33 | int res_counter_charge(struct res_counter *counter, unsigned long val) | ||
34 | { | ||
35 | int ret; | ||
36 | unsigned long flags; | ||
37 | |||
38 | spin_lock_irqsave(&counter->lock, flags); | ||
39 | ret = res_counter_charge_locked(counter, val); | ||
40 | spin_unlock_irqrestore(&counter->lock, flags); | ||
41 | return ret; | ||
42 | } | ||
43 | |||
44 | void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) | ||
45 | { | ||
46 | if (WARN_ON(counter->usage < val)) | ||
47 | val = counter->usage; | ||
48 | |||
49 | counter->usage -= val; | ||
50 | } | ||
51 | |||
52 | void res_counter_uncharge(struct res_counter *counter, unsigned long val) | ||
53 | { | ||
54 | unsigned long flags; | ||
55 | |||
56 | spin_lock_irqsave(&counter->lock, flags); | ||
57 | res_counter_uncharge_locked(counter, val); | ||
58 | spin_unlock_irqrestore(&counter->lock, flags); | ||
59 | } | ||
60 | |||
61 | |||
62 | static inline unsigned long *res_counter_member(struct res_counter *counter, | ||
63 | int member) | ||
64 | { | ||
65 | switch (member) { | ||
66 | case RES_USAGE: | ||
67 | return &counter->usage; | ||
68 | case RES_LIMIT: | ||
69 | return &counter->limit; | ||
70 | case RES_FAILCNT: | ||
71 | return &counter->failcnt; | ||
72 | }; | ||
73 | |||
74 | BUG(); | ||
75 | return NULL; | ||
76 | } | ||
77 | |||
78 | ssize_t res_counter_read(struct res_counter *counter, int member, | ||
79 | const char __user *userbuf, size_t nbytes, loff_t *pos) | ||
80 | { | ||
81 | unsigned long *val; | ||
82 | char buf[64], *s; | ||
83 | |||
84 | s = buf; | ||
85 | val = res_counter_member(counter, member); | ||
86 | s += sprintf(s, "%lu\n", *val); | ||
87 | return simple_read_from_buffer((void __user *)userbuf, nbytes, | ||
88 | pos, buf, s - buf); | ||
89 | } | ||
90 | |||
91 | ssize_t res_counter_write(struct res_counter *counter, int member, | ||
92 | const char __user *userbuf, size_t nbytes, loff_t *pos) | ||
93 | { | ||
94 | int ret; | ||
95 | char *buf, *end; | ||
96 | unsigned long tmp, *val; | ||
97 | |||
98 | buf = kmalloc(nbytes + 1, GFP_KERNEL); | ||
99 | ret = -ENOMEM; | ||
100 | if (buf == NULL) | ||
101 | goto out; | ||
102 | |||
103 | buf[nbytes] = '\0'; | ||
104 | ret = -EFAULT; | ||
105 | if (copy_from_user(buf, userbuf, nbytes)) | ||
106 | goto out_free; | ||
107 | |||
108 | ret = -EINVAL; | ||
109 | tmp = simple_strtoul(buf, &end, 10); | ||
110 | if (*end != '\0') | ||
111 | goto out_free; | ||
112 | |||
113 | val = res_counter_member(counter, member); | ||
114 | *val = tmp; | ||
115 | ret = nbytes; | ||
116 | out_free: | ||
117 | kfree(buf); | ||
118 | out: | ||
119 | return ret; | ||
120 | } | ||