diff options
author | Glauber Costa <glommer@parallels.com> | 2011-12-11 16:47:06 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-12-12 19:04:11 -0500 |
commit | 3aaabe2342c36bf48567b88fa78b819eee14bb5e (patch) | |
tree | 1364c84c53ba6382f9a9df9d6f42a3bc37427d05 | |
parent | 3dc43e3e4d0b52197d3205214fe8f162f9e0c334 (diff) |
tcp buffer limitation: per-cgroup limit
This patch uses the "tcp.limit_in_bytes" field of the kmem_cgroup to
effectively control the amount of kernel memory pinned by a cgroup.
This value is ignored in the root cgroup; in all other cgroups, it
caps the value specified by the admin in the net namespace's
view of sysctl_tcp_mem.
If namespaces are being used, the admin is allowed to set a
value bigger than the cgroup's maximum, the same way they are allowed
to set pretty much unlimited values on a real box.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: Hiroyuki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/cgroups/memory.txt | 1 | ||||
-rw-r--r-- | include/net/tcp_memcontrol.h | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 14 | ||||
-rw-r--r-- | net/ipv4/tcp_memcontrol.c | 137 |
4 files changed, 152 insertions, 2 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index 687dea5bf1fd..1c9779a74a25 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt | |||
@@ -78,6 +78,7 @@ Brief summary of control files. | |||
78 | 78 | ||
79 | memory.independent_kmem_limit # select whether or not kernel memory limits are | 79 | memory.independent_kmem_limit # select whether or not kernel memory limits are |
80 | independent of user limits | 80 | independent of user limits |
81 | memory.kmem.tcp.limit_in_bytes # set/show hard limit for tcp buf memory | ||
81 | 82 | ||
82 | 1. History | 83 | 1. History |
83 | 84 | ||
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h index 5f5e1582d764..3512082fa909 100644 --- a/include/net/tcp_memcontrol.h +++ b/include/net/tcp_memcontrol.h | |||
@@ -14,4 +14,6 @@ struct tcp_memcontrol { | |||
14 | struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); | 14 | struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg); |
15 | int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); | 15 | int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); |
16 | void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); | 16 | void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss); |
17 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg); | ||
18 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx); | ||
17 | #endif /* _TCP_MEMCG_H */ | 19 | #endif /* _TCP_MEMCG_H */ |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index bbd67abcb51d..fe9bf915676c 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <net/cipso_ipv4.h> | 24 | #include <net/cipso_ipv4.h> |
25 | #include <net/inet_frag.h> | 25 | #include <net/inet_frag.h> |
26 | #include <net/ping.h> | 26 | #include <net/ping.h> |
27 | #include <net/tcp_memcontrol.h> | ||
27 | 28 | ||
28 | static int zero; | 29 | static int zero; |
29 | static int tcp_retr1_max = 255; | 30 | static int tcp_retr1_max = 255; |
@@ -182,6 +183,9 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
182 | int ret; | 183 | int ret; |
183 | unsigned long vec[3]; | 184 | unsigned long vec[3]; |
184 | struct net *net = current->nsproxy->net_ns; | 185 | struct net *net = current->nsproxy->net_ns; |
186 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
187 | struct mem_cgroup *memcg; | ||
188 | #endif | ||
185 | 189 | ||
186 | ctl_table tmp = { | 190 | ctl_table tmp = { |
187 | .data = &vec, | 191 | .data = &vec, |
@@ -198,6 +202,16 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, | |||
198 | if (ret) | 202 | if (ret) |
199 | return ret; | 203 | return ret; |
200 | 204 | ||
205 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM | ||
206 | rcu_read_lock(); | ||
207 | memcg = mem_cgroup_from_task(current); | ||
208 | |||
209 | tcp_prot_mem(memcg, vec[0], 0); | ||
210 | tcp_prot_mem(memcg, vec[1], 1); | ||
211 | tcp_prot_mem(memcg, vec[2], 2); | ||
212 | rcu_read_unlock(); | ||
213 | #endif | ||
214 | |||
201 | net->ipv4.sysctl_tcp_mem[0] = vec[0]; | 215 | net->ipv4.sysctl_tcp_mem[0] = vec[0]; |
202 | net->ipv4.sysctl_tcp_mem[1] = vec[1]; | 216 | net->ipv4.sysctl_tcp_mem[1] = vec[1]; |
203 | net->ipv4.sysctl_tcp_mem[2] = vec[2]; | 217 | net->ipv4.sysctl_tcp_mem[2] = vec[2]; |
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index bfb0c2b8df46..e3533903409d 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c | |||
@@ -6,6 +6,19 @@ | |||
6 | #include <linux/memcontrol.h> | 6 | #include <linux/memcontrol.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | 8 | ||
9 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft); | ||
10 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||
11 | const char *buffer); | ||
12 | |||
13 | static struct cftype tcp_files[] = { | ||
14 | { | ||
15 | .name = "kmem.tcp.limit_in_bytes", | ||
16 | .write_string = tcp_cgroup_write, | ||
17 | .read_u64 = tcp_cgroup_read, | ||
18 | .private = RES_LIMIT, | ||
19 | }, | ||
20 | }; | ||
21 | |||
9 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) | 22 | static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto) |
10 | { | 23 | { |
11 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); | 24 | return container_of(cg_proto, struct tcp_memcontrol, cg_proto); |
@@ -34,7 +47,7 @@ int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
34 | 47 | ||
35 | cg_proto = tcp_prot.proto_cgroup(memcg); | 48 | cg_proto = tcp_prot.proto_cgroup(memcg); |
36 | if (!cg_proto) | 49 | if (!cg_proto) |
37 | return 0; | 50 | goto create_files; |
38 | 51 | ||
39 | tcp = tcp_from_cgproto(cg_proto); | 52 | tcp = tcp_from_cgproto(cg_proto); |
40 | 53 | ||
@@ -57,7 +70,9 @@ int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
57 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; | 70 | cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated; |
58 | cg_proto->memcg = memcg; | 71 | cg_proto->memcg = memcg; |
59 | 72 | ||
60 | return 0; | 73 | create_files: |
74 | return cgroup_add_files(cgrp, ss, tcp_files, | ||
75 | ARRAY_SIZE(tcp_files)); | ||
61 | } | 76 | } |
62 | EXPORT_SYMBOL(tcp_init_cgroup); | 77 | EXPORT_SYMBOL(tcp_init_cgroup); |
63 | 78 | ||
@@ -66,6 +81,7 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
66 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); | 81 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); |
67 | struct cg_proto *cg_proto; | 82 | struct cg_proto *cg_proto; |
68 | struct tcp_memcontrol *tcp; | 83 | struct tcp_memcontrol *tcp; |
84 | u64 val; | ||
69 | 85 | ||
70 | cg_proto = tcp_prot.proto_cgroup(memcg); | 86 | cg_proto = tcp_prot.proto_cgroup(memcg); |
71 | if (!cg_proto) | 87 | if (!cg_proto) |
@@ -73,5 +89,122 @@ void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss) | |||
73 | 89 | ||
74 | tcp = tcp_from_cgproto(cg_proto); | 90 | tcp = tcp_from_cgproto(cg_proto); |
75 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); | 91 | percpu_counter_destroy(&tcp->tcp_sockets_allocated); |
92 | |||
93 | val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE); | ||
94 | |||
95 | if (val != RESOURCE_MAX) | ||
96 | jump_label_dec(&memcg_socket_limit_enabled); | ||
76 | } | 97 | } |
77 | EXPORT_SYMBOL(tcp_destroy_cgroup); | 98 | EXPORT_SYMBOL(tcp_destroy_cgroup); |
99 | |||
100 | static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) | ||
101 | { | ||
102 | struct net *net = current->nsproxy->net_ns; | ||
103 | struct tcp_memcontrol *tcp; | ||
104 | struct cg_proto *cg_proto; | ||
105 | u64 old_lim; | ||
106 | int i; | ||
107 | int ret; | ||
108 | |||
109 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
110 | if (!cg_proto) | ||
111 | return -EINVAL; | ||
112 | |||
113 | if (val > RESOURCE_MAX) | ||
114 | val = RESOURCE_MAX; | ||
115 | |||
116 | tcp = tcp_from_cgproto(cg_proto); | ||
117 | |||
118 | old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | ||
119 | ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val); | ||
120 | if (ret) | ||
121 | return ret; | ||
122 | |||
123 | for (i = 0; i < 3; i++) | ||
124 | tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, | ||
125 | net->ipv4.sysctl_tcp_mem[i]); | ||
126 | |||
127 | if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) | ||
128 | jump_label_dec(&memcg_socket_limit_enabled); | ||
129 | else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) | ||
130 | jump_label_inc(&memcg_socket_limit_enabled); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static int tcp_cgroup_write(struct cgroup *cont, struct cftype *cft, | ||
136 | const char *buffer) | ||
137 | { | ||
138 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | ||
139 | unsigned long long val; | ||
140 | int ret = 0; | ||
141 | |||
142 | switch (cft->private) { | ||
143 | case RES_LIMIT: | ||
144 | /* see memcontrol.c */ | ||
145 | ret = res_counter_memparse_write_strategy(buffer, &val); | ||
146 | if (ret) | ||
147 | break; | ||
148 | ret = tcp_update_limit(memcg, val); | ||
149 | break; | ||
150 | default: | ||
151 | ret = -EINVAL; | ||
152 | break; | ||
153 | } | ||
154 | return ret; | ||
155 | } | ||
156 | |||
157 | static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val) | ||
158 | { | ||
159 | struct tcp_memcontrol *tcp; | ||
160 | struct cg_proto *cg_proto; | ||
161 | |||
162 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
163 | if (!cg_proto) | ||
164 | return default_val; | ||
165 | |||
166 | tcp = tcp_from_cgproto(cg_proto); | ||
167 | return res_counter_read_u64(&tcp->tcp_memory_allocated, type); | ||
168 | } | ||
169 | |||
170 | static u64 tcp_cgroup_read(struct cgroup *cont, struct cftype *cft) | ||
171 | { | ||
172 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | ||
173 | u64 val; | ||
174 | |||
175 | switch (cft->private) { | ||
176 | case RES_LIMIT: | ||
177 | val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX); | ||
178 | break; | ||
179 | default: | ||
180 | BUG(); | ||
181 | } | ||
182 | return val; | ||
183 | } | ||
184 | |||
185 | unsigned long long tcp_max_memory(const struct mem_cgroup *memcg) | ||
186 | { | ||
187 | struct tcp_memcontrol *tcp; | ||
188 | struct cg_proto *cg_proto; | ||
189 | |||
190 | cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg); | ||
191 | if (!cg_proto) | ||
192 | return 0; | ||
193 | |||
194 | tcp = tcp_from_cgproto(cg_proto); | ||
195 | return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); | ||
196 | } | ||
197 | |||
198 | void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx) | ||
199 | { | ||
200 | struct tcp_memcontrol *tcp; | ||
201 | struct cg_proto *cg_proto; | ||
202 | |||
203 | cg_proto = tcp_prot.proto_cgroup(memcg); | ||
204 | if (!cg_proto) | ||
205 | return; | ||
206 | |||
207 | tcp = tcp_from_cgproto(cg_proto); | ||
208 | |||
209 | tcp->tcp_prot_mem[idx] = val; | ||
210 | } | ||