aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGlauber Costa <glommer@parallels.com>2011-12-11 16:47:04 -0500
committerDavid S. Miller <davem@davemloft.net>2011-12-12 19:04:10 -0500
commitd1a4c0b37c296e600ffe08edb0db2dc1b8f550d7 (patch)
tree5c3675582cbbdc99f720aa1dcc1821e26c2be1ab
parente1aab161e0135aafcd439be20b4f35e4b0922d95 (diff)
tcp memory pressure controls
This patch introduces memory pressure controls for the tcp protocol. It uses the generic socket memory pressure code introduced in earlier patches, and fills in the necessary data in the cg_proto struct. Signed-off-by: Glauber Costa <glommer@parallels.com> Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> CC: Eric W. Biederman <ebiederm@xmission.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/cgroups/memory.txt2
-rw-r--r--include/linux/memcontrol.h1
-rw-r--r--include/net/sock.h2
-rw-r--r--include/net/tcp_memcontrol.h17
-rw-r--r--mm/memcontrol.c40
-rw-r--r--net/core/sock.c43
-rw-r--r--net/ipv4/Makefile1
-rw-r--r--net/ipv4/tcp_ipv4.c9
-rw-r--r--net/ipv4/tcp_memcontrol.c74
-rw-r--r--net/ipv6/tcp_ipv6.c5
10 files changed, 189 insertions, 5 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23a8dc5319a3..687dea5bf1fd 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -293,6 +293,8 @@ to trigger slab reclaim when those limits are reached.
293thresholds. The Memory Controller allows them to be controlled individually 293thresholds. The Memory Controller allows them to be controlled individually
294per cgroup, instead of globally. 294per cgroup, instead of globally.
295 295
296* tcp memory pressure: sockets memory pressure for the tcp protocol.
297
2963. User Interface 2983. User Interface
297 299
2980. Configuration 3000. Configuration
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f15021b9f734..1513994ce207 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
86extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); 86extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
87 87
88extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); 88extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
89extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
89 90
90static inline 91static inline
91int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup) 92int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
diff --git a/include/net/sock.h b/include/net/sock.h
index d5eab256167c..18ecc9919d29 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -64,6 +64,8 @@
64#include <net/dst.h> 64#include <net/dst.h>
65#include <net/checksum.h> 65#include <net/checksum.h>
66 66
67int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
68void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
67/* 69/*
68 * This structure really needs to be cleaned up. 70 * This structure really needs to be cleaned up.
69 * Most of it is for TCP, and not used by any of 71 * Most of it is for TCP, and not used by any of
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
new file mode 100644
index 000000000000..5f5e1582d764
--- /dev/null
+++ b/include/net/tcp_memcontrol.h
@@ -0,0 +1,17 @@
1#ifndef _TCP_MEMCG_H
2#define _TCP_MEMCG_H
3
/*
 * Per-memory-cgroup TCP accounting state.
 *
 * One instance is embedded in struct mem_cgroup (as tcp_mem) and is filled
 * in by tcp_init_cgroup(), which also points the generic cg_proto fields at
 * the members below.
 */
4struct tcp_memcontrol {
/* Generic per-protocol view; tcp_from_cgproto() uses container_of() on this member. */
5 struct cg_proto cg_proto;
6 /* per-cgroup tcp memory pressure knobs */
/* Charged memory; its res_counter parent is the parent cgroup's counter, when there is one. */
7 struct res_counter tcp_memory_allocated;
/* Per-cpu socket count; initialised to 0 by tcp_init_cgroup(). */
8 struct percpu_counter tcp_sockets_allocated;
9 /* the next two fields are read-mostly; keep them at the end */
/* Copied from sysctl_tcp_mem[0..2] when the cgroup is initialised. */
10 long tcp_prot_mem[3];
/* Pressure flag; cleared at init and reachable via cg_proto->memory_pressure. */
11 int tcp_memory_pressure;
12};
13
/* Returns the cg_proto embedded in @memcg, or NULL for a NULL or root memcg. */
14struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
/* Initialises the TCP state of the memcg behind @cgrp; returns 0 on success. */
15int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
/* Destroys the per-cpu socket counter of the memcg behind @cgrp. */
16void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
17#endif /* _TCP_MEMCG_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3de3901ae0a7..7266202fa7cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -50,6 +50,8 @@
50#include <linux/cpu.h> 50#include <linux/cpu.h>
51#include <linux/oom.h> 51#include <linux/oom.h>
52#include "internal.h" 52#include "internal.h"
53#include <net/sock.h>
54#include <net/tcp_memcontrol.h>
53 55
54#include <asm/uaccess.h> 56#include <asm/uaccess.h>
55 57
@@ -295,6 +297,10 @@ struct mem_cgroup {
295 */ 297 */
296 struct mem_cgroup_stat_cpu nocpu_base; 298 struct mem_cgroup_stat_cpu nocpu_base;
297 spinlock_t pcp_counter_lock; 299 spinlock_t pcp_counter_lock;
300
301#ifdef CONFIG_INET
302 struct tcp_memcontrol tcp_mem;
303#endif
298}; 304};
299 305
300/* Stuffs for move charges at task migration. */ 306/* Stuffs for move charges at task migration. */
@@ -384,6 +390,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg);
384#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM 390#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
385#ifdef CONFIG_INET 391#ifdef CONFIG_INET
386#include <net/sock.h> 392#include <net/sock.h>
393#include <net/ip.h>
387 394
388static bool mem_cgroup_is_root(struct mem_cgroup *memcg); 395static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
389void sock_update_memcg(struct sock *sk) 396void sock_update_memcg(struct sock *sk)
@@ -418,6 +425,15 @@ void sock_release_memcg(struct sock *sk)
418 mem_cgroup_put(memcg); 425 mem_cgroup_put(memcg);
419 } 426 }
420} 427}
428
429struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
430{
431 if (!memcg || mem_cgroup_is_root(memcg))
432 return NULL;
433
434 return &memcg->tcp_mem.cg_proto;
435}
436EXPORT_SYMBOL(tcp_proto_cgroup);
421#endif /* CONFIG_INET */ 437#endif /* CONFIG_INET */
422#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ 438#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
423 439
@@ -800,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
800 preempt_enable(); 816 preempt_enable();
801} 817}
802 818
803static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont) 819struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
804{ 820{
805 return container_of(cgroup_subsys_state(cont, 821 return container_of(cgroup_subsys_state(cont,
806 mem_cgroup_subsys_id), struct mem_cgroup, 822 mem_cgroup_subsys_id), struct mem_cgroup,
@@ -4732,14 +4748,34 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
4732 4748
4733 ret = cgroup_add_files(cont, ss, kmem_cgroup_files, 4749 ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
4734 ARRAY_SIZE(kmem_cgroup_files)); 4750 ARRAY_SIZE(kmem_cgroup_files));
4751
4752 /*
4753 * Part of this would be better living in a separate allocation
4754 * function, leaving us with just the cgroup tree population work.
4755 * We, however, depend on state such as network's proto_list that
4756 * is only initialized after cgroup creation. The least cumbersome
4757 * way to deal with that is to defer it all to populate time.
4758 */
4759 if (!ret)
4760 ret = mem_cgroup_sockets_init(cont, ss);
4735 return ret; 4761 return ret;
4736}; 4762};
4737 4763
/*
 * Kernel-memory teardown hook for a memory cgroup: forwards to
 * mem_cgroup_sockets_destroy() so the socket protocols can release their
 * per-cgroup state. Note the argument order is swapped in the forwarded call.
 */
4764static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
4765 struct cgroup *cont)
4766{
4767 mem_cgroup_sockets_destroy(cont, ss);
4768}
4738#else 4769#else
4739static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss) 4770static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
4740{ 4771{
4741 return 0; 4772 return 0;
4742} 4773}
4774
/*
 * Stub used in the #else branch of the kmem configuration guard: there is
 * no per-cgroup socket state to tear down, so this is intentionally empty.
 */
4775static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
4776 struct cgroup *cont)
4777{
4778}
4743#endif 4779#endif
4744 4780
4745static struct cftype mem_cgroup_files[] = { 4781static struct cftype mem_cgroup_files[] = {
@@ -5098,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
5098{ 5134{
5099 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); 5135 struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
5100 5136
5137 kmem_cgroup_destroy(ss, cont);
5138
5101 mem_cgroup_put(memcg); 5139 mem_cgroup_put(memcg);
5102} 5140}
5103 5141
diff --git a/net/core/sock.c b/net/core/sock.c
index 6a871b8fdd20..5a6a90620656 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -136,6 +136,46 @@
136#include <net/tcp.h> 136#include <net/tcp.h>
137#endif 137#endif
138 138
139static DEFINE_RWLOCK(proto_list_lock);
140static LIST_HEAD(proto_list);
141
142#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
143int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
144{
145 struct proto *proto;
146 int ret = 0;
147
148 read_lock(&proto_list_lock);
149 list_for_each_entry(proto, &proto_list, node) {
150 if (proto->init_cgroup) {
151 ret = proto->init_cgroup(cgrp, ss);
152 if (ret)
153 goto out;
154 }
155 }
156
157 read_unlock(&proto_list_lock);
158 return ret;
159out:
160 list_for_each_entry_continue_reverse(proto, &proto_list, node)
161 if (proto->destroy_cgroup)
162 proto->destroy_cgroup(cgrp, ss);
163 read_unlock(&proto_list_lock);
164 return ret;
165}
166
167void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
168{
169 struct proto *proto;
170
171 read_lock(&proto_list_lock);
172 list_for_each_entry_reverse(proto, &proto_list, node)
173 if (proto->destroy_cgroup)
174 proto->destroy_cgroup(cgrp, ss);
175 read_unlock(&proto_list_lock);
176}
177#endif
178
139/* 179/*
140 * Each address family might have different locking rules, so we have 180 * Each address family might have different locking rules, so we have
141 * one slock key per address family: 181 * one slock key per address family:
@@ -2291,9 +2331,6 @@ void sk_common_release(struct sock *sk)
2291} 2331}
2292EXPORT_SYMBOL(sk_common_release); 2332EXPORT_SYMBOL(sk_common_release);
2293 2333
2294static DEFINE_RWLOCK(proto_list_lock);
2295static LIST_HEAD(proto_list);
2296
2297#ifdef CONFIG_PROC_FS 2334#ifdef CONFIG_PROC_FS
2298#define PROTO_INUSE_NR 64 /* should be enough for the first time */ 2335#define PROTO_INUSE_NR 64 /* should be enough for the first time */
2299struct prot_inuse { 2336struct prot_inuse {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e9d98e621112..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
48obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o 48obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
49obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o 49obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
50obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o 50obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
51obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
51obj-$(CONFIG_NETLABEL) += cipso_ipv4.o 52obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
52 53
53obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ 54obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f48bf312cfe8..42714cb1fef3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
73#include <net/xfrm.h> 73#include <net/xfrm.h>
74#include <net/netdma.h> 74#include <net/netdma.h>
75#include <net/secure_seq.h> 75#include <net/secure_seq.h>
76#include <net/tcp_memcontrol.h>
76 77
77#include <linux/inet.h> 78#include <linux/inet.h>
78#include <linux/ipv6.h> 79#include <linux/ipv6.h>
@@ -1917,6 +1918,7 @@ static int tcp_v4_init_sock(struct sock *sk)
1917 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 1918 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1918 1919
1919 local_bh_disable(); 1920 local_bh_disable();
1921 sock_update_memcg(sk);
1920 sk_sockets_allocated_inc(sk); 1922 sk_sockets_allocated_inc(sk);
1921 local_bh_enable(); 1923 local_bh_enable();
1922 1924
@@ -1974,6 +1976,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
1974 } 1976 }
1975 1977
1976 sk_sockets_allocated_dec(sk); 1978 sk_sockets_allocated_dec(sk);
1979 sock_release_memcg(sk);
1977} 1980}
1978EXPORT_SYMBOL(tcp_v4_destroy_sock); 1981EXPORT_SYMBOL(tcp_v4_destroy_sock);
1979 1982
@@ -2634,10 +2637,14 @@ struct proto tcp_prot = {
2634 .compat_setsockopt = compat_tcp_setsockopt, 2637 .compat_setsockopt = compat_tcp_setsockopt,
2635 .compat_getsockopt = compat_tcp_getsockopt, 2638 .compat_getsockopt = compat_tcp_getsockopt,
2636#endif 2639#endif
2640#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2641 .init_cgroup = tcp_init_cgroup,
2642 .destroy_cgroup = tcp_destroy_cgroup,
2643 .proto_cgroup = tcp_proto_cgroup,
2644#endif
2637}; 2645};
2638EXPORT_SYMBOL(tcp_prot); 2646EXPORT_SYMBOL(tcp_prot);
2639 2647
2640
2641static int __net_init tcp_sk_init(struct net *net) 2648static int __net_init tcp_sk_init(struct net *net)
2642{ 2649{
2643 return inet_ctl_sock_create(&net->ipv4.tcp_sock, 2650 return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..4a68d2c24556
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,74 @@
1#include <net/tcp.h>
2#include <net/tcp_memcontrol.h>
3#include <net/sock.h>
4#include <linux/memcontrol.h>
5#include <linux/module.h>
6
7static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
8{
9 return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
10}
11
12static void memcg_tcp_enter_memory_pressure(struct sock *sk)
13{
14 if (!sk->sk_cgrp->memory_pressure)
15 *sk->sk_cgrp->memory_pressure = 1;
16}
17EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
18
19int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
20{
21 /*
22 * The root cgroup does not use res_counters, but rather,
23 * rely on the data already collected by the network
24 * subsystem
25 */
26 struct res_counter *res_parent = NULL;
27 struct cg_proto *cg_proto, *parent_cg;
28 struct tcp_memcontrol *tcp;
29 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
30 struct mem_cgroup *parent = parent_mem_cgroup(memcg);
31
32 cg_proto = tcp_prot.proto_cgroup(memcg);
33 if (!cg_proto)
34 return 0;
35
36 tcp = tcp_from_cgproto(cg_proto);
37
38 tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
39 tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
40 tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
41 tcp->tcp_memory_pressure = 0;
42
43 parent_cg = tcp_prot.proto_cgroup(parent);
44 if (parent_cg)
45 res_parent = parent_cg->memory_allocated;
46
47 res_counter_init(&tcp->tcp_memory_allocated, res_parent);
48 percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
49
50 cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
51 cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
52 cg_proto->sysctl_mem = tcp->tcp_prot_mem;
53 cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
54 cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
55 cg_proto->memcg = memcg;
56
57 return 0;
58}
59EXPORT_SYMBOL(tcp_init_cgroup);
60
61void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
62{
63 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
64 struct cg_proto *cg_proto;
65 struct tcp_memcontrol *tcp;
66
67 cg_proto = tcp_prot.proto_cgroup(memcg);
68 if (!cg_proto)
69 return;
70
71 tcp = tcp_from_cgproto(cg_proto);
72 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
73}
74EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b69c7030aba9..95d3cfb65d39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -62,6 +62,7 @@
62#include <net/netdma.h> 62#include <net/netdma.h>
63#include <net/inet_common.h> 63#include <net/inet_common.h>
64#include <net/secure_seq.h> 64#include <net/secure_seq.h>
65#include <net/tcp_memcontrol.h>
65 66
66#include <asm/uaccess.h> 67#include <asm/uaccess.h>
67 68
@@ -1994,6 +1995,7 @@ static int tcp_v6_init_sock(struct sock *sk)
1994 sk->sk_rcvbuf = sysctl_tcp_rmem[1]; 1995 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1995 1996
1996 local_bh_disable(); 1997 local_bh_disable();
1998 sock_update_memcg(sk);
1997 sk_sockets_allocated_inc(sk); 1999 sk_sockets_allocated_inc(sk);
1998 local_bh_enable(); 2000 local_bh_enable();
1999 2001
@@ -2227,6 +2229,9 @@ struct proto tcpv6_prot = {
2227 .compat_setsockopt = compat_tcp_setsockopt, 2229 .compat_setsockopt = compat_tcp_setsockopt,
2228 .compat_getsockopt = compat_tcp_getsockopt, 2230 .compat_getsockopt = compat_tcp_getsockopt,
2229#endif 2231#endif
2232#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
2233 .proto_cgroup = tcp_proto_cgroup,
2234#endif
2230}; 2235};
2231 2236
2232static const struct inet6_protocol tcpv6_protocol = { 2237static const struct inet6_protocol tcpv6_protocol = {