aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_memcontrol.c
diff options
context:
space:
mode:
author Glauber Costa <glommer@parallels.com> 2012-05-29 18:07:11 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2012-05-29 19:22:28 -0400
commit 3f134619393cb6c6dfab7890a617d0ceca6d05d7 (patch)
tree 39e05b42c99189cd4496e61a3e16107e065b0f04 /net/ipv4/tcp_memcontrol.c
parent 3afe36b1fe7d1e3f66752bb9548a763942f3a104 (diff)
memcg: decrement static keys at real destroy time
We call the destroy function when a cgroup starts to be removed, such as by a rmdir event. However, because of our reference counters, some objects are still in flight. Right now, we are decrementing the static_keys at destroy() time, meaning that if we get rid of the last static_key reference, some objects will still have charges, but the code to properly uncharge them won't be run. This becomes a problem especially if it is ever enabled again, because now new charges will be added to the stale charges, making it pretty much impossible to keep the accounting correct. We just need to be careful with the static branch activation: since there is no particular preferred order of their activation, we need to make sure that we only start using it after all call sites are active. This is achieved by having a per-memcg flag that is only updated after static_key_slow_inc() returns. At this time, we are sure all sites are active. This is made per-memcg, not global, for a reason: it also has the effect of making socket accounting more consistent. The first memcg to be limited will trigger static_key() activation and, therefore, accounting. But all the others will then be accounted no matter what. After this patch, only limited memcgs will have their sockets accounted. [akpm@linux-foundation.org: move enum sock_flag_bits into sock.h, document enum sock_flag_bits, convert memcg_proto_active() and memcg_proto_activated() to test_bit(), redo tcp_update_limit() comment to 80 cols] Signed-off-by: Glauber Costa <glommer@parallels.com> Cc: Tejun Heo <tj@kernel.org> Cc: Li Zefan <lizefan@huawei.com> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Michal Hocko <mhocko@suse.cz> Acked-by: David Miller <davem@davemloft.net> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'net/ipv4/tcp_memcontrol.c')
-rw-r--r-- net/ipv4/tcp_memcontrol.c 34
1 files changed, 27 insertions, 7 deletions
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
index 151703791bb0..b6f3583ddfe8 100644
--- a/net/ipv4/tcp_memcontrol.c
+++ b/net/ipv4/tcp_memcontrol.c
@@ -74,9 +74,6 @@ void tcp_destroy_cgroup(struct mem_cgroup *memcg)
74 percpu_counter_destroy(&tcp->tcp_sockets_allocated); 74 percpu_counter_destroy(&tcp->tcp_sockets_allocated);
75 75
76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT); 76 val = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
77
78 if (val != RESOURCE_MAX)
79 static_key_slow_dec(&memcg_socket_limit_enabled);
80} 77}
81EXPORT_SYMBOL(tcp_destroy_cgroup); 78EXPORT_SYMBOL(tcp_destroy_cgroup);
82 79
@@ -107,10 +104,33 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
107 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT, 104 tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
108 net->ipv4.sysctl_tcp_mem[i]); 105 net->ipv4.sysctl_tcp_mem[i]);
109 106
110 if (val == RESOURCE_MAX && old_lim != RESOURCE_MAX) 107 if (val == RESOURCE_MAX)
111 static_key_slow_dec(&memcg_socket_limit_enabled); 108 clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
112 else if (old_lim == RESOURCE_MAX && val != RESOURCE_MAX) 109 else if (val != RESOURCE_MAX) {
113 static_key_slow_inc(&memcg_socket_limit_enabled); 110 /*
111 * The active bit needs to be written after the static_key
112 * update. This is what guarantees that the socket activation
113 * function is the last one to run. See sock_update_memcg() for
114 * details, and note that we don't mark any socket as belonging
115 * to this memcg until that flag is up.
116 *
117 * We need to do this, because static_keys will span multiple
118 * sites, but we can't control their order. If we mark a socket
119 * as accounted, but the accounting functions are not patched in
120 * yet, we'll lose accounting.
121 *
122 * We never race with the readers in sock_update_memcg(),
123 * because when this value change, the code to process it is not
124 * patched in yet.
125 *
126 * The activated bit is used to guarantee that no two writers
127 * will do the update in the same memcg. Without that, we can't
128 * properly shutdown the static key.
129 */
130 if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
131 static_key_slow_inc(&memcg_socket_limit_enabled);
132 set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
133 }
114 134
115 return 0; 135 return 0;
116} 136}