author     Johannes Weiner <hannes@cmpxchg.org>            2016-01-14 18:21:14 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2016-01-14 19:00:49 -0500
commit     e805605c721021879a1469bdae45c6f80bc985f4 (patch)
tree       c0743f5fa5e70ebf1483415c5bcc53dffce23c64 /net
parent     80f23124f57c77915a7b4201d8dcba38a38b23f0 (diff)
net: tcp_memcontrol: sanitize tcp memory accounting callbacks
There won't be a tcp control soft limit, so integrating the memcg code
into the global skmem limiting scheme complicates things unnecessarily.
Replace this with simple and clear charge and uncharge calls--hidden
behind a jump label--to account skb memory.

Note that this is not purely aesthetic: as a result of shoehorning the
per-memcg code into the same memory accounting functions that handle the
global level, the old code would compare the per-memcg consumption
against the smaller of the per-memcg limit and the global limit.  This
allowed the total consumption of multiple sockets to exceed the global
limit, as long as the individual sockets stayed within bounds.  After
this change, the code will always compare the per-memcg consumption to
the per-memcg limit, and the global consumption to the global limit, and
thus close this loophole.

Without a soft limit, the per-memcg memory pressure state in sockets is
generally questionable.  However, we did it until now, so we continue to
enter it when the hard limit is hit, and packets are dropped, to let
other sockets in the cgroup know that they shouldn't grow their transmit
windows, either.  However, keep it simple in the new callback model and
leave memory pressure lazily when the next packet is accepted (as
opposed to doing it synchronously when packets are processed).  When
packets are dropped, network performance will already be in the toilet,
so that should be a reasonable trade-off.

As described above, consumption is now checked on the per-memcg level
and the global level separately.  Likewise, memory pressure states are
maintained on both the per-memcg level and the global level, and a
socket is considered under pressure when either level asserts as much.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
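[Editor's sketch] The charge/uncharge helpers referenced by the hunks
below are introduced on the memcg side of this patch and fall outside
the net/-only diffstat shown on this page.  The following is a rough
sketch of their semantics, assuming the page_counter-based accounting
described in the changelog; names and bodies are approximations, not
the verbatim commit:

/*
 * Approximate sketch, not the verbatim commit: charge nr_pages to the
 * memcg's socket counter.  Returns true if the charge fit within the
 * memcg limit; on failure the pages are force-charged anyway so the
 * counter stays consistent, and the caller suppresses the allocation.
 */
bool mem_cgroup_charge_skmem(struct cg_proto *proto, unsigned int nr_pages)
{
	struct page_counter *fail;

	if (page_counter_try_charge(&proto->memory_allocated,
				    nr_pages, &fail)) {
		proto->memory_pressure = 0;	/* leave pressure lazily */
		return true;
	}
	page_counter_charge(&proto->memory_allocated, nr_pages);
	proto->memory_pressure = 1;	/* signal sibling sockets */
	return false;
}

void mem_cgroup_uncharge_skmem(struct cg_proto *proto, unsigned int nr_pages)
{
	page_counter_uncharge(&proto->memory_allocated, nr_pages);
}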
Diffstat (limited to 'net')
-rw-r--r--  net/core/sock.c        26
-rw-r--r--  net/ipv4/tcp_output.c   7
2 files changed, 21 insertions, 12 deletions
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c5dab01105b..89ae859d2dc5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2084,27 +2084,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
 	long allocated;
-	int parent_status = UNDER_LIMIT;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 
-	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
+	allocated = sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    !mem_cgroup_charge_skmem(sk->sk_cgrp, amt))
+		goto suppress_allocation;
 
 	/* Under limit. */
-	if (parent_status == UNDER_LIMIT &&
-	    allocated <= sk_prot_mem_limits(sk, 0)) {
+	if (allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
-	/* Under pressure. (we or our parents) */
-	if ((parent_status > SOFT_LIMIT) ||
-	    allocated > sk_prot_mem_limits(sk, 1))
+	/* Under pressure. */
+	if (allocated > sk_prot_mem_limits(sk, 1))
 		sk_enter_memory_pressure(sk);
 
-	/* Over hard limit (we or our parents) */
-	if ((parent_status == OVER_LIMIT) ||
-	    (allocated > sk_prot_mem_limits(sk, 2)))
+	/* Over hard limit. */
+	if (allocated > sk_prot_mem_limits(sk, 2))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
@@ -2153,6 +2153,9 @@ suppress_allocation:
 
 	sk_memory_allocated_sub(sk, amt);
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amt);
+
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -2168,6 +2171,9 @@ void __sk_mem_reclaim(struct sock *sk, int amount)
 	sk_memory_allocated_sub(sk, amount);
 	sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_uncharge_skmem(sk->sk_cgrp, amount);
+
 	if (sk_under_memory_pressure(sk) &&
 	    (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
 		sk_leave_memory_pressure(sk);
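[Editor's sketch] The changelog's rule that a socket is under pressure
when either level asserts as much is enforced in include/net/sock.h,
which is also outside the net/-limited diffstat here.  An approximate
sketch of what the check looks like after this patch, for illustration
only:

/* Approximate post-patch form, not the verbatim commit. */
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
	if (!sk->sk_prot->memory_pressure)
		return false;	/* protocol does not track pressure */

	/* Per-memcg pressure alone is sufficient... */
	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
	    sk->sk_cgrp->memory_pressure)
		return true;

	/* ...and so is global protocol pressure. */
	return !!*sk->sk_prot->memory_pressure;
}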
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 412a920fe0ec..493b48945f0c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2813,13 +2813,16 @@ begin_fwd:
  */
 void sk_forced_mem_schedule(struct sock *sk, int size)
 {
-	int amt, status;
+	int amt;
 
 	if (size <= sk->sk_forward_alloc)
 		return;
 	amt = sk_mem_pages(size);
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	sk_memory_allocated_add(sk, amt, &status);
+	sk_memory_allocated_add(sk, amt);
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		mem_cgroup_charge_skmem(sk->sk_cgrp, amt);
 }
 
 /* Send a FIN. The caller locks the socket for us.
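[Editor's sketch] The dropped status variable existed only to feed the
old three-argument sk_memory_allocated_add().  After this series that
primitive (declared in include/net/sock.h, not shown in this diffstat)
collapses to a plain update of the global protocol counter; roughly:

/* Approximate simplified form, for illustration only. */
static inline long
sk_memory_allocated_add(struct sock *sk, int amt)
{
	return atomic_long_add_return(amt, sk->sk_prot->memory_allocated);
}

Note also that sk_forced_mem_schedule() deliberately ignores the return
value of mem_cgroup_charge_skmem(): it serves allocations that must not
fail, such as the FIN skb in tcp_send_fin() (the function following
this hunk), so the memcg counter may be pushed over its limit here and
is corrected when the memory is later uncharged.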