diff options
author | David S. Miller <davem@davemloft.net> | 2010-08-25 05:27:49 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-08-25 05:27:49 -0400 |
commit | ad1af0fedba14f82b240a03fe20eb9b2fdbd0357 (patch) | |
tree | 4d53aa8bc2d9df782aa792e52670ab55c7a44d5b | |
parent | b2bc85631e72485b984bcd202a104591874babba (diff) |
tcp: Combat per-cpu skew in orphan tests.
As reported by Anton Blanchard when we use
percpu_counter_read_positive() to make our orphan socket limit checks,
the check can be off by up to num_cpus_online() * batch (which is 32
by default) which on a 128 cpu machine can be as large as the default
orphan limit itself.
Fix this by doing the full expensive sum check if the optimized check
triggers.
Reported-by: Anton Blanchard <anton@samba.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
-rw-r--r-- | include/net/tcp.h | 18 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 8 |
3 files changed, 19 insertions, 12 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb20193..eaa9582779d0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -268,11 +268,21 @@ static inline int between(__u32 seq1, __u32 seq2, __u32 seq3) | |||
268 | return seq3 - seq2 >= seq1 - seq2; | 268 | return seq3 - seq2 >= seq1 - seq2; |
269 | } | 269 | } |
270 | 270 | ||
271 | static inline int tcp_too_many_orphans(struct sock *sk, int num) | 271 | static inline bool tcp_too_many_orphans(struct sock *sk, int shift) |
272 | { | 272 | { |
273 | return (num > sysctl_tcp_max_orphans) || | 273 | struct percpu_counter *ocp = sk->sk_prot->orphan_count; |
274 | (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && | 274 | int orphans = percpu_counter_read_positive(ocp); |
275 | atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]); | 275 | |
276 | if (orphans << shift > sysctl_tcp_max_orphans) { | ||
277 | orphans = percpu_counter_sum_positive(ocp); | ||
278 | if (orphans << shift > sysctl_tcp_max_orphans) | ||
279 | return true; | ||
280 | } | ||
281 | |||
282 | if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && | ||
283 | atomic_read(&tcp_memory_allocated) > sysctl_tcp_mem[2]) | ||
284 | return true; | ||
285 | return false; | ||
276 | } | 286 | } |
277 | 287 | ||
278 | /* syncookies: remember time of last synqueue overflow */ | 288 | /* syncookies: remember time of last synqueue overflow */ |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea77..197b9b77fa3e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2011,11 +2011,8 @@ adjudge_to_death: | |||
2011 | } | 2011 | } |
2012 | } | 2012 | } |
2013 | if (sk->sk_state != TCP_CLOSE) { | 2013 | if (sk->sk_state != TCP_CLOSE) { |
2014 | int orphan_count = percpu_counter_read_positive( | ||
2015 | sk->sk_prot->orphan_count); | ||
2016 | |||
2017 | sk_mem_reclaim(sk); | 2014 | sk_mem_reclaim(sk); |
2018 | if (tcp_too_many_orphans(sk, orphan_count)) { | 2015 | if (tcp_too_many_orphans(sk, 0)) { |
2019 | if (net_ratelimit()) | 2016 | if (net_ratelimit()) |
2020 | printk(KERN_INFO "TCP: too many of orphaned " | 2017 | printk(KERN_INFO "TCP: too many of orphaned " |
2021 | "sockets\n"); | 2018 | "sockets\n"); |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f5..c35b469e851c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -66,18 +66,18 @@ static void tcp_write_err(struct sock *sk) | |||
66 | static int tcp_out_of_resources(struct sock *sk, int do_reset) | 66 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
67 | { | 67 | { |
68 | struct tcp_sock *tp = tcp_sk(sk); | 68 | struct tcp_sock *tp = tcp_sk(sk); |
69 | int orphans = percpu_counter_read_positive(&tcp_orphan_count); | 69 | int shift = 0; |
70 | 70 | ||
71 | /* If peer does not open window for long time, or did not transmit | 71 | /* If peer does not open window for long time, or did not transmit |
72 | * anything for long time, penalize it. */ | 72 | * anything for long time, penalize it. */ |
73 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) | 73 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) |
74 | orphans <<= 1; | 74 | shift++; |
75 | 75 | ||
76 | /* If some dubious ICMP arrived, penalize even more. */ | 76 | /* If some dubious ICMP arrived, penalize even more. */ |
77 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
78 | orphans <<= 1; | 78 | shift++; |
79 | 79 | ||
80 | if (tcp_too_many_orphans(sk, orphans)) { | 80 | if (tcp_too_many_orphans(sk, shift)) { |
81 | if (net_ratelimit()) | 81 | if (net_ratelimit()) |
82 | printk(KERN_INFO "Out of socket memory\n"); | 82 | printk(KERN_INFO "Out of socket memory\n"); |
83 | 83 | ||