diff options
author | Vitaliy Gusev <vgusev@openvz.org> | 2008-04-15 03:33:38 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-04-15 03:33:38 -0400 |
commit | b000cd3707e7b25d76745f9c0e261c23d21fa578 (patch) | |
tree | d86dfd47954404e581f7f9d730ae3d3ce2532ad1 | |
parent | 066a3b5b2346febf9a655b444567b7138e3bb939 (diff) |
[TCP]: Fix never pruned tcp out-of-order queue.
tcp_prune_queue() doesn't prune the out-of-order queue at all.
Therefore sk_rmem_schedule() can fail but the out-of-order queue isn't
pruned. This can lead to a TCP deadlock state if the following two
conditions hold:
1. There is a sequence hole between the last segment received in
order and the segments enqueued to the out-of-order queue.
2. The total size of all segments in the out-of-order queue is more than tcp_mem[2].
Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_input.c | 72 |
1 files changed, 46 insertions, 26 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5119856017ab..61db7b1eb995 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk) | |||
3841 | } | 3841 | } |
3842 | } | 3842 | } |
3843 | 3843 | ||
3844 | static void tcp_prune_ofo_queue(struct sock *sk); | ||
3844 | static int tcp_prune_queue(struct sock *sk); | 3845 | static int tcp_prune_queue(struct sock *sk); |
3845 | 3846 | ||
3847 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | ||
3848 | { | ||
3849 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | ||
3850 | !sk_rmem_schedule(sk, size)) { | ||
3851 | |||
3852 | if (tcp_prune_queue(sk) < 0) | ||
3853 | return -1; | ||
3854 | |||
3855 | if (!sk_rmem_schedule(sk, size)) { | ||
3856 | tcp_prune_ofo_queue(sk); | ||
3857 | if (!sk_rmem_schedule(sk, size)) | ||
3858 | return -1; | ||
3859 | } | ||
3860 | } | ||
3861 | return 0; | ||
3862 | } | ||
3863 | |||
3846 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 3864 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
3847 | { | 3865 | { |
3848 | struct tcphdr *th = tcp_hdr(skb); | 3866 | struct tcphdr *th = tcp_hdr(skb); |
@@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
3892 | if (eaten <= 0) { | 3910 | if (eaten <= 0) { |
3893 | queue_and_out: | 3911 | queue_and_out: |
3894 | if (eaten < 0 && | 3912 | if (eaten < 0 && |
3895 | (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 3913 | tcp_try_rmem_schedule(sk, skb->truesize)) |
3896 | !sk_rmem_schedule(sk, skb->truesize))) { | 3914 | goto drop; |
3897 | if (tcp_prune_queue(sk) < 0 || | 3915 | |
3898 | !sk_rmem_schedule(sk, skb->truesize)) | ||
3899 | goto drop; | ||
3900 | } | ||
3901 | skb_set_owner_r(skb, sk); | 3916 | skb_set_owner_r(skb, sk); |
3902 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 3917 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
3903 | } | 3918 | } |
@@ -3966,12 +3981,8 @@ drop: | |||
3966 | 3981 | ||
3967 | TCP_ECN_check_ce(tp, skb); | 3982 | TCP_ECN_check_ce(tp, skb); |
3968 | 3983 | ||
3969 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 3984 | if (tcp_try_rmem_schedule(sk, skb->truesize)) |
3970 | !sk_rmem_schedule(sk, skb->truesize)) { | 3985 | goto drop; |
3971 | if (tcp_prune_queue(sk) < 0 || | ||
3972 | !sk_rmem_schedule(sk, skb->truesize)) | ||
3973 | goto drop; | ||
3974 | } | ||
3975 | 3986 | ||
3976 | /* Disable header prediction. */ | 3987 | /* Disable header prediction. */ |
3977 | tp->pred_flags = 0; | 3988 | tp->pred_flags = 0; |
@@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
4198 | } | 4209 | } |
4199 | } | 4210 | } |
4200 | 4211 | ||
4212 | /* | ||
4213 | * Purge the out-of-order queue. | ||
4214 | */ | ||
4215 | static void tcp_prune_ofo_queue(struct sock *sk) | ||
4216 | { | ||
4217 | struct tcp_sock *tp = tcp_sk(sk); | ||
4218 | |||
4219 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | ||
4220 | NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); | ||
4221 | __skb_queue_purge(&tp->out_of_order_queue); | ||
4222 | |||
4223 | /* Reset SACK state. A conforming SACK implementation will | ||
4224 | * do the same at a timeout based retransmit. When a connection | ||
4225 | * is in a sad state like this, we care only about integrity | ||
4226 | * of the connection not performance. | ||
4227 | */ | ||
4228 | if (tp->rx_opt.sack_ok) | ||
4229 | tcp_sack_reset(&tp->rx_opt); | ||
4230 | sk_mem_reclaim(sk); | ||
4231 | } | ||
4232 | } | ||
4233 | |||
4201 | /* Reduce allocated memory if we can, trying to get | 4234 | /* Reduce allocated memory if we can, trying to get |
4202 | * the socket within its memory limits again. | 4235 | * the socket within its memory limits again. |
4203 | * | 4236 | * |
@@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk) | |||
4231 | /* Collapsing did not help, destructive actions follow. | 4264 | /* Collapsing did not help, destructive actions follow. |
4232 | * This must not ever occur. */ | 4265 | * This must not ever occur. */ |
4233 | 4266 | ||
4234 | /* First, purge the out_of_order queue. */ | 4267 | tcp_prune_ofo_queue(sk); |
4235 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | ||
4236 | NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); | ||
4237 | __skb_queue_purge(&tp->out_of_order_queue); | ||
4238 | |||
4239 | /* Reset SACK state. A conforming SACK implementation will | ||
4240 | * do the same at a timeout based retransmit. When a connection | ||
4241 | * is in a sad state like this, we care only about integrity | ||
4242 | * of the connection not performance. | ||
4243 | */ | ||
4244 | if (tcp_is_sack(tp)) | ||
4245 | tcp_sack_reset(&tp->rx_opt); | ||
4246 | sk_mem_reclaim(sk); | ||
4247 | } | ||
4248 | 4268 | ||
4249 | if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) | 4269 | if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) |
4250 | return 0; | 4270 | return 0; |