diff options
| author | Vitaliy Gusev <vgusev@openvz.org> | 2008-04-15 03:33:38 -0400 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2008-04-15 03:33:38 -0400 |
| commit | b000cd3707e7b25d76745f9c0e261c23d21fa578 (patch) | |
| tree | d86dfd47954404e581f7f9d730ae3d3ce2532ad1 | |
| parent | 066a3b5b2346febf9a655b444567b7138e3bb939 (diff) | |
[TCP]: Fix never pruned tcp out-of-order queue.
tcp_prune_queue() doesn't prune an out-of-order queue at all.
Therefore sk_rmem_schedule() can fail but the out-of-order queue isn't
pruned. This can lead to a TCP deadlock state if the following two
conditions hold:
1. There is a sequence hole between the last segment received in
order and the segments enqueued to the out-of-order queue.
2. The total size of all segments in the out-of-order queue is more than tcp_mem[2].
Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
| -rw-r--r-- | net/ipv4/tcp_input.c | 72 |
1 files changed, 46 insertions, 26 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5119856017ab..61db7b1eb995 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
| @@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk) | |||
| 3841 | } | 3841 | } |
| 3842 | } | 3842 | } |
| 3843 | 3843 | ||
| 3844 | static void tcp_prune_ofo_queue(struct sock *sk); | ||
| 3844 | static int tcp_prune_queue(struct sock *sk); | 3845 | static int tcp_prune_queue(struct sock *sk); |
| 3845 | 3846 | ||
| 3847 | static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) | ||
| 3848 | { | ||
| 3849 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | ||
| 3850 | !sk_rmem_schedule(sk, size)) { | ||
| 3851 | |||
| 3852 | if (tcp_prune_queue(sk) < 0) | ||
| 3853 | return -1; | ||
| 3854 | |||
| 3855 | if (!sk_rmem_schedule(sk, size)) { | ||
| 3856 | tcp_prune_ofo_queue(sk); | ||
| 3857 | if (!sk_rmem_schedule(sk, size)) | ||
| 3858 | return -1; | ||
| 3859 | } | ||
| 3860 | } | ||
| 3861 | return 0; | ||
| 3862 | } | ||
| 3863 | |||
| 3846 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | 3864 | static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) |
| 3847 | { | 3865 | { |
| 3848 | struct tcphdr *th = tcp_hdr(skb); | 3866 | struct tcphdr *th = tcp_hdr(skb); |
| @@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
| 3892 | if (eaten <= 0) { | 3910 | if (eaten <= 0) { |
| 3893 | queue_and_out: | 3911 | queue_and_out: |
| 3894 | if (eaten < 0 && | 3912 | if (eaten < 0 && |
| 3895 | (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 3913 | tcp_try_rmem_schedule(sk, skb->truesize)) |
| 3896 | !sk_rmem_schedule(sk, skb->truesize))) { | 3914 | goto drop; |
| 3897 | if (tcp_prune_queue(sk) < 0 || | 3915 | |
| 3898 | !sk_rmem_schedule(sk, skb->truesize)) | ||
| 3899 | goto drop; | ||
| 3900 | } | ||
| 3901 | skb_set_owner_r(skb, sk); | 3916 | skb_set_owner_r(skb, sk); |
| 3902 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 3917 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
| 3903 | } | 3918 | } |
| @@ -3966,12 +3981,8 @@ drop: | |||
| 3966 | 3981 | ||
| 3967 | TCP_ECN_check_ce(tp, skb); | 3982 | TCP_ECN_check_ce(tp, skb); |
| 3968 | 3983 | ||
| 3969 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || | 3984 | if (tcp_try_rmem_schedule(sk, skb->truesize)) |
| 3970 | !sk_rmem_schedule(sk, skb->truesize)) { | 3985 | goto drop; |
| 3971 | if (tcp_prune_queue(sk) < 0 || | ||
| 3972 | !sk_rmem_schedule(sk, skb->truesize)) | ||
| 3973 | goto drop; | ||
| 3974 | } | ||
| 3975 | 3986 | ||
| 3976 | /* Disable header prediction. */ | 3987 | /* Disable header prediction. */ |
| 3977 | tp->pred_flags = 0; | 3988 | tp->pred_flags = 0; |
| @@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk) | |||
| 4198 | } | 4209 | } |
| 4199 | } | 4210 | } |
| 4200 | 4211 | ||
| 4212 | /* | ||
| 4213 | * Purge the out-of-order queue. | ||
| 4214 | */ | ||
| 4215 | static void tcp_prune_ofo_queue(struct sock *sk) | ||
| 4216 | { | ||
| 4217 | struct tcp_sock *tp = tcp_sk(sk); | ||
| 4218 | |||
| 4219 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | ||
| 4220 | NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); | ||
| 4221 | __skb_queue_purge(&tp->out_of_order_queue); | ||
| 4222 | |||
| 4223 | /* Reset SACK state. A conforming SACK implementation will | ||
| 4224 | * do the same at a timeout based retransmit. When a connection | ||
| 4225 | * is in a sad state like this, we care only about integrity | ||
| 4226 | * of the connection not performance. | ||
| 4227 | */ | ||
| 4228 | if (tp->rx_opt.sack_ok) | ||
| 4229 | tcp_sack_reset(&tp->rx_opt); | ||
| 4230 | sk_mem_reclaim(sk); | ||
| 4231 | } | ||
| 4232 | } | ||
| 4233 | |||
| 4201 | /* Reduce allocated memory if we can, trying to get | 4234 | /* Reduce allocated memory if we can, trying to get |
| 4202 | * the socket within its memory limits again. | 4235 | * the socket within its memory limits again. |
| 4203 | * | 4236 | * |
| @@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk) | |||
| 4231 | /* Collapsing did not help, destructive actions follow. | 4264 | /* Collapsing did not help, destructive actions follow. |
| 4232 | * This must not ever occur. */ | 4265 | * This must not ever occur. */ |
| 4233 | 4266 | ||
| 4234 | /* First, purge the out_of_order queue. */ | 4267 | tcp_prune_ofo_queue(sk); |
| 4235 | if (!skb_queue_empty(&tp->out_of_order_queue)) { | ||
| 4236 | NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED); | ||
| 4237 | __skb_queue_purge(&tp->out_of_order_queue); | ||
| 4238 | |||
| 4239 | /* Reset SACK state. A conforming SACK implementation will | ||
| 4240 | * do the same at a timeout based retransmit. When a connection | ||
| 4241 | * is in a sad state like this, we care only about integrity | ||
| 4242 | * of the connection not performance. | ||
| 4243 | */ | ||
| 4244 | if (tcp_is_sack(tp)) | ||
| 4245 | tcp_sack_reset(&tp->rx_opt); | ||
| 4246 | sk_mem_reclaim(sk); | ||
| 4247 | } | ||
| 4248 | 4268 | ||
| 4249 | if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) | 4269 | if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) |
| 4250 | return 0; | 4270 | return 0; |
