author	Vitaliy Gusev <vgusev@openvz.org>	2008-04-15 03:33:38 -0400
committer	David S. Miller <davem@davemloft.net>	2008-04-15 03:33:38 -0400
commit	b000cd3707e7b25d76745f9c0e261c23d21fa578 (patch)
tree	d86dfd47954404e581f7f9d730ae3d3ce2532ad1 /net
parent	066a3b5b2346febf9a655b444567b7138e3bb939 (diff)
[TCP]: Fix never pruned tcp out-of-order queue.
tcp_prune_queue() doesn't prune the out-of-order queue at all. Therefore sk_rmem_schedule() can fail, yet the out-of-order queue is never pruned. This can lead to a TCP deadlock state if the following two conditions hold:

1. There is a sequence hole between the last in-order segment received and the segments enqueued in the out-of-order queue.
2. The total size of all segments in the out-of-order queue exceeds tcp_mem[2].

Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
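For illustration, the deadlock reduces to a memory-accounting argument: once the socket's entire receive budget is charged to skbs sitting in the out-of-order queue, pruning only the in-order receive queue frees nothing, so sk_rmem_schedule() keeps failing and every subsequent segment is dropped. Below is a minimal user-space C model of that situation, not kernel code: the limit, counters, and simplified helpers are stand-ins invented for this sketch, and the real kernel functions differ in detail.

/* deadlock_model.c - user-space sketch, NOT kernel code.
 * Models why pruning only the in-order receive queue cannot recover
 * memory that is charged to the out-of-order queue.
 */
#include <stdbool.h>
#include <stdio.h>

#define HARD_LIMIT 100		/* stand-in for tcp_mem[2] */

static int rmem_alloc;		/* memory charged to the socket */
static int receive_queue_bytes;	/* in-order segments */
static int ofo_queue_bytes;	/* out-of-order segments */

/* stand-in for sk_rmem_schedule(): can we account for `size` more bytes? */
static bool rmem_schedule(int size)
{
	return rmem_alloc + size <= HARD_LIMIT;
}

/* old behaviour: tcp_prune_queue() only touched the in-order queue */
static void prune_receive_queue(void)
{
	rmem_alloc -= receive_queue_bytes;
	receive_queue_bytes = 0;
}

/* new fallback introduced by this patch: purge the out-of-order queue too */
static void prune_ofo_queue(void)
{
	rmem_alloc -= ofo_queue_bytes;
	ofo_queue_bytes = 0;
}

int main(void)
{
	/* A sequence hole: everything received so far is out of order,
	 * and it already consumes the whole hard limit. */
	ofo_queue_bytes = HARD_LIMIT;
	rmem_alloc = HARD_LIMIT;

	prune_receive_queue();
	printf("after old-style prune: schedule %s\n",
	       rmem_schedule(10) ? "succeeds" : "still fails -> segment dropped");

	prune_ofo_queue();
	printf("after ofo prune:       schedule %s\n",
	       rmem_schedule(10) ? "succeeds" : "still fails");
	return 0;
}

The patch below implements that fallback in the kernel: a new helper, tcp_try_rmem_schedule(), first calls tcp_prune_queue() and, if memory still cannot be scheduled, purges the out-of-order queue via tcp_prune_ofo_queue().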
Diffstat (limited to 'net')
-rw-r--r--	net/ipv4/tcp_input.c	72
1 file changed, 46 insertions(+), 26 deletions(-)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5119856017ab..61db7b1eb995 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3841,8 +3841,26 @@ static void tcp_ofo_queue(struct sock *sk)
 	}
 }
 
+static void tcp_prune_ofo_queue(struct sock *sk);
 static int tcp_prune_queue(struct sock *sk);
 
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	    !sk_rmem_schedule(sk, size)) {
+
+		if (tcp_prune_queue(sk) < 0)
+			return -1;
+
+		if (!sk_rmem_schedule(sk, size)) {
+			tcp_prune_ofo_queue(sk);
+			if (!sk_rmem_schedule(sk, size))
+				return -1;
+		}
+	}
+	return 0;
+}
+
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3892,12 +3910,9 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (eaten <= 0) {
 queue_and_out:
 		if (eaten < 0 &&
-		    (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-		     !sk_rmem_schedule(sk, skb->truesize))) {
-			if (tcp_prune_queue(sk) < 0 ||
-			    !sk_rmem_schedule(sk, skb->truesize))
-				goto drop;
-		}
+		    tcp_try_rmem_schedule(sk, skb->truesize))
+			goto drop;
+
 		skb_set_owner_r(skb, sk);
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 	}
@@ -3966,12 +3981,8 @@ drop:
 
 	TCP_ECN_check_ce(tp, skb);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    !sk_rmem_schedule(sk, skb->truesize)) {
-		if (tcp_prune_queue(sk) < 0 ||
-		    !sk_rmem_schedule(sk, skb->truesize))
-			goto drop;
-	}
+	if (tcp_try_rmem_schedule(sk, skb->truesize))
+		goto drop;
 
 	/* Disable header prediction. */
 	tp->pred_flags = 0;
@@ -4198,6 +4209,28 @@ static void tcp_collapse_ofo_queue(struct sock *sk)
 	}
 }
 
+/*
+ * Purge the out-of-order queue.
+ */
+static void tcp_prune_ofo_queue(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!skb_queue_empty(&tp->out_of_order_queue)) {
+		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+		__skb_queue_purge(&tp->out_of_order_queue);
+
+		/* Reset SACK state.  A conforming SACK implementation will
+		 * do the same at a timeout based retransmit.  When a connection
+		 * is in a sad state like this, we care only about integrity
+		 * of the connection not performance.
+		 */
+		if (tp->rx_opt.sack_ok)
+			tcp_sack_reset(&tp->rx_opt);
+		sk_mem_reclaim(sk);
+	}
+}
+
 /* Reduce allocated memory if we can, trying to get
  * the socket within its memory limits again.
  *
@@ -4231,20 +4264,7 @@ static int tcp_prune_queue(struct sock *sk)
 	/* Collapsing did not help, destructive actions follow.
 	 * This must not ever occur. */
 
-	/* First, purge the out_of_order queue. */
-	if (!skb_queue_empty(&tp->out_of_order_queue)) {
-		NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
-		__skb_queue_purge(&tp->out_of_order_queue);
-
-		/* Reset SACK state.  A conforming SACK implementation will
-		 * do the same at a timeout based retransmit.  When a connection
-		 * is in a sad state like this, we care only about integrity
-		 * of the connection not performance.
-		 */
-		if (tcp_is_sack(tp))
-			tcp_sack_reset(&tp->rx_opt);
-		sk_mem_reclaim(sk);
-	}
+	tcp_prune_ofo_queue(sk);
 
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
 		return 0;