aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
author Herbert Xu <herbert@gondor.apana.org.au> 2005-08-16 23:43:40 -0400
committer David S. Miller <davem@davemloft.net> 2005-08-16 23:43:40 -0400
commit c8ac37746489f05a32a958b048f29ae45487e81e (patch)
tree c1d326c64fca3f5902438160b7b28e178318c8be /net
parent 793245eeb97bd28e363f2b0f2e766fdbff0c9619 (diff)
[TCP]: Fix bug #5070: kernel BUG at net/ipv4/tcp_output.c:864
1) We send out a normal sized packet with TSO on to start off. 2) ICMP is received indicating a smaller MTU. 3) We send the current sk_send_head which needs to be fragmented since it was created before the ICMP event. The first fragment is then sent out. At this point the remaining fragment is allocated by tcp_fragment. However, its size is padded to fit the L1 cache-line size therefore creating tail-room up to 124 bytes long. This fragment will also be sitting at sk_send_head. 4) tcp_sendmsg is called again and it stores data in the tail-room of the fragment. 5) tcp_push_one is called by tcp_sendmsg which then calls tso_fragment since the packet as a whole exceeds the MTU. At this point we have a packet that has data in the head area being fed to tso_fragment which bombs out. My take on this is that we shouldn't ever call tcp_fragment on a TSO socket for a packet that is yet to be transmitted since this creates a packet on sk_send_head that cannot be extended. So here is a patch to change it so that tso_fragment is always used in this case. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/ipv4/tcp_output.c 39
1 files changed, 20 insertions, 19 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3ed6fc15815b..566045e58437 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -861,7 +861,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
861 u16 flags; 861 u16 flags;
862 862
863 /* All of a TSO frame must be composed of paged data. */ 863 /* All of a TSO frame must be composed of paged data. */
864 BUG_ON(skb->len != skb->data_len); 864 if (skb->len != skb->data_len)
865 return tcp_fragment(sk, skb, len, mss_now);
865 866
866 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); 867 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
867 if (unlikely(buff == NULL)) 868 if (unlikely(buff == NULL))
@@ -974,6 +975,8 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
974 975
975 sent_pkts = 0; 976 sent_pkts = 0;
976 while ((skb = sk->sk_send_head)) { 977 while ((skb = sk->sk_send_head)) {
978 unsigned int limit;
979
977 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 980 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
978 BUG_ON(!tso_segs); 981 BUG_ON(!tso_segs);
979 982
@@ -994,9 +997,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
994 break; 997 break;
995 } 998 }
996 999
1000 limit = mss_now;
997 if (tso_segs > 1) { 1001 if (tso_segs > 1) {
998 u32 limit = tcp_window_allows(tp, skb, 1002 limit = tcp_window_allows(tp, skb,
999 mss_now, cwnd_quota); 1003 mss_now, cwnd_quota);
1000 1004
1001 if (skb->len < limit) { 1005 if (skb->len < limit) {
1002 unsigned int trim = skb->len % mss_now; 1006 unsigned int trim = skb->len % mss_now;
@@ -1004,15 +1008,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1004 if (trim) 1008 if (trim)
1005 limit = skb->len - trim; 1009 limit = skb->len - trim;
1006 } 1010 }
1007 if (skb->len > limit) {
1008 if (tso_fragment(sk, skb, limit, mss_now))
1009 break;
1010 }
1011 } else if (unlikely(skb->len > mss_now)) {
1012 if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
1013 break;
1014 } 1011 }
1015 1012
1013 if (skb->len > limit &&
1014 unlikely(tso_fragment(sk, skb, limit, mss_now)))
1015 break;
1016
1016 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1017 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1017 1018
1018 if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) 1019 if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
@@ -1064,11 +1065,14 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1064 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); 1065 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
1065 1066
1066 if (likely(cwnd_quota)) { 1067 if (likely(cwnd_quota)) {
1068 unsigned int limit;
1069
1067 BUG_ON(!tso_segs); 1070 BUG_ON(!tso_segs);
1068 1071
1072 limit = mss_now;
1069 if (tso_segs > 1) { 1073 if (tso_segs > 1) {
1070 u32 limit = tcp_window_allows(tp, skb, 1074 limit = tcp_window_allows(tp, skb,
1071 mss_now, cwnd_quota); 1075 mss_now, cwnd_quota);
1072 1076
1073 if (skb->len < limit) { 1077 if (skb->len < limit) {
1074 unsigned int trim = skb->len % mss_now; 1078 unsigned int trim = skb->len % mss_now;
@@ -1076,15 +1080,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1076 if (trim) 1080 if (trim)
1077 limit = skb->len - trim; 1081 limit = skb->len - trim;
1078 } 1082 }
1079 if (skb->len > limit) {
1080 if (unlikely(tso_fragment(sk, skb, limit, mss_now)))
1081 return;
1082 }
1083 } else if (unlikely(skb->len > mss_now)) {
1084 if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
1085 return;
1086 } 1083 }
1087 1084
1085 if (skb->len > limit &&
1086 unlikely(tso_fragment(sk, skb, limit, mss_now)))
1087 return;
1088
1088 /* Send it out now. */ 1089 /* Send it out now. */
1089 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1090 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1090 1091