aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2005-08-04 22:52:01 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2005-08-05 00:43:14 -0400
commit846998ae87a80b0fd45b4cf5cf001a159d746f27 (patch)
treea81d0f6d2b1f1817d557bd5f5611c81ff9026b51
parent0c3dba1534569734ba353afdf3f11def497ff2ac (diff)
[PATCH] tcp: fix TSO sizing bugs
MSS changes can be lost since we preemptively initialize the tso_segs count for an SKB before we %100 commit to sending it out. So, by the time we send it out, the tso_size information can be stale due to PMTU events. This mucks up all of the logic in our send engine, and can even result in the BUG() triggering in tcp_tso_should_defer(). Another problem we have is that we're storing the tp->mss_cache, not the SACK block normalized MSS, as the tso_size. That's wrong too. Signed-off-by: David S. Miller <davem@davemloft.net> Cc: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--net/ipv4/tcp_output.c56
1 files changed, 28 insertions, 28 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3f8ea1bfa9c..e118b4b5b326 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
403 sk->sk_send_head = skb; 403 sk->sk_send_head = skb;
404} 404}
405 405
406static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) 406static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
407{ 407{
408 struct tcp_sock *tp = tcp_sk(sk); 408 if (skb->len <= mss_now ||
409
410 if (skb->len <= tp->mss_cache ||
411 !(sk->sk_route_caps & NETIF_F_TSO)) { 409 !(sk->sk_route_caps & NETIF_F_TSO)) {
412 /* Avoid the costly divide in the normal 410 /* Avoid the costly divide in the normal
413 * non-TSO case. 411 * non-TSO case.
@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
417 } else { 415 } else {
418 unsigned int factor; 416 unsigned int factor;
419 417
420 factor = skb->len + (tp->mss_cache - 1); 418 factor = skb->len + (mss_now - 1);
421 factor /= tp->mss_cache; 419 factor /= mss_now;
422 skb_shinfo(skb)->tso_segs = factor; 420 skb_shinfo(skb)->tso_segs = factor;
423 skb_shinfo(skb)->tso_size = tp->mss_cache; 421 skb_shinfo(skb)->tso_size = mss_now;
424 } 422 }
425} 423}
426 424
@@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
429 * packet to the list. This won't be called frequently, I hope. 427 * packet to the list. This won't be called frequently, I hope.
430 * Remember, these are still headerless SKBs at this point. 428 * Remember, these are still headerless SKBs at this point.
431 */ 429 */
432static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) 430static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
433{ 431{
434 struct tcp_sock *tp = tcp_sk(sk); 432 struct tcp_sock *tp = tcp_sk(sk);
435 struct sk_buff *buff; 433 struct sk_buff *buff;
@@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
492 } 490 }
493 491
494 /* Fix up tso_factor for both original and new SKB. */ 492 /* Fix up tso_factor for both original and new SKB. */
495 tcp_set_skb_tso_segs(sk, skb); 493 tcp_set_skb_tso_segs(sk, skb, mss_now);
496 tcp_set_skb_tso_segs(sk, buff); 494 tcp_set_skb_tso_segs(sk, buff, mss_now);
497 495
498 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { 496 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
499 tp->lost_out += tcp_skb_pcount(skb); 497 tp->lost_out += tcp_skb_pcount(skb);
@@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
569 * factor and mss. 567 * factor and mss.
570 */ 568 */
571 if (tcp_skb_pcount(skb) > 1) 569 if (tcp_skb_pcount(skb) > 1)
572 tcp_set_skb_tso_segs(sk, skb); 570 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
573 571
574 return 0; 572 return 0;
575} 573}
@@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
734/* This must be invoked the first time we consider transmitting 732/* This must be invoked the first time we consider transmitting
735 * SKB onto the wire. 733 * SKB onto the wire.
736 */ 734 */
737static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) 735static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
738{ 736{
739 int tso_segs = tcp_skb_pcount(skb); 737 int tso_segs = tcp_skb_pcount(skb);
740 738
741 if (!tso_segs) { 739 if (!tso_segs ||
742 tcp_set_skb_tso_segs(sk, skb); 740 (tso_segs > 1 &&
741 skb_shinfo(skb)->tso_size != mss_now)) {
742 tcp_set_skb_tso_segs(sk, skb, mss_now);
743 tso_segs = tcp_skb_pcount(skb); 743 tso_segs = tcp_skb_pcount(skb);
744 } 744 }
745 return tso_segs; 745 return tso_segs;
@@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
817 struct tcp_sock *tp = tcp_sk(sk); 817 struct tcp_sock *tp = tcp_sk(sk);
818 unsigned int cwnd_quota; 818 unsigned int cwnd_quota;
819 819
820 tcp_init_tso_segs(sk, skb); 820 tcp_init_tso_segs(sk, skb, cur_mss);
821 821
822 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) 822 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
823 return 0; 823 return 0;
@@ -854,7 +854,7 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
854 * know that all the data is in scatter-gather pages, and that the 854 * know that all the data is in scatter-gather pages, and that the
855 * packet has never been sent out before (and thus is not cloned). 855 * packet has never been sent out before (and thus is not cloned).
856 */ 856 */
857static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) 857static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
858{ 858{
859 struct sk_buff *buff; 859 struct sk_buff *buff;
860 int nlen = skb->len - len; 860 int nlen = skb->len - len;
@@ -887,8 +887,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
887 skb_split(skb, buff, len); 887 skb_split(skb, buff, len);
888 888
889 /* Fix up tso_factor for both original and new SKB. */ 889 /* Fix up tso_factor for both original and new SKB. */
890 tcp_set_skb_tso_segs(sk, skb); 890 tcp_set_skb_tso_segs(sk, skb, mss_now);
891 tcp_set_skb_tso_segs(sk, buff); 891 tcp_set_skb_tso_segs(sk, buff, mss_now);
892 892
893 /* Link BUFF into the send queue. */ 893 /* Link BUFF into the send queue. */
894 skb_header_release(buff); 894 skb_header_release(buff);
@@ -976,7 +976,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
976 if (unlikely(!skb)) 976 if (unlikely(!skb))
977 return 0; 977 return 0;
978 978
979 tso_segs = tcp_init_tso_segs(sk, skb); 979 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
980 cwnd_quota = tcp_cwnd_test(tp, skb); 980 cwnd_quota = tcp_cwnd_test(tp, skb);
981 if (unlikely(!cwnd_quota)) 981 if (unlikely(!cwnd_quota))
982 goto out; 982 goto out;
@@ -1006,11 +1006,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1006 limit = skb->len - trim; 1006 limit = skb->len - trim;
1007 } 1007 }
1008 if (skb->len > limit) { 1008 if (skb->len > limit) {
1009 if (tso_fragment(sk, skb, limit)) 1009 if (tso_fragment(sk, skb, limit, mss_now))
1010 break; 1010 break;
1011 } 1011 }
1012 } else if (unlikely(skb->len > mss_now)) { 1012 } else if (unlikely(skb->len > mss_now)) {
1013 if (unlikely(tcp_fragment(sk, skb, mss_now))) 1013 if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
1014 break; 1014 break;
1015 } 1015 }
1016 1016
@@ -1039,7 +1039,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
1039 skb = sk->sk_send_head; 1039 skb = sk->sk_send_head;
1040 if (!skb) 1040 if (!skb)
1041 break; 1041 break;
1042 tso_segs = tcp_init_tso_segs(sk, skb); 1042 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1043 } 1043 }
1044 1044
1045 if (likely(sent_pkts)) { 1045 if (likely(sent_pkts)) {
@@ -1076,7 +1076,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1076 1076
1077 BUG_ON(!skb || skb->len < mss_now); 1077 BUG_ON(!skb || skb->len < mss_now);
1078 1078
1079 tso_segs = tcp_init_tso_segs(sk, skb); 1079 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1080 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); 1080 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
1081 1081
1082 if (likely(cwnd_quota)) { 1082 if (likely(cwnd_quota)) {
@@ -1093,11 +1093,11 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
1093 limit = skb->len - trim; 1093 limit = skb->len - trim;
1094 } 1094 }
1095 if (skb->len > limit) { 1095 if (skb->len > limit) {
1096 if (unlikely(tso_fragment(sk, skb, limit))) 1096 if (unlikely(tso_fragment(sk, skb, limit, mss_now)))
1097 return; 1097 return;
1098 } 1098 }
1099 } else if (unlikely(skb->len > mss_now)) { 1099 } else if (unlikely(skb->len > mss_now)) {
1100 if (unlikely(tcp_fragment(sk, skb, mss_now))) 1100 if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
1101 return; 1101 return;
1102 } 1102 }
1103 1103
@@ -1388,7 +1388,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1388 int old_factor = tcp_skb_pcount(skb); 1388 int old_factor = tcp_skb_pcount(skb);
1389 int new_factor; 1389 int new_factor;
1390 1390
1391 if (tcp_fragment(sk, skb, cur_mss)) 1391 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1392 return -ENOMEM; /* We'll try again later. */ 1392 return -ENOMEM; /* We'll try again later. */
1393 1393
1394 /* New SKB created, account for it. */ 1394 /* New SKB created, account for it. */
@@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk)
1991 skb->len > mss) { 1991 skb->len > mss) {
1992 seg_size = min(seg_size, mss); 1992 seg_size = min(seg_size, mss);
1993 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 1993 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
1994 if (tcp_fragment(sk, skb, seg_size)) 1994 if (tcp_fragment(sk, skb, seg_size, mss))
1995 return -1; 1995 return -1;
1996 /* SWS override triggered forced fragmentation. 1996 /* SWS override triggered forced fragmentation.
1997 * Disable TSO, the connection is too sick. */ 1997 * Disable TSO, the connection is too sick. */
@@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk)
2000 sk->sk_route_caps &= ~NETIF_F_TSO; 2000 sk->sk_route_caps &= ~NETIF_F_TSO;
2001 } 2001 }
2002 } else if (!tcp_skb_pcount(skb)) 2002 } else if (!tcp_skb_pcount(skb))
2003 tcp_set_skb_tso_segs(sk, skb); 2003 tcp_set_skb_tso_segs(sk, skb, mss);
2004 2004
2005 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2005 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2006 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2006 TCP_SKB_CB(skb)->when = tcp_time_stamp;