diff options
author | David S. Miller <davem@davemloft.net> | 2005-08-04 22:52:01 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-08-05 00:43:14 -0400 |
commit | 846998ae87a80b0fd45b4cf5cf001a159d746f27 (patch) | |
tree | a81d0f6d2b1f1817d557bd5f5611c81ff9026b51 | |
parent | 0c3dba1534569734ba353afdf3f11def497ff2ac (diff) |
[PATCH] tcp: fix TSO sizing bugs
MSS changes can be lost since we preemptively initialize the tso_segs count
for an SKB before we 100% commit to sending it out.
So, by the time we send it out, the tso_size information can be stale due
to PMTU events. This mucks up all of the logic in our send engine, and can
even result in the BUG() triggering in tcp_tso_should_defer().
Another problem we have is that we're storing the tp->mss_cache, not the
SACK-block-normalized MSS, as the tso_size. That's wrong too.
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | net/ipv4/tcp_output.c | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3f8ea1bfa9c..e118b4b5b326 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) | |||
403 | sk->sk_send_head = skb; | 403 | sk->sk_send_head = skb; |
404 | } | 404 | } |
405 | 405 | ||
406 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) | 406 | static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) |
407 | { | 407 | { |
408 | struct tcp_sock *tp = tcp_sk(sk); | 408 | if (skb->len <= mss_now || |
409 | |||
410 | if (skb->len <= tp->mss_cache || | ||
411 | !(sk->sk_route_caps & NETIF_F_TSO)) { | 409 | !(sk->sk_route_caps & NETIF_F_TSO)) { |
412 | /* Avoid the costly divide in the normal | 410 | /* Avoid the costly divide in the normal |
413 | * non-TSO case. | 411 | * non-TSO case. |
@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) | |||
417 | } else { | 415 | } else { |
418 | unsigned int factor; | 416 | unsigned int factor; |
419 | 417 | ||
420 | factor = skb->len + (tp->mss_cache - 1); | 418 | factor = skb->len + (mss_now - 1); |
421 | factor /= tp->mss_cache; | 419 | factor /= mss_now; |
422 | skb_shinfo(skb)->tso_segs = factor; | 420 | skb_shinfo(skb)->tso_segs = factor; |
423 | skb_shinfo(skb)->tso_size = tp->mss_cache; | 421 | skb_shinfo(skb)->tso_size = mss_now; |
424 | } | 422 | } |
425 | } | 423 | } |
426 | 424 | ||
@@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) | |||
429 | * packet to the list. This won't be called frequently, I hope. | 427 | * packet to the list. This won't be called frequently, I hope. |
430 | * Remember, these are still headerless SKBs at this point. | 428 | * Remember, these are still headerless SKBs at this point. |
431 | */ | 429 | */ |
432 | static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) | 430 | static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) |
433 | { | 431 | { |
434 | struct tcp_sock *tp = tcp_sk(sk); | 432 | struct tcp_sock *tp = tcp_sk(sk); |
435 | struct sk_buff *buff; | 433 | struct sk_buff *buff; |
@@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) | |||
492 | } | 490 | } |
493 | 491 | ||
494 | /* Fix up tso_factor for both original and new SKB. */ | 492 | /* Fix up tso_factor for both original and new SKB. */ |
495 | tcp_set_skb_tso_segs(sk, skb); | 493 | tcp_set_skb_tso_segs(sk, skb, mss_now); |
496 | tcp_set_skb_tso_segs(sk, buff); | 494 | tcp_set_skb_tso_segs(sk, buff, mss_now); |
497 | 495 | ||
498 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { | 496 | if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { |
499 | tp->lost_out += tcp_skb_pcount(skb); | 497 | tp->lost_out += tcp_skb_pcount(skb); |
@@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
569 | * factor and mss. | 567 | * factor and mss. |
570 | */ | 568 | */ |
571 | if (tcp_skb_pcount(skb) > 1) | 569 | if (tcp_skb_pcount(skb) > 1) |
572 | tcp_set_skb_tso_segs(sk, skb); | 570 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); |
573 | 571 | ||
574 | return 0; | 572 | return 0; |
575 | } | 573 | } |
@@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk | |||
734 | /* This must be invoked the first time we consider transmitting | 732 | /* This must be invoked the first time we consider transmitting |
735 | * SKB onto the wire. | 733 | * SKB onto the wire. |
736 | */ | 734 | */ |
737 | static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) | 735 | static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) |
738 | { | 736 | { |
739 | int tso_segs = tcp_skb_pcount(skb); | 737 | int tso_segs = tcp_skb_pcount(skb); |
740 | 738 | ||
741 | if (!tso_segs) { | 739 | if (!tso_segs || |
742 | tcp_set_skb_tso_segs(sk, skb); | 740 | (tso_segs > 1 && |
741 | skb_shinfo(skb)->tso_size != mss_now)) { | ||
742 | tcp_set_skb_tso_segs(sk, skb, mss_now); | ||
743 | tso_segs = tcp_skb_pcount(skb); | 743 | tso_segs = tcp_skb_pcount(skb); |
744 | } | 744 | } |
745 | return tso_segs; | 745 | return tso_segs; |
@@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, | |||
817 | struct tcp_sock *tp = tcp_sk(sk); | 817 | struct tcp_sock *tp = tcp_sk(sk); |
818 | unsigned int cwnd_quota; | 818 | unsigned int cwnd_quota; |
819 | 819 | ||
820 | tcp_init_tso_segs(sk, skb); | 820 | tcp_init_tso_segs(sk, skb, cur_mss); |
821 | 821 | ||
822 | if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) | 822 | if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) |
823 | return 0; | 823 | return 0; |
@@ -854,7 +854,7 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) | |||
854 | * know that all the data is in scatter-gather pages, and that the | 854 | * know that all the data is in scatter-gather pages, and that the |
855 | * packet has never been sent out before (and thus is not cloned). | 855 | * packet has never been sent out before (and thus is not cloned). |
856 | */ | 856 | */ |
857 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) | 857 | static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) |
858 | { | 858 | { |
859 | struct sk_buff *buff; | 859 | struct sk_buff *buff; |
860 | int nlen = skb->len - len; | 860 | int nlen = skb->len - len; |
@@ -887,8 +887,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) | |||
887 | skb_split(skb, buff, len); | 887 | skb_split(skb, buff, len); |
888 | 888 | ||
889 | /* Fix up tso_factor for both original and new SKB. */ | 889 | /* Fix up tso_factor for both original and new SKB. */ |
890 | tcp_set_skb_tso_segs(sk, skb); | 890 | tcp_set_skb_tso_segs(sk, skb, mss_now); |
891 | tcp_set_skb_tso_segs(sk, buff); | 891 | tcp_set_skb_tso_segs(sk, buff, mss_now); |
892 | 892 | ||
893 | /* Link BUFF into the send queue. */ | 893 | /* Link BUFF into the send queue. */ |
894 | skb_header_release(buff); | 894 | skb_header_release(buff); |
@@ -976,7 +976,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
976 | if (unlikely(!skb)) | 976 | if (unlikely(!skb)) |
977 | return 0; | 977 | return 0; |
978 | 978 | ||
979 | tso_segs = tcp_init_tso_segs(sk, skb); | 979 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
980 | cwnd_quota = tcp_cwnd_test(tp, skb); | 980 | cwnd_quota = tcp_cwnd_test(tp, skb); |
981 | if (unlikely(!cwnd_quota)) | 981 | if (unlikely(!cwnd_quota)) |
982 | goto out; | 982 | goto out; |
@@ -1006,11 +1006,11 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1006 | limit = skb->len - trim; | 1006 | limit = skb->len - trim; |
1007 | } | 1007 | } |
1008 | if (skb->len > limit) { | 1008 | if (skb->len > limit) { |
1009 | if (tso_fragment(sk, skb, limit)) | 1009 | if (tso_fragment(sk, skb, limit, mss_now)) |
1010 | break; | 1010 | break; |
1011 | } | 1011 | } |
1012 | } else if (unlikely(skb->len > mss_now)) { | 1012 | } else if (unlikely(skb->len > mss_now)) { |
1013 | if (unlikely(tcp_fragment(sk, skb, mss_now))) | 1013 | if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) |
1014 | break; | 1014 | break; |
1015 | } | 1015 | } |
1016 | 1016 | ||
@@ -1039,7 +1039,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) | |||
1039 | skb = sk->sk_send_head; | 1039 | skb = sk->sk_send_head; |
1040 | if (!skb) | 1040 | if (!skb) |
1041 | break; | 1041 | break; |
1042 | tso_segs = tcp_init_tso_segs(sk, skb); | 1042 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
1043 | } | 1043 | } |
1044 | 1044 | ||
1045 | if (likely(sent_pkts)) { | 1045 | if (likely(sent_pkts)) { |
@@ -1076,7 +1076,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1076 | 1076 | ||
1077 | BUG_ON(!skb || skb->len < mss_now); | 1077 | BUG_ON(!skb || skb->len < mss_now); |
1078 | 1078 | ||
1079 | tso_segs = tcp_init_tso_segs(sk, skb); | 1079 | tso_segs = tcp_init_tso_segs(sk, skb, mss_now); |
1080 | cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); | 1080 | cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); |
1081 | 1081 | ||
1082 | if (likely(cwnd_quota)) { | 1082 | if (likely(cwnd_quota)) { |
@@ -1093,11 +1093,11 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) | |||
1093 | limit = skb->len - trim; | 1093 | limit = skb->len - trim; |
1094 | } | 1094 | } |
1095 | if (skb->len > limit) { | 1095 | if (skb->len > limit) { |
1096 | if (unlikely(tso_fragment(sk, skb, limit))) | 1096 | if (unlikely(tso_fragment(sk, skb, limit, mss_now))) |
1097 | return; | 1097 | return; |
1098 | } | 1098 | } |
1099 | } else if (unlikely(skb->len > mss_now)) { | 1099 | } else if (unlikely(skb->len > mss_now)) { |
1100 | if (unlikely(tcp_fragment(sk, skb, mss_now))) | 1100 | if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now))) |
1101 | return; | 1101 | return; |
1102 | } | 1102 | } |
1103 | 1103 | ||
@@ -1388,7 +1388,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1388 | int old_factor = tcp_skb_pcount(skb); | 1388 | int old_factor = tcp_skb_pcount(skb); |
1389 | int new_factor; | 1389 | int new_factor; |
1390 | 1390 | ||
1391 | if (tcp_fragment(sk, skb, cur_mss)) | 1391 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) |
1392 | return -ENOMEM; /* We'll try again later. */ | 1392 | return -ENOMEM; /* We'll try again later. */ |
1393 | 1393 | ||
1394 | /* New SKB created, account for it. */ | 1394 | /* New SKB created, account for it. */ |
@@ -1991,7 +1991,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
1991 | skb->len > mss) { | 1991 | skb->len > mss) { |
1992 | seg_size = min(seg_size, mss); | 1992 | seg_size = min(seg_size, mss); |
1993 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 1993 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; |
1994 | if (tcp_fragment(sk, skb, seg_size)) | 1994 | if (tcp_fragment(sk, skb, seg_size, mss)) |
1995 | return -1; | 1995 | return -1; |
1996 | /* SWS override triggered forced fragmentation. | 1996 | /* SWS override triggered forced fragmentation. |
1997 | * Disable TSO, the connection is too sick. */ | 1997 | * Disable TSO, the connection is too sick. */ |
@@ -2000,7 +2000,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2000 | sk->sk_route_caps &= ~NETIF_F_TSO; | 2000 | sk->sk_route_caps &= ~NETIF_F_TSO; |
2001 | } | 2001 | } |
2002 | } else if (!tcp_skb_pcount(skb)) | 2002 | } else if (!tcp_skb_pcount(skb)) |
2003 | tcp_set_skb_tso_segs(sk, skb); | 2003 | tcp_set_skb_tso_segs(sk, skb, mss); |
2004 | 2004 | ||
2005 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; | 2005 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; |
2006 | TCP_SKB_CB(skb)->when = tcp_time_stamp; | 2006 | TCP_SKB_CB(skb)->when = tcp_time_stamp; |