Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	94
1 file changed, 39 insertions(+), 55 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f2..c1f259d2d33b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
 
-		if (tp->rx_opt.dsack) {
-			tp->rx_opt.dsack = 0;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-		}
+		tp->rx_opt.dsack = 0;
 	}
 }
 
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
+	unsigned int eff_sacks;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
-	if (unlikely(tp->rx_opt.eff_sacks)) {
+	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
+	if (unlikely(eff_sacks)) {
 		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
 		opts->num_sack_blocks =
-			min_t(unsigned, tp->rx_opt.eff_sacks,
+			min_t(unsigned, eff_sacks,
 			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
 			      TCPOLEN_SACK_PERBLOCK);
 		size += TCPOLEN_SACK_BASE_ALIGNED +
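
A note on the two hunks above: tp->rx_opt.eff_sacks was a cached copy of the number of SACK blocks to emit, kept in sync by hand (the reset removed in the first hunk existed only for that bookkeeping). The patch drops the cache and derives the value where it is needed: num_sacks regular blocks plus one extra slot when a D-SACK is pending. A minimal standalone sketch of that relationship, using a hypothetical stand-in struct rather than the kernel's types:

	#include <assert.h>

	/* Stand-in for the relevant rx_opt fields (illustrative only). */
	struct rx_opts {
		int num_sacks;	/* regular SACK blocks queued for the next ACK */
		int dsack;	/* 1 if a duplicate-SACK block is pending      */
	};

	/* What the removed eff_sacks cache always amounted to. */
	static int eff_sacks(const struct rx_opts *rx)
	{
		return rx->num_sacks + rx->dsack;
	}

	int main(void)
	{
		struct rx_opts rx = { .num_sacks = 2, .dsack = 1 };
		assert(eff_sacks(&rx) == 3);	/* 2 blocks + 1 D-SACK slot */
		return 0;
	}
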
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr		= 0;
 
 	/* The urg_mode check is necessary during a below snd_una win probe */
-	if (unlikely(tcp_urg_mode(tp) &&
-		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
-		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
-		th->urg			= 1;
+	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
+		if (before(tp->snd_up, tcb->seq + 0x10000)) {
+			th->urg_ptr = htons(tp->snd_up - tcb->seq);
+			th->urg = 1;
+		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+			th->urg_ptr = 0xFFFF;
+			th->urg = 1;
+		}
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
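
A note on the hunk above: the old test armed URG only when snd_up fell within 0xFFFF bytes of the segment's starting sequence number, so urgent mode could silently vanish from segments far below the urgent point. The new code keeps signalling urgent mode in that case by advertising the maximum 16-bit offset, as long as the urgent point is still beyond snd_nxt. A standalone sketch of the decision, assuming the usual mod-2^32 sequence comparison (helper names here are hypothetical, not the kernel's):

	#include <stdint.h>

	/* Wraparound-safe "a < b" on 32-bit sequence numbers, mirroring
	 * the kernel's before() helper.
	 */
	static int seq_before(uint32_t a, uint32_t b)
	{
		return (int32_t)(a - b) < 0;
	}

	/* Returns the urg_ptr value to advertise, or 0 when URG stays
	 * clear. (On the wire 0 is also a legal offset; this sketch
	 * ignores that corner for clarity.)
	 */
	static uint32_t urg_ptr_for(uint32_t seq, uint32_t snd_up,
				    uint32_t snd_nxt)
	{
		if (!seq_before(seq, snd_up))
			return 0;		/* no urgent data ahead of this segment */
		if (seq_before(snd_up, seq + 0x10000))
			return snd_up - seq;	/* urgent point fits in 16 bits */
		if (seq_before(snd_nxt, seq + 0xFFFF))
			return 0xFFFF;		/* too far ahead: advertise the maximum */
		return 0;
	}
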
@@ -763,11 +766,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	struct sk_buff *buff;
 	int nsize, old_factor;
 	int nlen;
-	u16 flags;
+	u8 flags;
 
 	BUG_ON(len > skb->len);
 
-	tcp_clear_retrans_hints_partial(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -850,6 +852,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 			tcp_verify_left_out(tp);
 		}
 		tcp_adjust_fackets_out(sk, skb, diff);
+
+		if (tp->lost_skb_hint &&
+		    before(TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
+		    (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+			tp->lost_cnt_hint -= diff;
 	}
 
 	/* Link BUFF into the send queue. */
@@ -913,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
+		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
 
 	return 0;
 }
@@ -974,15 +982,6 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
-/* Bound MSS / TSO packet size with the half of the window */
-static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
-{
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
-	else
-		return pktsize;
-}
-
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
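
A note on the hunk above: tcp_bound_to_half_wnd() leaves this file together with the xmit_size_goal code further down, which was its only local caller; this diff only shows the removal, and the clamp itself is presumably relocated rather than lost in the wider series. For reference, a standalone rendering of what the deleted helper computed (parameters replace the struct tcp_sock fields it read):

	#include <stdint.h>

	/* Never build a packet larger than half of the peer's largest
	 * advertised window, with a floor tied to the 68-byte minimum
	 * IPv4 MTU.
	 */
	static int bound_to_half_wnd(uint32_t max_window,
				     uint32_t tcp_header_len, int pktsize)
	{
		uint32_t half = max_window >> 1;
		uint32_t floor = 68U - tcp_header_len;

		if (max_window && (uint32_t)pktsize > half)
			return half > floor ? half : floor;	/* max(half, floor) */
		return pktsize;
	}
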
@@ -1029,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  */
-unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+unsigned int tcp_current_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
-	u16 xmit_size_goal;
-	int doing_tso = 0;
 	unsigned header_len;
 	struct tcp_out_options opts;
 	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
-	if (large_allowed && sk_can_gso(sk))
-		doing_tso = 1;
-
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1062,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 		mss_now -= delta;
 	}
 
-	xmit_size_goal = mss_now;
-
-	if (doing_tso) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-		xmit_size_goal -= (xmit_size_goal % mss_now);
-	}
-	tp->xmit_size_goal = xmit_size_goal;
-
 	return mss_now;
 }
 
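
A note on the two hunks above: tcp_current_mss() loses its large_allowed parameter and now returns only the MSS; the per-call bookkeeping of tp->xmit_size_goal (the preferred TSO super-packet size) disappears from this function, presumably computed elsewhere in the series. The deleted goal was the largest multiple of mss_now that fits in a maximal GSO frame after headers, clamped to half the peer's max window. A rough standalone sketch of that arithmetic, with the three header lengths folded into one parameter for brevity:

	#include <stdint.h>

	/* Approximation of the removed xmit_size_goal computation;
	 * hdr_len stands in for net_header_len + icsk_ext_hdr_len +
	 * tcp_header_len.
	 */
	static uint32_t old_xmit_size_goal(uint32_t gso_max_size,
					   uint32_t hdr_len,
					   uint32_t mss_now,
					   uint32_t max_window)
	{
		uint32_t goal = (gso_max_size - 1) - hdr_len;

		if (max_window && goal > (max_window >> 1))
			goal = max_window >> 1;	/* tcp_bound_to_half_wnd(), minus its floor */
		goal -= goal % mss_now;		/* round down to whole segments */
		return goal;
	}
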
@@ -1256,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk)
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
 			      tp->nonagle : TCP_NAGLE_PUSH)));
 }
@@ -1273,7 +1254,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
-	u16 flags;
+	u8 flags;
 
 	/* All of a TSO frame must be composed of paged data.  */
 	if (skb->len != skb->data_len)
@@ -1352,6 +1333,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if (limit >= sk->sk_gso_max_size)
 		goto send_now;
 
+	/* Middle in queue won't get any more data, full sendable already? */
+	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
+		goto send_now;
+
 	if (sysctl_tcp_tso_win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
@@ -1405,11 +1390,11 @@ static int tcp_mtu_probe(struct sock *sk)
 	    icsk->icsk_mtup.probe_size ||
 	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
 	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.eff_sacks)
+	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-	mss_now = tcp_current_mss(sk, 0);
+	mss_now = tcp_current_mss(sk);
 	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
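
A note on the hunk above: besides switching to the eff_sacks-free check, the probe sizing is unchanged and worth spelling out. The probe is double the cached MSS, and the sender additionally insists that reordering + 1 further segments' worth of space is available, presumably so that a lost probe can still be detected through duplicate ACKs. A worked example with illustrative values mss_cache = 1460 and reordering = 3:

	#include <assert.h>

	/* The sizing rule from tcp_mtu_probe(), as plain arithmetic. */
	static unsigned int probe_size_needed(unsigned int mss_cache,
					      unsigned int reordering,
					      unsigned int *probe_size)
	{
		*probe_size = 2 * mss_cache;	/* "just double the MSS" */
		return *probe_size + (reordering + 1) * mss_cache;
	}

	int main(void)
	{
		unsigned int probe;
		unsigned int needed = probe_size_needed(1460, 3, &probe);

		assert(probe == 2920);	/* 2 * 1460 */
		assert(needed == 8760);	/* 2920 + 4 * 1460 */
		return 0;
	}
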
@@ -1754,11 +1739,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 	int skb_size, next_skb_size;
-	u16 flags;
 
 	skb_size = skb->len;
 	next_skb_size = next_skb->len;
-	flags = TCP_SKB_CB(skb)->flags;
 
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
@@ -1778,9 +1761,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
-	/* Merge over control information. */
-	flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
-	TCP_SKB_CB(skb)->flags = flags;
+	/* Merge over control information. This moves PSH/FIN etc. over */
+	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
 
 	/* All done, get rid of second SKB and account for it so
 	 * packet counting does not break.
@@ -1894,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
-	cur_mss = tcp_current_mss(sk, 0);
+	cur_mss = tcp_current_mss(sk);
 
 	/* If receiver has shrunk his window, and skb is out of
 	 * new window, do not retransmit it. The exception is the
@@ -1908,6 +1890,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (skb->len > cur_mss) {
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
+	} else {
+		tcp_init_tso_segs(sk, skb, cur_mss);
 	}
 
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -2061,7 +2045,7 @@ begin_fwd:
 			goto begin_fwd;
 
 		} else if (!(sacked & TCPCB_LOST)) {
-			if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS))
+			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
 				hole = skb;
 			continue;
 
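
A note on the hunk above: in the retransmit-queue walk, hole remembers the first not-yet-retransmitted skb that might need a rescue retransmit later. An skb the receiver has already SACKed can never be such a hole, so the fix adds TCPCB_SACKED_ACKED to the exclusion mask. Reduced to a standalone predicate (flag values are believed to mirror the kernel's tcp.h):

	#include <stdint.h>

	#define TCPCB_SACKED_ACKED	0x01	/* peer acknowledged via SACK */
	#define TCPCB_SACKED_RETRANS	0x02	/* already retransmitted      */
	#define TCPCB_LOST		0x04	/* marked lost                */

	/* May this skb be remembered as the retransmit "hole"?
	 * (The LOST exclusion comes from the surrounding else-if.)
	 */
	static int can_be_hole(uint8_t sacked)
	{
		return !(sacked & (TCPCB_LOST | TCPCB_SACKED_RETRANS |
				   TCPCB_SACKED_ACKED));
	}
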
@@ -2100,7 +2084,7 @@ void tcp_send_fin(struct sock *sk)
 	 * unsent frames.  But be careful about outgoing SACKS
 	 * and IP options.
 	 */
-	mss_now = tcp_current_mss(sk, 1);
+	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2325,7 +2309,7 @@ static void tcp_connect_init(struct sock *sk)
 	sk->sk_err = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
-	tcp_init_wl(tp, tp->write_seq, 0);
+	tcp_init_wl(tp, 0);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
@@ -2512,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk)
 	if ((skb = tcp_send_head(sk)) != NULL &&
 	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 		int err;
-		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int mss = tcp_current_mss(sk);
 		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))