Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	94
1 file changed, 39 insertions(+), 55 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f2..c1f259d2d33b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
 			*ptr++ = htonl(sp[this_sack].end_seq);
 		}
 
-		if (tp->rx_opt.dsack) {
-			tp->rx_opt.dsack = 0;
-			tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
-		}
+		tp->rx_opt.dsack = 0;
 	}
 }
 
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 	struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
 	unsigned size = 0;
+	unsigned int eff_sacks;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
 		size += TCPOLEN_TSTAMP_ALIGNED;
 	}
 
-	if (unlikely(tp->rx_opt.eff_sacks)) {
+	eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
+	if (unlikely(eff_sacks)) {
 		const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
 		opts->num_sack_blocks =
-			min_t(unsigned, tp->rx_opt.eff_sacks,
+			min_t(unsigned, eff_sacks,
 			      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
 			      TCPOLEN_SACK_PERBLOCK);
 		size += TCPOLEN_SACK_BASE_ALIGNED +
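
With eff_sacks gone from rx_opt, the option-sizing path above derives the
effective SACK count on demand as num_sacks + dsack instead of keeping a
third field in sync. A minimal user-space sketch of the sizing arithmetic,
with constant values assumed to match include/net/tcp.h
(MAX_TCP_OPTION_SPACE 40, TCPOLEN_SACK_BASE_ALIGNED 4,
TCPOLEN_SACK_PERBLOCK 8); sack_option_size() is a name local to this
sketch, not a kernel function:

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE	  40	/* assumed values, see lead-in */
#define TCPOLEN_SACK_BASE_ALIGNED  4
#define TCPOLEN_SACK_PERBLOCK	   8

/* Bytes of option space a segment spends on SACK blocks, given how many
 * bytes other options (e.g. timestamps) already consumed. */
static unsigned sack_option_size(unsigned num_sacks, unsigned dsack,
				 unsigned size_used)
{
	unsigned eff_sacks = num_sacks + dsack;	/* derived on demand now */
	unsigned remaining, blocks;

	if (!eff_sacks || size_used + TCPOLEN_SACK_BASE_ALIGNED +
			  TCPOLEN_SACK_PERBLOCK > MAX_TCP_OPTION_SPACE)
		return 0;
	remaining = MAX_TCP_OPTION_SPACE - size_used;
	blocks = (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
		 TCPOLEN_SACK_PERBLOCK;
	if (blocks > eff_sacks)
		blocks = eff_sacks;
	return TCPOLEN_SACK_BASE_ALIGNED + blocks * TCPOLEN_SACK_PERBLOCK;
}

int main(void)
{
	/* timestamps take 12 bytes, leaving room for three SACK blocks */
	printf("%u\n", sack_option_size(3, 1, 12));	/* prints 28 */
	return 0;
}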
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr = 0;
 
 	/* The urg_mode check is necessary during a below snd_una win probe */
-	if (unlikely(tcp_urg_mode(tp) &&
-		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
-		th->urg_ptr = htons(tp->snd_up - tcb->seq);
-		th->urg = 1;
+	if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
+		if (before(tp->snd_up, tcb->seq + 0x10000)) {
+			th->urg_ptr = htons(tp->snd_up - tcb->seq);
+			th->urg = 1;
+		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+			th->urg_ptr = 0xFFFF;
+			th->urg = 1;
+		}
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
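
th->urg_ptr is a 16-bit offset from the segment's own sequence number, so
the old between() test simply stopped advertising URG once snd_up ran more
than 0xFFFF past tcb->seq; the replacement keeps URG set and clamps the
pointer to 0xFFFF while the urgent point still lies beyond snd_nxt. A
standalone model of the new decision; before()/after() are re-implemented
here with the kernel's mod-2^32 semantics, and urg_ptr_for() is a name
local to this sketch:

#include <stdint.h>
#include <stdio.h>

static int before(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) < 0;
}
#define after(seq2, seq1)	before(seq1, seq2)

/* Returns the value for th->urg_ptr, or -1 when URG stays clear. */
static int urg_ptr_for(uint32_t seq, uint32_t snd_up, uint32_t snd_nxt)
{
	if (!before(seq, snd_up))
		return -1;		/* no urgent data ahead of this seg */
	if (before(snd_up, seq + 0x10000))
		return (int)(uint16_t)(snd_up - seq);	/* exact offset */
	if (after(seq + 0xFFFF, snd_nxt))
		return 0xFFFF;		/* urgent point past snd_nxt: clamp */
	return -1;
}

int main(void)
{
	printf("%d\n", urg_ptr_for(1000, 1500, 2000));	 /* 500 */
	printf("%d\n", urg_ptr_for(1000, 200000, 3000)); /* 65535 */
	return 0;
}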
@@ -763,11 +766,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	struct sk_buff *buff;
 	int nsize, old_factor;
 	int nlen;
-	u16 flags;
+	u8 flags;
 
 	BUG_ON(len > skb->len);
 
-	tcp_clear_retrans_hints_partial(tp);
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
 		nsize = 0;
@@ -850,6 +852,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 			tcp_verify_left_out(tp);
 		}
 		tcp_adjust_fackets_out(sk, skb, diff);
+
+		if (tp->lost_skb_hint &&
+		    before(TCP_SKB_CB(skb)->seq,
+			   TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
+		    (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+			tp->lost_cnt_hint -= diff;
 	}
 
 	/* Link BUFF into the send queue. */
@@ -913,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 	 * factor and mss.
 	 */
 	if (tcp_skb_pcount(skb) > 1)
-		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
+		tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
 
 	return 0;
 }
@@ -974,15 +982,6 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
-/* Bound MSS / TSO packet size with the half of the window */
-static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
-{
-	if (tp->max_window && pktsize > (tp->max_window >> 1))
-		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
-	else
-		return pktsize;
-}
-
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -1029,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 /* Compute the current effective MSS, taking SACKs and IP options,
  * and even PMTU discovery events into account.
  */
-unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
+unsigned int tcp_current_mss(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
 	u32 mss_now;
-	u16 xmit_size_goal;
-	int doing_tso = 0;
 	unsigned header_len;
 	struct tcp_out_options opts;
 	struct tcp_md5sig_key *md5;
 
 	mss_now = tp->mss_cache;
 
-	if (large_allowed && sk_can_gso(sk))
-		doing_tso = 1;
-
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1062,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 		mss_now -= delta;
 	}
 
-	xmit_size_goal = mss_now;
-
-	if (doing_tso) {
-		xmit_size_goal = ((sk->sk_gso_max_size - 1) -
-				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  inet_csk(sk)->icsk_ext_hdr_len -
-				  tp->tcp_header_len);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-		xmit_size_goal -= (xmit_size_goal % mss_now);
-	}
-	tp->xmit_size_goal = xmit_size_goal;
-
 	return mss_now;
 }
 
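
tcp_current_mss() now returns only the MSS: the TSO send-size goal, and
with it the large_allowed flag and the tcp_bound_to_half_wnd() helper
removed further up, no longer live in this function. The hunk shows only
the removal; presumably the goal computation was relocated rather than
dropped, since senders still need one. A sketch of what the dropped code
computed, with the half-window bound folded in and its 68-byte floor
omitted for brevity (parameter names are illustrative, not kernel fields):

#include <stdint.h>

/* Largest chunk worth handing to the NIC at once: start from the biggest
 * GSO frame allowed, strip header overhead, cap at half the peer's
 * largest-ever advertised window, then round down to whole segments. */
static uint32_t xmit_size_goal(uint32_t gso_max_size, uint32_t hdr_overhead,
			       uint32_t max_window, uint32_t mss_now)
{
	uint32_t goal = (gso_max_size - 1) - hdr_overhead;

	if (max_window && goal > (max_window >> 1))
		goal = max_window >> 1;		/* bound to half window */
	goal -= goal % mss_now;			/* whole segments only */
	return goal ? goal : mss_now;
}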
@@ -1256,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk)
 	struct sk_buff *skb = tcp_send_head(sk);
 
 	return (skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
 			      tp->nonagle : TCP_NAGLE_PUSH)));
 }
@@ -1273,7 +1254,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
-	u16 flags;
+	u8 flags;
 
 	/* All of a TSO frame must be composed of paged data. */
 	if (skb->len != skb->data_len)
@@ -1352,6 +1333,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if (limit >= sk->sk_gso_max_size)
 		goto send_now;
 
+	/* Middle in queue won't get any more data, full sendable already? */
+	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
+		goto send_now;
+
 	if (sysctl_tcp_tso_win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
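
The new middle-of-queue check leans on a write-queue invariant: fresh data
is only ever appended to the tail skb, so an skb anywhere else can never
grow. Once the window/cwnd limit already covers its whole length, deferring
in the hope of building a bigger TSO frame gains nothing. A toy model of
the decision (types and names local to the sketch):

#include <stdbool.h>
#include <stdint.h>

struct toy_skb {
	uint32_t len;	/* payload bytes queued in this skb */
	bool is_tail;	/* last skb in the write queue? */
};

/* true: transmit now; false: fall through to the win_divisor heuristics */
static bool should_send_now(const struct toy_skb *skb, uint32_t limit,
			    uint32_t gso_max_size)
{
	if (limit >= gso_max_size)
		return true;	/* can already build a maximal TSO frame */
	if (!skb->is_tail && limit >= skb->len)
		return true;	/* middle skb is fully sendable already */
	return false;
}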
@@ -1405,11 +1390,11 @@ static int tcp_mtu_probe(struct sock *sk)
 	    icsk->icsk_mtup.probe_size ||
 	    inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
 	    tp->snd_cwnd < 11 ||
-	    tp->rx_opt.eff_sacks)
+	    tp->rx_opt.num_sacks || tp->rx_opt.dsack)
 		return -1;
 
 	/* Very simple search strategy: just double the MSS. */
-	mss_now = tcp_current_mss(sk, 0);
+	mss_now = tcp_current_mss(sk);
 	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
@@ -1754,11 +1739,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 	int skb_size, next_skb_size;
-	u16 flags;
 
 	skb_size = skb->len;
 	next_skb_size = next_skb->len;
-	flags = TCP_SKB_CB(skb)->flags;
 
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
@@ -1778,9 +1761,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 	/* Update sequence range on original skb. */
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
-	/* Merge over control information. */
-	flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
-	TCP_SKB_CB(skb)->flags = flags;
+	/* Merge over control information. This moves PSH/FIN etc. over */
+	TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
 
 	/* All done, get rid of second SKB and account for it so
 	 * packet counting does not break.
@@ -1894,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
-	cur_mss = tcp_current_mss(sk, 0);
+	cur_mss = tcp_current_mss(sk);
 
 	/* If receiver has shrunk his window, and skb is out of
 	 * new window, do not retransmit it. The exception is the
@@ -1908,6 +1890,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (skb->len > cur_mss) {
 		if (tcp_fragment(sk, skb, cur_mss, cur_mss))
 			return -ENOMEM; /* We'll try again later. */
+	} else {
+		tcp_init_tso_segs(sk, skb, cur_mss);
 	}
 
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
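
When skb->len already fits within cur_mss, the tcp_fragment() call that
would have refreshed the skb's cached GSO segment count is skipped, so the
new else branch re-derives the count from cur_mss before retransmitting.
The hunk itself doesn't state the motivation, but a plausible reading is
that an MSS change since the original transmit would otherwise leave a
stale per-skb segment count. A sketch of the recomputation (tso_segs() is
local to this sketch; the kernel does this via tcp_init_tso_segs()):

#include <stdint.h>

/* Segments a retransmitted skb occupies under the current MSS. */
static uint32_t tso_segs(uint32_t skb_len, uint32_t mss)
{
	if (skb_len <= mss)
		return 1;			/* single segment */
	return (skb_len + mss - 1) / mss;	/* round up */
}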
@@ -2061,7 +2045,7 @@ begin_fwd:
 			goto begin_fwd;
 
 		} else if (!(sacked & TCPCB_LOST)) {
-			if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS))
+			if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
 				hole = skb;
 			continue;
 
@@ -2100,7 +2084,7 @@ void tcp_send_fin(struct sock *sk)
 	 * unsent frames. But be careful about outgoing SACKS
 	 * and IP options.
 	 */
-	mss_now = tcp_current_mss(sk, 1);
+	mss_now = tcp_current_mss(sk);
 
 	if (tcp_send_head(sk) != NULL) {
 		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2325,7 +2309,7 @@ static void tcp_connect_init(struct sock *sk)
 	sk->sk_err = 0;
 	sock_reset_flag(sk, SOCK_DONE);
 	tp->snd_wnd = 0;
-	tcp_init_wl(tp, tp->write_seq, 0);
+	tcp_init_wl(tp, 0);
 	tp->snd_una = tp->write_seq;
 	tp->snd_sml = tp->write_seq;
 	tp->snd_up = tp->write_seq;
@@ -2512,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk)
 	if ((skb = tcp_send_head(sk)) != NULL &&
 	    before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
 		int err;
-		unsigned int mss = tcp_current_mss(sk, 0);
+		unsigned int mss = tcp_current_mss(sk);
 		unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 		if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))