Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	| 651
1 file changed, 310 insertions(+), 341 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f4c1eef89af0..89f0188885c7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -61,27 +61,24 @@ int sysctl_tcp_base_mss __read_mostly = 512;
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
-static inline void tcp_packets_out_inc(struct sock *sk,
-				       const struct sk_buff *skb)
+static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int orig = tp->packets_out;
+	unsigned int prior_packets = tp->packets_out;
+
+	tcp_advance_send_head(sk, skb);
+	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
+
+	/* Don't override Nagle indefinately with F-RTO */
+	if (tp->frto_counter == 2)
+		tp->frto_counter = 3;
 
 	tp->packets_out += tcp_skb_pcount(skb);
-	if (!orig)
+	if (!prior_packets)
 		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					  inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
 }
 
-static void update_send_head(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	tcp_advance_send_head(sk, skb);
-	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-	tcp_packets_out_inc(sk, skb);
-}
-
 /* SND.NXT, if window was not shrunk.
 * If window has been shrunk, what should we make? It is not clear at all.
 * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-(
@@ -92,10 +89,10 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (!before(tp->snd_una+tp->snd_wnd, tp->snd_nxt))
+	if (!before(tcp_wnd_end(tp), tp->snd_nxt))
 		return tp->snd_nxt;
 	else
-		return tp->snd_una+tp->snd_wnd;
+		return tcp_wnd_end(tp);
 }
 
 /* Calculate mss to advertise in SYN segment.
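tcp_wnd_end() is not defined anywhere in this file. Given the one-for-one
replacement of "tp->snd_una + tp->snd_wnd" throughout this diff, it is
presumably a trivial helper added to include/net/tcp.h by the same series;
a sketch of the assumed definition:

	/* Highest sequence the peer's window lets us send (SND.UNA + SND.WND) */
	static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
	{
		return tp->snd_una + tp->snd_wnd;
	}
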
@@ -224,14 +221,14 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	 * following RFC2414. Senders, not following this RFC,
 	 * will be satisfied with 2.
 	 */
-	if (mss > (1<<*rcv_wscale)) {
+	if (mss > (1 << *rcv_wscale)) {
 		int init_cwnd = 4;
-		if (mss > 1460*3)
+		if (mss > 1460 * 3)
 			init_cwnd = 2;
 		else if (mss > 1460)
 			init_cwnd = 3;
-		if (*rcv_wnd > init_cwnd*mss)
-			*rcv_wnd = init_cwnd*mss;
+		if (*rcv_wnd > init_cwnd * mss)
+			*rcv_wnd = init_cwnd * mss;
 	}
 
 	/* Set the clamp no higher than max representable value */
@@ -281,11 +278,10 @@ static u16 tcp_select_window(struct sock *sk)
 		return new_win;
 }
 
-static inline void TCP_ECN_send_synack(struct tcp_sock *tp,
-				       struct sk_buff *skb)
+static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_CWR;
-	if (!(tp->ecn_flags&TCP_ECN_OK))
+	if (!(tp->ecn_flags & TCP_ECN_OK))
 		TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_ECE;
 }
 
@@ -295,7 +291,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
 
 	tp->ecn_flags = 0;
 	if (sysctl_tcp_ecn) {
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE|TCPCB_FLAG_CWR;
+		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
 		tp->ecn_flags = TCP_ECN_OK;
 	}
 }
@@ -317,7 +313,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 	if (skb->len != tcp_header_len &&
 	    !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
 		INET_ECN_xmit(sk);
-		if (tp->ecn_flags&TCP_ECN_QUEUE_CWR) {
+		if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
 			tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
 			tcp_hdr(skb)->cwr = 1;
 			skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
@@ -331,6 +327,26 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
 	}
 }
 
+/* Constructs common control bits of non-data skb. If SYN/FIN is present,
+ * auto increment end seqno.
+ */
+static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
+{
+	skb->csum = 0;
+
+	TCP_SKB_CB(skb)->flags = flags;
+	TCP_SKB_CB(skb)->sacked = 0;
+
+	skb_shinfo(skb)->gso_segs = 1;
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_type = 0;
+
+	TCP_SKB_CB(skb)->seq = seq;
+	if (flags & (TCPCB_FLAG_SYN | TCPCB_FLAG_FIN))
+		seq++;
+	TCP_SKB_CB(skb)->end_seq = seq;
+}
+
 static void tcp_build_and_update_options(__be32 *ptr, struct tcp_sock *tp,
 					 __u32 tstamp, __u8 **md5_hash)
 {
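tcp_init_nondata_skb() consolidates skb setup that tcp_send_fin(),
tcp_send_active_reset(), tcp_make_synack(), tcp_send_ack() and tcp_connect()
previously open-coded (see the later hunks). One point worth calling out,
shown as a commented sketch of the tcp_connect() call from the hunk further
down: SYN and FIN consume one unit of sequence space, so end_seq is advanced
automatically.

	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
	/* Because TCPCB_FLAG_SYN is set:
	 *   TCP_SKB_CB(buff)->seq     == old write_seq
	 *   TCP_SKB_CB(buff)->end_seq == old write_seq + 1
	 * For a bare ACK (tcp_send_ack) seq == end_seq instead.
	 */
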
@@ -434,7 +450,7 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 			       (TCPOPT_NOP << 16) |
 			       (TCPOPT_MD5SIG << 8) |
 			       TCPOLEN_MD5SIG);
-		*md5_hash = (__u8 *) ptr;
+		*md5_hash = (__u8 *)ptr;
 	}
 #endif
 }
@@ -450,7 +466,8 @@ static void tcp_syn_build_options(__be32 *ptr, int mss, int ts, int sack,
 * We are working here with either a clone of the original
 * SKB, or a fresh unique copy made by the retransmit engine.
 */
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, gfp_t gfp_mask)
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+			    gfp_t gfp_mask)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet;
@@ -554,8 +571,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr		= 0;
 
 	if (unlikely(tp->urg_mode &&
-		     between(tp->snd_up, tcb->seq+1, tcb->seq+0xFFFF))) {
-		th->urg_ptr		= htons(tp->snd_up-tcb->seq);
+		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
+		th->urg_ptr		= htons(tp->snd_up - tcb->seq);
 		th->urg			= 1;
 	}
 
@@ -619,7 +636,6 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 #undef SYSCTL_FLAG_SACK
 }
 
-
 /* This routine just queue's the buffer
 *
 * NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -633,10 +649,12 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	tp->write_seq = TCP_SKB_CB(skb)->end_seq;
 	skb_header_release(skb);
 	tcp_add_write_queue_tail(sk, skb);
-	sk_charge_skb(sk, skb);
+	sk->sk_wmem_queued += skb->truesize;
+	sk_mem_charge(sk, skb->truesize);
 }
 
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
+				 unsigned int mss_now)
 {
 	if (skb->len <= mss_now || !sk_can_gso(sk)) {
 		/* Avoid the costly divide in the normal
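Throughout this diff, sk_charge_skb(sk, skb) is replaced by the pair of
statements above. The old helper lived in include/net/sock.h and, if memory
serves, amounted to the following (a sketch, not part of this patch):

	static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb)
	{
		sk->sk_wmem_queued += skb->truesize;
		sk->sk_forward_alloc -= skb->truesize;
	}

sk_mem_charge() covers the sk_forward_alloc half through the new per-socket
memory accounting API, pairing with the sk_mem_uncharge() call that shows up
in tcp_trim_head() below.
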
@@ -653,23 +671,18 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned
 }
 
 /* When a modification to fackets out becomes necessary, we need to check
- * skb is counted to fackets_out or not. Another important thing is to
- * tweak SACK fastpath hint too as it would overwrite all changes unless
- * hint is also changed.
+ * skb is counted to fackets_out or not.
 */
-static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
+static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
 				   int decr)
 {
+	struct tcp_sock *tp = tcp_sk(sk);
+
 	if (!tp->sacked_out || tcp_is_reno(tp))
 		return;
 
-	if (!before(tp->highest_sack, TCP_SKB_CB(skb)->seq))
+	if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq))
 		tp->fackets_out -= decr;
-
-	/* cnt_hint is "off-by-one" compared with fackets_out (see sacktag) */
-	if (tp->fastpath_skb_hint != NULL &&
-	    after(TCP_SKB_CB(tp->fastpath_skb_hint)->seq, TCP_SKB_CB(skb)->seq))
-		tp->fastpath_cnt_hint -= decr;
 }
 
 /* Function to create two new TCP segments. Shrinks the given segment
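tcp_adjust_fackets_out() now takes the socket because tp->highest_sack is no
longer a bare sequence number in this series; tcp_highest_sack_seq() is
presumably an accessor added to include/net/tcp.h alongside it. A rough
sketch, assuming highest_sack became a pointer to the highest-SACKed skb:

	static inline u32 tcp_highest_sack_seq(struct tcp_sock *tp)
	{
		if (!tp->sacked_out)
			return tp->snd_una;
		if (tp->highest_sack == NULL)
			return tp->snd_nxt;
		return TCP_SKB_CB(tp->highest_sack)->seq;
	}
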
@@ -677,7 +690,8 @@ static void tcp_adjust_fackets_out(struct tcp_sock *tp, struct sk_buff *skb,
 * packet to the list. This won't be called frequently, I hope.
 * Remember, these are still headerless SKBs at this point.
 */
-int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
+int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
+		 unsigned int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
@@ -702,7 +716,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	if (buff == NULL)
 		return -ENOMEM; /* We'll just try again later. */
 
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	nlen = skb->len - len - nsize;
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
@@ -712,20 +727,16 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
 	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
 
-	if (tcp_is_sack(tp) && tp->sacked_out &&
-	    (TCP_SKB_CB(skb)->seq == tp->highest_sack))
-		tp->highest_sack = TCP_SKB_CB(buff)->seq;
-
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 	TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
-	TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
 
 	if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
 		/* Copy and checksum data tail into the new buffer. */
-		buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize),
+		buff->csum = csum_partial_copy_nocheck(skb->data + len,
+						       skb_put(buff, nsize),
 						       nsize, 0);
 
 		skb_trim(skb, len);
@@ -772,7 +783,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
 			tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
 			tcp_verify_left_out(tp);
 		}
-		tcp_adjust_fackets_out(tp, skb, diff);
+		tcp_adjust_fackets_out(sk, skb, diff);
 	}
 
 	/* Link BUFF into the send queue. */
@@ -792,7 +803,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 
 	eat = len;
 	k = 0;
-	for (i=0; i<skb_shinfo(skb)->nr_frags; i++) {
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 		if (skb_shinfo(skb)->frags[i].size <= eat) {
 			put_page(skb_shinfo(skb)->frags[i].page);
 			eat -= skb_shinfo(skb)->frags[i].size;
@@ -815,8 +826,7 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
 
 int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 {
-	if (skb_cloned(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 		return -ENOMEM;
 
 	/* If len == headlen, we avoid __skb_pull to preserve alignment. */
@@ -830,7 +840,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 
 	skb->truesize	   -= len;
 	sk->sk_wmem_queued -= len;
-	sk->sk_forward_alloc += len;
+	sk_mem_uncharge(sk, len);
 	sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
 
 	/* Any change of skb->len requires recalculation of tso
@@ -898,6 +908,15 @@ void tcp_mtup_init(struct sock *sk)
 	icsk->icsk_mtup.probe_size = 0;
 }
 
+/* Bound MSS / TSO packet size with the half of the window */
+static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
+{
+	if (tp->max_window && pktsize > (tp->max_window >> 1))
+		return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
+	else
+		return pktsize;
+}
+
 /* This function synchronize snd mss to current pmtu/exthdr set.
 
    tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
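A quick worked example of tcp_bound_to_half_wnd(): if the peer has ever
advertised at most tp->max_window = 4380 bytes, a candidate packet size of
2920 exceeds max_window >> 1 = 2190 and is clamped to max(2190, 68U -
tp->tcp_header_len) = 2190, while a candidate of 1460 passes through
untouched. The 68-byte term keeps the result positive even against a
pathologically small window (68 being the classic minimum IP MTU).
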
@@ -920,7 +939,6 @@ void tcp_mtup_init(struct sock *sk)
    NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
    are READ ONLY outside this function. --ANK (980731)
  */
-
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -931,10 +949,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 	icsk->icsk_mtup.search_high = pmtu;
 
 	mss_now = tcp_mtu_to_mss(sk, pmtu);
-
-	/* Bound mss with half of window */
-	if (tp->max_window && mss_now > (tp->max_window>>1))
-		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
+	mss_now = tcp_bound_to_half_wnd(tp, mss_now);
 
 	/* And store cached results */
 	icsk->icsk_pmtu_cookie = pmtu;
@@ -988,11 +1003,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 			  inet_csk(sk)->icsk_ext_hdr_len -
 			  tp->tcp_header_len);
 
-		if (tp->max_window &&
-		    (xmit_size_goal > (tp->max_window >> 1)))
-			xmit_size_goal = max((tp->max_window >> 1),
-					     68U - tp->tcp_header_len);
-
+		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
 		xmit_size_goal -= (xmit_size_goal % mss_now);
 	}
 	tp->xmit_size_goal = xmit_size_goal;
@@ -1001,13 +1012,11 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 }
 
 /* Congestion window validation. (RFC2861) */
-
 static void tcp_cwnd_validate(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 packets_out = tp->packets_out;
 
-	if (packets_out >= tp->snd_cwnd) {
+	if (tp->packets_out >= tp->snd_cwnd) {
 		/* Network is feed fully. */
 		tp->snd_cwnd_used = 0;
 		tp->snd_cwnd_stamp = tcp_time_stamp;
@@ -1022,19 +1031,35 @@ static void tcp_cwnd_validate(struct sock *sk)
 	}
 }
 
-static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ */
+static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+					unsigned int mss_now, unsigned int cwnd)
 {
-	u32 window, cwnd_len;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 needed, window, cwnd_len;
 
-	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 	cwnd_len = mss_now * cwnd;
-	return min(window, cwnd_len);
+
+	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+		return cwnd_len;
+
+	if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+		return cwnd_len;
+
+	needed = min(skb->len, window);
+	return needed - needed % mss_now;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
 * congestion window rules? If so, return how many segments are allowed.
 */
-static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
+					 struct sk_buff *skb)
 {
 	u32 in_flight, cwnd;
 
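Behavior of tcp_mss_split_point(), which replaces tcp_window_allows() plus
the open-coded "trim" logic deleted from tcp_write_xmit() and tcp_push_one()
in later hunks: a mid-queue skb that fits is sent at the full cwnd_len, and
only the queue tail, or an skb clipped by the receive window, is rounded down
to a multiple of mss_now so that segment boundaries stay MSS-aligned. For
example, with mss_now = 1000, window = 4500 and a 5000-byte skb at the tail,
needed = min(5000, 4500) = 4500 and the result is 4500 - (4500 % 1000) = 4000.
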
@@ -1054,13 +1079,12 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
 /* This must be invoked the first time we consider transmitting
 * SKB onto the wire.
 */
-static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
+static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
+			     unsigned int mss_now)
 {
 	int tso_segs = tcp_skb_pcount(skb);
 
-	if (!tso_segs ||
-	    (tso_segs > 1 &&
-	     tcp_skb_mss(skb) != mss_now)) {
+	if (!tso_segs || (tso_segs > 1 && tcp_skb_mss(skb) != mss_now)) {
 		tcp_set_skb_tso_segs(sk, skb, mss_now);
 		tso_segs = tcp_skb_pcount(skb);
 	}
@@ -1080,16 +1104,13 @@ static inline int tcp_minshall_check(const struct tcp_sock *tp)
 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
 *    With Minshall's modification: all sent small packets are ACKed.
 */
-
 static inline int tcp_nagle_check(const struct tcp_sock *tp,
 				  const struct sk_buff *skb,
 				  unsigned mss_now, int nonagle)
 {
 	return (skb->len < mss_now &&
-		((nonagle&TCP_NAGLE_CORK) ||
-		 (!nonagle &&
-		  tp->packets_out &&
-		  tcp_minshall_check(tp))));
+		((nonagle & TCP_NAGLE_CORK) ||
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1121,14 +1142,15 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
 }
 
 /* Does at least the first segment of SKB fit into the send window? */
-static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
+				   unsigned int cur_mss)
 {
 	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 	if (skb->len > cur_mss)
 		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
 
-	return !after(end_seq, tp->snd_una + tp->snd_wnd);
+	return !after(end_seq, tcp_wnd_end(tp));
 }
 
 /* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
@@ -1147,8 +1169,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
 		return 0;
 
 	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota &&
-	    !tcp_snd_wnd_test(tp, skb, cur_mss))
+	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
 		cwnd_quota = 0;
 
 	return cwnd_quota;
@@ -1172,7 +1193,8 @@ int tcp_may_send_now(struct sock *sk)
 * know that all the data is in scatter-gather pages, and that the
 * packet has never been sent out before (and thus is not cloned).
 */
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
+			unsigned int mss_now)
 {
 	struct sk_buff *buff;
 	int nlen = skb->len - len;
@@ -1182,11 +1204,12 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 	if (skb->len != skb->data_len)
 		return tcp_fragment(sk, skb, len, mss_now);
 
-	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+	buff = sk_stream_alloc_skb(sk, 0, GFP_ATOMIC);
 	if (unlikely(buff == NULL))
 		return -ENOMEM;
 
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	buff->truesize += nlen;
 	skb->truesize -= nlen;
 
@@ -1197,7 +1220,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
 
 	/* PSH and FIN should only be set in the second packet. */
 	flags = TCP_SKB_CB(skb)->flags;
-	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN | TCPCB_FLAG_PSH);
 	TCP_SKB_CB(buff)->flags = flags;
 
 	/* This packet was never sent out yet, so no SACK bits. */
@@ -1235,15 +1258,15 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 		goto send_now;
 
 	/* Defer for less than two clock ticks. */
-	if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1)
+	if (tp->tso_deferred &&
+	    ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
 
-	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
-	       (tp->snd_cwnd <= in_flight));
+	BUG_ON(tcp_skb_pcount(skb) <= 1 || (tp->snd_cwnd <= in_flight));
 
-	send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+	send_win = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
 
 	/* From in_flight test above, we know that cwnd > in_flight. */
 	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
@@ -1274,7 +1297,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	}
 
 	/* Ok, it looks like it is advisable to defer. */
-	tp->tso_deferred = 1 | (jiffies<<1);
+	tp->tso_deferred = 1 | (jiffies << 1);
 
 	return 1;
 
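The tso_deferred encoding above deserves a note: the timestamp is stored as
"1 | (jiffies << 1)" so the field can never read as zero (zero means "not
currently deferring"), even if jiffies itself is 0, and ">> 1" recovers a
31-bit jiffies value for comparison. Note also that the earlier hunk fixes
the old test, which wrongly checked "!tp->tso_deferred". A minimal sketch of
the round trip (helper names here are hypothetical, for illustration only):

	static inline u32 tso_defer_stamp(void)
	{
		return 1 | (jiffies << 1);	/* LSB forced on: never reads as 0 */
	}

	static inline int tso_defer_expired(u32 stamp)
	{
		/* compare 31-bit truncated jiffies values; > 1 means two
		 * or more ticks have passed since the stamp was taken */
		return ((jiffies << 1) >> 1) - (stamp >> 1) > 1;
	}
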
@@ -1286,7 +1309,8 @@ send_now:
 /* Create a new MTU probe if we are ready.
 * Returns 0 if we should wait to probe (no cwnd available),
 *         1 if a probe was sent,
- *        -1 otherwise */
+ *        -1 otherwise
+ */
 static int tcp_mtu_probe(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -1295,7 +1319,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	int len;
 	int probe_size;
 	int size_needed;
-	unsigned int pif;
 	int copy;
 	int mss_now;
 
@@ -1312,7 +1335,7 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	/* Very simple search strategy: just double the MSS. */
 	mss_now = tcp_current_mss(sk, 0);
-	probe_size = 2*tp->mss_cache;
+	probe_size = 2 * tp->mss_cache;
 	size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
 	if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
 		/* TODO: set timer for probe_converge_event */
@@ -1325,14 +1348,12 @@ static int tcp_mtu_probe(struct sock *sk)
 
 	if (tp->snd_wnd < size_needed)
 		return -1;
-	if (after(tp->snd_nxt + size_needed, tp->snd_una + tp->snd_wnd))
+	if (after(tp->snd_nxt + size_needed, tcp_wnd_end(tp)))
 		return 0;
 
-	/* Do we need to wait to drain cwnd? */
-	pif = tcp_packets_in_flight(tp);
-	if (pif + 2 > tp->snd_cwnd) {
-		/* With no packets in flight, don't stall. */
-		if (pif == 0)
+	/* Do we need to wait to drain cwnd? With none in flight, don't stall */
+	if (tcp_packets_in_flight(tp) + 2 > tp->snd_cwnd) {
+		if (!tcp_packets_in_flight(tp))
 			return -1;
 		else
 			return 0;
@@ -1341,10 +1362,10 @@ static int tcp_mtu_probe(struct sock *sk)
 	/* We're allowed to probe. Build it now. */
 	if ((nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC)) == NULL)
 		return -1;
-	sk_charge_skb(sk, nskb);
+	sk->sk_wmem_queued += nskb->truesize;
+	sk_mem_charge(sk, nskb->truesize);
 
 	skb = tcp_send_head(sk);
-	tcp_insert_write_queue_before(nskb, skb, sk);
 
 	TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
@@ -1353,30 +1374,32 @@ static int tcp_mtu_probe(struct sock *sk)
 	nskb->csum = 0;
 	nskb->ip_summed = skb->ip_summed;
 
-	len = 0;
-	while (len < probe_size) {
-		next = tcp_write_queue_next(sk, skb);
+	tcp_insert_write_queue_before(nskb, skb, sk);
 
+	len = 0;
+	tcp_for_write_queue_from_safe(skb, next, sk) {
 		copy = min_t(int, skb->len, probe_size - len);
 		if (nskb->ip_summed)
 			skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
 		else
 			nskb->csum = skb_copy_and_csum_bits(skb, 0,
-					 skb_put(nskb, copy), copy, nskb->csum);
+							    skb_put(nskb, copy),
+							    copy, nskb->csum);
 
 		if (skb->len <= copy) {
 			/* We've eaten all the data from this skb.
 			 * Throw it away. */
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
 			tcp_unlink_write_queue(skb, sk);
-			sk_stream_free_skb(sk, skb);
+			sk_wmem_free_skb(sk, skb);
 		} else {
 			TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
 						   ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
 			if (!skb_shinfo(skb)->nr_frags) {
 				skb_pull(skb, copy);
 				if (skb->ip_summed != CHECKSUM_PARTIAL)
-					skb->csum = csum_partial(skb->data, skb->len, 0);
+					skb->csum = csum_partial(skb->data,
+								 skb->len, 0);
 			} else {
 				__pskb_trim_head(skb, copy);
 				tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1385,7 +1408,9 @@ static int tcp_mtu_probe(struct sock *sk)
 		}
 
 		len += copy;
-		skb = next;
+
+		if (len >= probe_size)
+			break;
 	}
 	tcp_init_tso_segs(sk, nskb, nskb->len);
 
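tcp_for_write_queue_from_safe() replaces the manual while/next walk so that
the loop body may call tcp_unlink_write_queue() and free the current skb. It
is not defined in this file; presumably it is the deletion-safe walker added
to include/net/tcp.h, along these lines:

	#define tcp_for_write_queue_from_safe(skb, tmp, sk)			\
		for (tmp = skb->next;						\
		     skb != (struct sk_buff *)&(sk)->sk_write_queue;		\
		     skb = tmp, tmp = skb->next)

The "tmp" (here: next) pointer is loaded before the body runs, which is why
the old "skb = next" advance was deleted just above.
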
@@ -1394,9 +1419,9 @@ static int tcp_mtu_probe(struct sock *sk)
 	TCP_SKB_CB(nskb)->when = tcp_time_stamp;
 	if (!tcp_transmit_skb(sk, nskb, 1, GFP_ATOMIC)) {
 		/* Decrement cwnd here because we are sending
 		 * effectively two packets. */
 		tp->snd_cwnd--;
-		update_send_head(sk, nskb);
+		tcp_event_new_data_sent(sk, nskb);
 
 		icsk->icsk_mtup.probe_size = tcp_mss_to_mtu(sk, nskb->len);
 		tp->mtu_probe.probe_seq_start = TCP_SKB_CB(nskb)->seq;
@@ -1408,7 +1433,6 @@ static int tcp_mtu_probe(struct sock *sk)
 	return -1;
 }
 
-
 /* This routine writes packets to the network. It advances the
 * send_head. This happens as incoming acks open up the remote
 * window for us.
@@ -1464,17 +1488,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1488,7 +1504,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		/* Advance the send_head. This one is sent out.
 		 * This call will increment packets_out.
 		 */
-		update_send_head(sk, skb);
+		tcp_event_new_data_sent(sk, skb);
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
@@ -1521,7 +1537,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
@@ -1536,17 +1551,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 	BUG_ON(!tso_segs);
 
 	limit = mss_now;
-	if (tso_segs > 1) {
-		limit = tcp_window_allows(tp, skb,
-					  mss_now, cwnd_quota);
-
-		if (skb->len < limit) {
-			unsigned int trim = skb->len % mss_now;
-
-			if (trim)
-				limit = skb->len - trim;
-		}
-	}
+	if (tso_segs > 1)
+		limit = tcp_mss_split_point(sk, skb, mss_now,
+					    cwnd_quota);
 
 	if (skb->len > limit &&
 	    unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1556,7 +1563,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
 	if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-		update_send_head(sk, skb);
+		tcp_event_new_data_sent(sk, skb);
 		tcp_cwnd_validate(sk);
 		return;
 	}
@@ -1633,11 +1640,12 @@ u32 __tcp_select_window(struct sock *sk)
 	if (mss > full_space)
 		mss = full_space;
 
-	if (free_space < full_space/2) {
+	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
 		if (tcp_memory_pressure)
-			tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss);
+			tp->rcv_ssthresh = min(tp->rcv_ssthresh,
+					       4U * tp->advmss);
 
 		if (free_space < mss)
 			return 0;
@@ -1670,9 +1678,9 @@ u32 __tcp_select_window(struct sock *sk)
 		 * is too small.
 		 */
 		if (window <= free_space - mss || window > free_space)
-			window = (free_space/mss)*mss;
+			window = (free_space / mss) * mss;
 		else if (mss == full_space &&
-			 free_space > window + full_space/2)
+			 free_space > window + (full_space >> 1))
 			window = free_space;
 	}
 
@@ -1680,86 +1688,82 @@ u32 __tcp_select_window(struct sock *sk)
 }
 
 /* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int mss_now)
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
+				     int mss_now)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
+	int skb_size, next_skb_size;
+	u16 flags;
 
 	/* The first test we must make is that neither of these two
 	 * SKB's are still referenced by someone else.
 	 */
-	if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
-		int skb_size = skb->len, next_skb_size = next_skb->len;
-		u16 flags = TCP_SKB_CB(skb)->flags;
+	if (skb_cloned(skb) || skb_cloned(next_skb))
+		return;
 
-		/* Also punt if next skb has been SACK'd. */
-		if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
-			return;
+	skb_size = skb->len;
+	next_skb_size = next_skb->len;
+	flags = TCP_SKB_CB(skb)->flags;
 
-		/* Next skb is out of window. */
-		if (after(TCP_SKB_CB(next_skb)->end_seq, tp->snd_una+tp->snd_wnd))
-			return;
+	/* Also punt if next skb has been SACK'd. */
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
+		return;
 
-		/* Punt if not enough space exists in the first SKB for
-		 * the data in the second, or the total combined payload
-		 * would exceed the MSS.
-		 */
-		if ((next_skb_size > skb_tailroom(skb)) ||
-		    ((skb_size + next_skb_size) > mss_now))
-			return;
+	/* Next skb is out of window. */
+	if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
+		return;
 
-		BUG_ON(tcp_skb_pcount(skb) != 1 ||
-		       tcp_skb_pcount(next_skb) != 1);
+	/* Punt if not enough space exists in the first SKB for
+	 * the data in the second, or the total combined payload
+	 * would exceed the MSS.
+	 */
+	if ((next_skb_size > skb_tailroom(skb)) ||
+	    ((skb_size + next_skb_size) > mss_now))
+		return;
 
-		if (WARN_ON(tcp_is_sack(tp) && tp->sacked_out &&
-		    (TCP_SKB_CB(next_skb)->seq == tp->highest_sack)))
-			return;
+	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
-		/* Ok.	We will be able to collapse the packet. */
-		tcp_unlink_write_queue(next_skb, sk);
+	tcp_highest_sack_combine(sk, next_skb, skb);
 
-		skb_copy_from_linear_data(next_skb,
-					  skb_put(skb, next_skb_size),
-					  next_skb_size);
+	/* Ok.	We will be able to collapse the packet. */
+	tcp_unlink_write_queue(next_skb, sk);
 
-		if (next_skb->ip_summed == CHECKSUM_PARTIAL)
-			skb->ip_summed = CHECKSUM_PARTIAL;
+	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
+				  next_skb_size);
 
-		if (skb->ip_summed != CHECKSUM_PARTIAL)
-			skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
+	if (next_skb->ip_summed == CHECKSUM_PARTIAL)
+		skb->ip_summed = CHECKSUM_PARTIAL;
 
-		/* Update sequence range on original skb. */
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size);
 
-		/* Merge over control information. */
-		flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
-		TCP_SKB_CB(skb)->flags = flags;
+	/* Update sequence range on original skb. */
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
 
-		/* All done, get rid of second SKB and account for it so
-		 * packet counting does not break.
-		 */
-		TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
-		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
-			tp->retrans_out -= tcp_skb_pcount(next_skb);
-		if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST)
-			tp->lost_out -= tcp_skb_pcount(next_skb);
-		/* Reno case is special. Sigh... */
-		if (tcp_is_reno(tp) && tp->sacked_out)
-			tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
-
-		tcp_adjust_fackets_out(tp, next_skb, tcp_skb_pcount(next_skb));
-		tp->packets_out -= tcp_skb_pcount(next_skb);
-
-		/* changed transmit queue under us so clear hints */
-		tcp_clear_retrans_hints_partial(tp);
-		/* manually tune sacktag skb hint */
-		if (tp->fastpath_skb_hint == next_skb) {
-			tp->fastpath_skb_hint = skb;
-			tp->fastpath_cnt_hint -= tcp_skb_pcount(skb);
-		}
+	/* Merge over control information. */
+	flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */
+	TCP_SKB_CB(skb)->flags = flags;
 
-		sk_stream_free_skb(sk, next_skb);
-	}
+	/* All done, get rid of second SKB and account for it so
+	 * packet counting does not break.
+	 */
+	TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
+		tp->retrans_out -= tcp_skb_pcount(next_skb);
+	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
+		tp->lost_out -= tcp_skb_pcount(next_skb);
+	/* Reno case is special. Sigh... */
+	if (tcp_is_reno(tp) && tp->sacked_out)
+		tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+
+	tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
+	tp->packets_out -= tcp_skb_pcount(next_skb);
+
+	/* changed transmit queue under us so clear hints */
+	tcp_clear_retrans_hints_partial(tp);
+
+	sk_wmem_free_skb(sk, next_skb);
 }
 
 /* Do a simple retransmit without using the backoff mechanisms in
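tcp_highest_sack_combine() replaces the WARN_ON-and-return above: rather than
refusing to collapse when next_skb carries the highest-SACK hint, the hint is
migrated to the surviving skb before next_skb is unlinked. Presumably a small
helper in include/net/tcp.h, roughly:

	/* Sketch: 'old' is about to be unlinked from the write queue;
	 * keep the highest-SACK hint pointing at a live skb. */
	static inline void tcp_highest_sack_combine(struct sock *sk,
						    struct sk_buff *old,
						    struct sk_buff *new)
	{
		if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
			tcp_sk(sk)->highest_sack = new;
	}
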
@@ -1778,12 +1782,12 @@ void tcp_simple_retransmit(struct sock *sk)
 		if (skb == tcp_send_head(sk))
 			break;
 		if (skb->len > mss &&
-		    !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
-			if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
 				tp->retrans_out -= tcp_skb_pcount(skb);
 			}
-			if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
+			if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) {
 				TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
 				tp->lost_out += tcp_skb_pcount(skb);
 				lost = 1;
@@ -1848,7 +1852,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * case, when window is shrunk to zero. In this case
 	 * our retransmit serves as a zero window probe.
 	 */
-	if (!before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)
+	if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))
 	    && TCP_SKB_CB(skb)->seq != tp->snd_una)
 		return -EAGAIN;
 
@@ -1862,8 +1866,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (skb->len < (cur_mss >> 1)) &&
 	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
 	    (!tcp_skb_is_last(sk, skb)) &&
-	    (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	    (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
+	    (skb_shinfo(skb)->nr_frags == 0 &&
+	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
+	    (tcp_skb_pcount(skb) == 1 &&
+	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
 	    (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
@@ -1878,12 +1884,10 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
 	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
 		if (!pskb_trim(skb, 0)) {
-			TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1;
-			skb_shinfo(skb)->gso_segs = 1;
-			skb_shinfo(skb)->gso_size = 0;
-			skb_shinfo(skb)->gso_type = 0;
+			/* Reuse, even though it does some unnecessary work */
+			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
+					     TCP_SKB_CB(skb)->flags);
 			skb->ip_summed = CHECKSUM_NONE;
-			skb->csum = 0;
 		}
 	}
 
@@ -1901,7 +1905,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	tp->total_retrans++;
 
 #if FASTRETRANS_DEBUG > 0
-	if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
 		if (net_ratelimit())
 			printk(KERN_DEBUG "retrans_out leaked.\n");
 	}
@@ -1943,7 +1947,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	if (tp->retransmit_skb_hint) {
 		skb = tp->retransmit_skb_hint;
 		packet_cnt = tp->retransmit_cnt_hint;
-	}else{
+	} else {
 		skb = tcp_write_queue_head(sk);
 		packet_cnt = 0;
 	}
@@ -1970,7 +1974,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 				return;
 
 			if (sacked & TCPCB_LOST) {
-				if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
+				if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
 					if (tcp_retransmit_skb(sk, skb)) {
 						tp->retransmit_skb_hint = NULL;
 						return;
@@ -2028,7 +2032,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 				break;
 			tp->forward_skb_hint = skb;
 
-			if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack))
+			if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
 				break;
 
 			if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -2052,7 +2056,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	}
 }
 
-
 /* Send a fin. The caller locks the socket for us. This cannot be
 * allowed to fail queueing a FIN frame under any circumstances.
 */
@@ -2083,16 +2086,9 @@ void tcp_send_fin(struct sock *sk)
 
 		/* Reserve space for headers and prepare control bits. */
 		skb_reserve(skb, MAX_TCP_HEADER);
-		skb->csum = 0;
-		TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
-		TCP_SKB_CB(skb)->sacked = 0;
-		skb_shinfo(skb)->gso_segs = 1;
-		skb_shinfo(skb)->gso_size = 0;
-		skb_shinfo(skb)->gso_type = 0;
-
 		/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
-		TCP_SKB_CB(skb)->seq = tp->write_seq;
-		TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+		tcp_init_nondata_skb(skb, tp->write_seq,
+				     TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
 		tcp_queue_skb(sk, skb);
 	}
 	__tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF);
@@ -2116,16 +2112,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 
 	/* Reserve space for headers and prepare control bits. */
 	skb_reserve(skb, MAX_TCP_HEADER);
-	skb->csum = 0;
-	TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
-
+	tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
+			     TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
 	/* Send it off. */
-	TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk);
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 	if (tcp_transmit_skb(sk, skb, 0, priority))
 		NET_INC_STATS(LINUX_MIB_TCPABORTFAILED);
@@ -2138,14 +2127,14 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
 */
 int tcp_send_synack(struct sock *sk)
 {
-	struct sk_buff* skb;
+	struct sk_buff *skb;
 
 	skb = tcp_write_queue_head(sk);
-	if (skb == NULL || !(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_SYN)) {
+	if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)) {
 		printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
 		return -EFAULT;
 	}
-	if (!(TCP_SKB_CB(skb)->flags&TCPCB_FLAG_ACK)) {
+	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_ACK)) {
 		if (skb_cloned(skb)) {
 			struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
 			if (nskb == NULL)
@@ -2153,8 +2142,9 @@ int tcp_send_synack(struct sock *sk)
 			tcp_unlink_write_queue(skb, sk);
 			skb_header_release(nskb);
 			__tcp_add_write_queue_head(sk, nskb);
-			sk_stream_free_skb(sk, skb);
-			sk_charge_skb(sk, nskb);
+			sk_wmem_free_skb(sk, skb);
+			sk->sk_wmem_queued += nskb->truesize;
+			sk_mem_charge(sk, nskb->truesize);
 			skb = nskb;
 		}
 
@@ -2168,8 +2158,8 @@ int tcp_send_synack(struct sock *sk)
 /*
 * Prepare a SYN-ACK.
 */
-struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 				struct request_sock *req)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -2212,12 +2202,11 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	TCP_ECN_make_synack(req, th);
 	th->source = inet_sk(sk)->sport;
 	th->dest = ireq->rmt_port;
-	TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn;
-	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
-	TCP_SKB_CB(skb)->sacked = 0;
-	skb_shinfo(skb)->gso_segs = 1;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_type = 0;
+	/* Setting of flags are superfluous here for callers (and ECE is
+	 * not even correctly set)
+	 */
+	tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
+			     TCPCB_FLAG_SYN | TCPCB_FLAG_ACK);
 	th->seq = htonl(TCP_SKB_CB(skb)->seq);
 	th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -2249,7 +2238,6 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			      NULL)
 			     );
 
-	skb->csum = 0;
 	th->doff = (tcp_header_size >> 2);
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
@@ -2341,23 +2329,17 @@ int tcp_connect(struct sock *sk)
 	/* Reserve space for headers. */
 	skb_reserve(buff, MAX_TCP_HEADER);
 
-	TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
-	TCP_ECN_send_syn(sk, buff);
-	TCP_SKB_CB(buff)->sacked = 0;
-	skb_shinfo(buff)->gso_segs = 1;
-	skb_shinfo(buff)->gso_size = 0;
-	skb_shinfo(buff)->gso_type = 0;
-	buff->csum = 0;
 	tp->snd_nxt = tp->write_seq;
-	TCP_SKB_CB(buff)->seq = tp->write_seq++;
-	TCP_SKB_CB(buff)->end_seq = tp->write_seq;
+	tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
+	TCP_ECN_send_syn(sk, buff);
 
 	/* Send it off. */
 	TCP_SKB_CB(buff)->when = tcp_time_stamp;
 	tp->retrans_stamp = TCP_SKB_CB(buff)->when;
 	skb_header_release(buff);
 	__tcp_add_write_queue_tail(sk, buff);
-	sk_charge_skb(sk, buff);
+	sk->sk_wmem_queued += buff->truesize;
+	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
 	tcp_transmit_skb(sk, buff, 1, GFP_KERNEL);
 
@@ -2386,9 +2368,10 @@ void tcp_send_delayed_ack(struct sock *sk)
 
 	if (ato > TCP_DELACK_MIN) {
 		const struct tcp_sock *tp = tcp_sk(sk);
-		int max_ato = HZ/2;
+		int max_ato = HZ / 2;
 
-		if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
+		if (icsk->icsk_ack.pingpong ||
+		    (icsk->icsk_ack.pending & ICSK_ACK_PUSHED))
 			max_ato = TCP_DELACK_MAX;
 
 		/* Slow path, intersegment interval is "high". */
@@ -2398,7 +2381,7 @@ void tcp_send_delayed_ack(struct sock *sk)
2398 * directly. 2381 * directly.
2399 */ 2382 */
2400 if (tp->srtt) { 2383 if (tp->srtt) {
2401 int rtt = max(tp->srtt>>3, TCP_DELACK_MIN); 2384 int rtt = max(tp->srtt >> 3, TCP_DELACK_MIN);
2402 2385
2403 if (rtt < max_ato) 2386 if (rtt < max_ato)
2404 max_ato = rtt; 2387 max_ato = rtt;
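
Editor's note: the tcp_send_delayed_ack() hunk is pure reformatting, but the clamp it tidies is worth restating. Above TCP_DELACK_MIN the timeout is capped at HZ/2, tightened to TCP_DELACK_MAX for pingpong or pushed-ACK sessions, and tightened again by the smoothed RTT, which the kernel stores left-shifted by three bits. A standalone sketch, with HZ and the delack bounds as stand-in millisecond values:

#include <stdio.h>

#define HZ         1000		/* stand-in: 1 jiffy = 1 ms */
#define DELACK_MIN (HZ / 25)	/* 40 ms  */
#define DELACK_MAX (HZ / 5)	/* 200 ms */

static int clamp_ato(int ato, int srtt_shifted, int pingpong, int pushed)
{
	if (ato > DELACK_MIN) {
		int max_ato = HZ / 2;

		if (pingpong || pushed)
			max_ato = DELACK_MAX;

		if (srtt_shifted) {
			/* srtt >> 3 recovers the RTT estimate in jiffies */
			int rtt = srtt_shifted >> 3;

			if (rtt < DELACK_MIN)
				rtt = DELACK_MIN;
			if (rtt < max_ato)
				max_ato = rtt;
		}
		if (ato > max_ato)
			ato = max_ato;
	}
	return ato;
}

int main(void)
{
	/* pingpong session, srtt of 64 ms stored as 64 << 3 */
	printf("ato = %d ms\n", clamp_ato(100, 64 << 3, 1, 0));	/* 64 */
	return 0;
}
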
@@ -2432,37 +2415,32 @@ void tcp_send_delayed_ack(struct sock *sk)
2432/* This routine sends an ack and also updates the window. */ 2415/* This routine sends an ack and also updates the window. */
2433void tcp_send_ack(struct sock *sk) 2416void tcp_send_ack(struct sock *sk)
2434{ 2417{
2435 /* If we have been reset, we may not send again. */ 2418 struct sk_buff *buff;
2436 if (sk->sk_state != TCP_CLOSE) {
2437 struct sk_buff *buff;
2438 2419
2439 /* We are not putting this on the write queue, so 2420 /* If we have been reset, we may not send again. */
2440 * tcp_transmit_skb() will set the ownership to this 2421 if (sk->sk_state == TCP_CLOSE)
2441 * sock. 2422 return;
2442 */
2443 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2444 if (buff == NULL) {
2445 inet_csk_schedule_ack(sk);
2446 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2447 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2448 TCP_DELACK_MAX, TCP_RTO_MAX);
2449 return;
2450 }
2451 2423
2452 /* Reserve space for headers and prepare control bits. */ 2424 /* We are not putting this on the write queue, so
2453 skb_reserve(buff, MAX_TCP_HEADER); 2425 * tcp_transmit_skb() will set the ownership to this
2454 buff->csum = 0; 2426 * sock.
2455 TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; 2427 */
2456 TCP_SKB_CB(buff)->sacked = 0; 2428 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
2457 skb_shinfo(buff)->gso_segs = 1; 2429 if (buff == NULL) {
2458 skb_shinfo(buff)->gso_size = 0; 2430 inet_csk_schedule_ack(sk);
2459 skb_shinfo(buff)->gso_type = 0; 2431 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
2460 2432 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
2461 /* Send it off, this clears delayed acks for us. */ 2433 TCP_DELACK_MAX, TCP_RTO_MAX);
2462 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk); 2434 return;
2463 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2464 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2465 } 2435 }
2436
2437 /* Reserve space for headers and prepare control bits. */
2438 skb_reserve(buff, MAX_TCP_HEADER);
2439 tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPCB_FLAG_ACK);
2440
2441 /* Send it off, this clears delayed acks for us. */
2442 TCP_SKB_CB(buff)->when = tcp_time_stamp;
2443 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
2466} 2444}
2467 2445
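
Editor's note: tcp_send_ack() is flattened from one big nested block into early returns, with no behavioural change. The part worth modelling is the failure path: when no skb can be allocated for an immediate ACK, the ACK is not dropped; the delayed-ACK timer is re-armed so it goes out later. A toy model of that fallback, with illustrative names and stand-in constants, not the kernel's:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define ATO_MIN     40		/* stand-in for TCP_ATO_MIN, in ms */
#define DELACK_MAX 200		/* stand-in for TCP_DELACK_MAX, in ms */

struct conn {
	bool ack_pending;	/* delayed-ACK timer armed? */
	int  ato;		/* current ACK timeout estimate */
	int  timer_ms;		/* when the deferred ACK will fire */
};

/* Returns true if the ACK went out immediately. */
static bool send_ack(struct conn *c, void *(*alloc)(size_t))
{
	void *skb = alloc(64);

	if (skb == NULL) {
		/* Mirrors the diff: defer the ACK instead of losing it. */
		c->ack_pending = true;
		c->ato = ATO_MIN;
		c->timer_ms = DELACK_MAX;
		return false;
	}
	free(skb);		/* the transmit path would run here */
	return true;
}

static void *failing_alloc(size_t n) { (void)n; return NULL; }

int main(void)
{
	struct conn c = { false, 0, 0 };

	if (!send_ack(&c, failing_alloc))
		printf("ACK deferred, timer=%d ms\n", c.timer_ms);
	return 0;
}
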
2468/* This routine sends a packet with an out-of-date sequence 2446/* This routine sends a packet with an out-of-date sequence
@@ -2488,66 +2466,57 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
2488 2466
2489 /* Reserve space for headers and set control bits. */ 2467 /* Reserve space for headers and set control bits. */
2490 skb_reserve(skb, MAX_TCP_HEADER); 2468 skb_reserve(skb, MAX_TCP_HEADER);
2491 skb->csum = 0;
2492 TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
2493 TCP_SKB_CB(skb)->sacked = urgent;
2494 skb_shinfo(skb)->gso_segs = 1;
2495 skb_shinfo(skb)->gso_size = 0;
2496 skb_shinfo(skb)->gso_type = 0;
2497
2498 /* Use a previous sequence. This should cause the other 2469 /* Use a previous sequence. This should cause the other
2499 * end to send an ack. Don't queue or clone SKB, just 2470 * end to send an ack. Don't queue or clone SKB, just
2500 * send it. 2471 * send it.
2501 */ 2472 */
2502 TCP_SKB_CB(skb)->seq = urgent ? tp->snd_una : tp->snd_una - 1; 2473 tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPCB_FLAG_ACK);
2503 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq;
2504 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2474 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2505 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2475 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
2506} 2476}
2507 2477
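
Editor's note: in tcp_xmit_probe_skb() the old ternary, snd_una for an urgent probe and snd_una - 1 otherwise, is folded into tcp_init_nondata_skb(skb, tp->snd_una - !urgent, ...). Since !urgent evaluates to exactly 0 or 1, the arithmetic form picks the same sequence number in both cases. A two-case check:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t snd_una = 5000;
	int urgent;

	for (urgent = 0; urgent <= 1; urgent++) {
		uint32_t old_way = urgent ? snd_una : snd_una - 1;
		uint32_t new_way = snd_una - !urgent;

		assert(old_way == new_way);
	}
	return 0;
}
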
2508int tcp_write_wakeup(struct sock *sk) 2478int tcp_write_wakeup(struct sock *sk)
2509{ 2479{
2510 if (sk->sk_state != TCP_CLOSE) { 2480 struct tcp_sock *tp = tcp_sk(sk);
2511 struct tcp_sock *tp = tcp_sk(sk); 2481 struct sk_buff *skb;
2512 struct sk_buff *skb;
2513
2514 if ((skb = tcp_send_head(sk)) != NULL &&
2515 before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
2516 int err;
2517 unsigned int mss = tcp_current_mss(sk, 0);
2518 unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
2519
2520 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2521 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2522
2523 /* We are probing the opening of a window
2524 * but the window size is != 0, so this
2525 * must have been a result of SWS avoidance (sender side).
2526 */
2527 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2528 skb->len > mss) {
2529 seg_size = min(seg_size, mss);
2530 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2531 if (tcp_fragment(sk, skb, seg_size, mss))
2532 return -1;
2533 } else if (!tcp_skb_pcount(skb))
2534 tcp_set_skb_tso_segs(sk, skb, mss);
2535 2482
2483 if (sk->sk_state == TCP_CLOSE)
2484 return -1;
2485
2486 if ((skb = tcp_send_head(sk)) != NULL &&
2487 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2488 int err;
2489 unsigned int mss = tcp_current_mss(sk, 0);
2490 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2491
2492 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
2493 tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
2494
2495 /* We are probing the opening of a window
2496 * but the window size is != 0, so this
2497 * must have been a result of SWS avoidance (sender side).
2498 */
2499 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
2500 skb->len > mss) {
2501 seg_size = min(seg_size, mss);
2536 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2502 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2537 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2503 if (tcp_fragment(sk, skb, seg_size, mss))
2538 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2504 return -1;
2539 if (!err) { 2505 } else if (!tcp_skb_pcount(skb))
2540 update_send_head(sk, skb); 2506 tcp_set_skb_tso_segs(sk, skb, mss);
2541 } 2507
2542 return err; 2508 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
2543 } else { 2509 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2544 if (tp->urg_mode && 2510 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2545 between(tp->snd_up, tp->snd_una+1, tp->snd_una+0xFFFF)) 2511 if (!err)
2546 tcp_xmit_probe_skb(sk, TCPCB_URG); 2512 tcp_event_new_data_sent(sk, skb);
2547 return tcp_xmit_probe_skb(sk, 0); 2513 return err;
2548 } 2514 } else {
2515 if (tp->urg_mode &&
2516 between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
2517 tcp_xmit_probe_skb(sk, 1);
2518 return tcp_xmit_probe_skb(sk, 0);
2549 } 2519 }
2550 return -1;
2551} 2520}
2552 2521
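
Editor's note: tcp_write_wakeup() gets the same de-indentation treatment as tcp_send_ack(), and the urgent-mode call now passes a plain 1 instead of TCPCB_URG, which is safe because the argument is only ever tested for truth after this patch. It also reads the window's right edge through tcp_wnd_end(tp), which, judging by the expressions it replaces, is presumably just tp->snd_una + tp->snd_wnd. The probe-sizing decision around it, fragment when the head skb overshoots the remaining window or exceeds one MSS, can be modelled in isolation:

#include <stdint.h>
#include <stdio.h>

static uint32_t wnd_end(uint32_t snd_una, uint32_t snd_wnd)
{
	/* What tcp_wnd_end(tp) appears to wrap. */
	return snd_una + snd_wnd;
}

int main(void)
{
	uint32_t snd_una = 1000, snd_wnd = 500, mss = 1460;
	uint32_t skb_seq = 1100, skb_end = 2600;	/* 1500-byte skb */
	uint32_t skb_len = skb_end - skb_seq;
	uint32_t seg_size = wnd_end(snd_una, snd_wnd) - skb_seq;	/* 400 */

	if (seg_size < skb_len || skb_len > mss) {
		/* Same decision as the diff: clip to window and MSS,
		 * then fragment before transmitting the probe. */
		if (seg_size > mss)
			seg_size = mss;
		printf("fragment: send %u of %u bytes\n", seg_size, skb_len);
	} else {
		printf("send whole skb\n");
	}
	return 0;	/* prints: fragment: send 400 of 1500 bytes */
}
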
2553/* A window probe timeout has occurred. If window is not closed send 2522/* A window probe timeout has occurred. If window is not closed send