Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  544
1 file changed, 435 insertions(+), 109 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e17c244875c..e041d057ec86 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1;
  * will allow a single TSO frame to consume. Building TSO frames
  * which are too large can cause TCP streams to be bursty.
  */
-int sysctl_tcp_tso_win_divisor = 8;
+int sysctl_tcp_tso_win_divisor = 3;
 
 static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
				     struct sk_buff *skb)
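[Annotation, not part of the patch.] With this series the divisor no longer scales tp->mss_cache up front; it only bounds how long tcp_tso_should_defer() (added further down) may hold back a partially filled TSO frame. A standalone sketch of that check, with invented example numbers:

#include <stdio.h>

/* Illustration only: mirrors the tso_win_divisor branch of
 * tcp_tso_should_defer(); all values below are made up.
 */
static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned int mss = 1448;           /* sender MSS in bytes */
	unsigned int snd_cwnd = 30;        /* congestion window, in segments */
	unsigned int snd_wnd = 64 * 1024;  /* receiver window, in bytes */
	unsigned int limit = 8 * mss;      /* bytes usable right now */
	unsigned int divisor = 3;          /* sysctl_tcp_tso_win_divisor */

	/* At least 1/divisor of the smaller window must already be usable,
	 * otherwise sending is deferred in the hope of a larger burst.
	 */
	unsigned int chunk = min_u32(snd_wnd, snd_cwnd * mss) / divisor;

	printf("chunk=%u limit=%u -> %s\n", chunk, limit,
	       limit >= chunk ? "send now" : "defer");
	return 0;
}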
@@ -140,11 +140,11 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
 	tp->ack.pingpong = 1;
 }
 
-static __inline__ void tcp_event_ack_sent(struct sock *sk)
+static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	tcp_dec_quickack_mode(tp);
+	tcp_dec_quickack_mode(tp, pkts);
 	tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
 }
 
@@ -355,7 +355,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 	tp->af_specific->send_check(sk, th, skb->len, skb);
 
 	if (tcb->flags & TCPCB_FLAG_ACK)
-		tcp_event_ack_sent(sk);
+		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
 
 	if (skb->len != tcp_header_size)
 		tcp_event_data_sent(tp, skb, sk);
@@ -403,42 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 	sk->sk_send_head = skb;
 }
 
-static inline void tcp_tso_set_push(struct sk_buff *skb)
-{
-	/* Force push to be on for any TSO frames to workaround
-	 * problems with busted implementations like Mac OS-X that
-	 * hold off socket receive wakeups until push is seen.
-	 */
-	if (tcp_skb_pcount(skb) > 1)
-		TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
-}
-
-/* Send _single_ skb sitting at the send head. This function requires
- * true push pending frames to setup probe timer etc.
- */
-void tcp_push_one(struct sock *sk, unsigned cur_mss)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = sk->sk_send_head;
 
-	if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) {
-		/* Send it out now. */
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-		tcp_tso_set_push(skb);
-		if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
-			sk->sk_send_head = NULL;
-			tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
-			tcp_packets_out_inc(sk, tp, skb);
-			return;
-		}
-	}
-}
-
-void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (skb->len <= tp->mss_cache_std ||
+	if (skb->len <= tp->mss_cache ||
 	    !(sk->sk_route_caps & NETIF_F_TSO)) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
@@ -448,10 +417,10 @@ void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
 	} else {
 		unsigned int factor;
 
-		factor = skb->len + (tp->mss_cache_std - 1);
-		factor /= tp->mss_cache_std;
+		factor = skb->len + (tp->mss_cache - 1);
+		factor /= tp->mss_cache;
 		skb_shinfo(skb)->tso_segs = factor;
-		skb_shinfo(skb)->tso_size = tp->mss_cache_std;
+		skb_shinfo(skb)->tso_size = tp->mss_cache;
 	}
 }
 
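[Annotation, not part of the patch.] tcp_set_skb_tso_segs() rounds the segment count up with the usual add-(mss - 1)-then-divide idiom. A minimal userspace check of that arithmetic (the helper name tso_factor is invented):

#include <assert.h>
#include <stdio.h>

/* "Add (mss - 1) then divide" rounds up, so a 4000-byte skb over a
 * 1448-byte MSS counts as 3 segments.
 */
static unsigned int tso_factor(unsigned int len, unsigned int mss)
{
	return (len + (mss - 1)) / mss;
}

int main(void)
{
	assert(tso_factor(1448, 1448) == 1);
	assert(tso_factor(1449, 1448) == 2);
	assert(tso_factor(4000, 1448) == 3);
	printf("ok\n");
	return 0;
}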
@@ -537,6 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
 	}
 
 	/* Link BUFF into the send queue. */
+	skb_header_release(buff);
 	__skb_append(skb, buff);
 
 	return 0;
@@ -657,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 
 	/* And store cached results */
 	tp->pmtu_cookie = pmtu;
-	tp->mss_cache = tp->mss_cache_std = mss_now;
+	tp->mss_cache = mss_now;
 
 	return mss_now;
 }
@@ -669,57 +639,316 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
  * cannot be large. However, taking into account rare use of URG, this
  * is not a big flaw.
  */
-
-unsigned int tcp_current_mss(struct sock *sk, int large)
+unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct dst_entry *dst = __sk_dst_get(sk);
-	unsigned int do_large, mss_now;
+	u32 mss_now;
+	u16 xmit_size_goal;
+	int doing_tso = 0;
+
+	mss_now = tp->mss_cache;
+
+	if (large_allowed &&
+	    (sk->sk_route_caps & NETIF_F_TSO) &&
+	    !tp->urg_mode)
+		doing_tso = 1;
 
-	mss_now = tp->mss_cache_std;
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
 		if (mtu != tp->pmtu_cookie)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
-	do_large = (large &&
-		    (sk->sk_route_caps & NETIF_F_TSO) &&
-		    !tp->urg_mode);
+	if (tp->rx_opt.eff_sacks)
+		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
 
-	if (do_large) {
-		unsigned int large_mss, factor, limit;
+	xmit_size_goal = mss_now;
 
-		large_mss = 65535 - tp->af_specific->net_header_len -
+	if (doing_tso) {
+		xmit_size_goal = 65535 -
+			tp->af_specific->net_header_len -
 			tp->ext_header_len - tp->tcp_header_len;
 
-		if (tp->max_window && large_mss > (tp->max_window>>1))
-			large_mss = max((tp->max_window>>1),
-					68U - tp->tcp_header_len);
+		if (tp->max_window &&
+		    (xmit_size_goal > (tp->max_window >> 1)))
+			xmit_size_goal = max((tp->max_window >> 1),
+					     68U - tp->tcp_header_len);
+
+		xmit_size_goal -= (xmit_size_goal % mss_now);
+	}
+	tp->xmit_size_goal = xmit_size_goal;
 
-		factor = large_mss / mss_now;
+	return mss_now;
+}
 
-		/* Always keep large mss multiple of real mss, but
-		 * do not exceed 1/tso_win_divisor of the congestion window
-		 * so we can keep the ACK clock ticking and minimize
-		 * bursting.
-		 */
-		limit = tp->snd_cwnd;
-		if (sysctl_tcp_tso_win_divisor)
-			limit /= sysctl_tcp_tso_win_divisor;
-		limit = max(1U, limit);
-		if (factor > limit)
-			factor = limit;
+/* Congestion window validation. (RFC2861) */
 
-		tp->mss_cache = mss_now * factor;
+static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
+{
+	__u32 packets_out = tp->packets_out;
+
+	if (packets_out >= tp->snd_cwnd) {
+		/* Network is feed fully. */
+		tp->snd_cwnd_used = 0;
+		tp->snd_cwnd_stamp = tcp_time_stamp;
+	} else {
+		/* Network starves. */
+		if (tp->packets_out > tp->snd_cwnd_used)
+			tp->snd_cwnd_used = tp->packets_out;
 
-		mss_now = tp->mss_cache;
+		if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
+			tcp_cwnd_application_limited(sk);
 	}
+}
 
-	if (tp->rx_opt.eff_sacks)
-		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
-			    (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
-	return mss_now;
+static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+{
+	u32 window, cwnd_len;
+
+	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
+	cwnd_len = mss_now * cwnd;
+	return min(window, cwnd_len);
+}
+
+/* Can at least one segment of SKB be sent right now, according to the
+ * congestion window rules? If so, return how many segments are allowed.
+ */
+static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 in_flight, cwnd;
+
+	/* Don't be strict about the congestion window for the final FIN. */
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 1;
+
+	in_flight = tcp_packets_in_flight(tp);
+	cwnd = tp->snd_cwnd;
+	if (in_flight < cwnd)
+		return (cwnd - in_flight);
+
+	return 0;
+}
+
+/* This must be invoked the first time we consider transmitting
+ * SKB onto the wire.
+ */
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+{
+	int tso_segs = tcp_skb_pcount(skb);
+
+	if (!tso_segs) {
+		tcp_set_skb_tso_segs(sk, skb);
+		tso_segs = tcp_skb_pcount(skb);
+	}
+	return tso_segs;
+}
+
+static inline int tcp_minshall_check(const struct tcp_sock *tp)
+{
+	return after(tp->snd_sml,tp->snd_una) &&
+		!after(tp->snd_sml, tp->snd_nxt);
+}
+
+/* Return 0, if packet can be sent now without violation Nagle's rules:
+ * 1. It is full sized.
+ * 2. Or it contains FIN. (already checked by caller)
+ * 3. Or TCP_NODELAY was set.
+ * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
+ *    With Minshall's modification: all sent small packets are ACKed.
+ */
+
+static inline int tcp_nagle_check(const struct tcp_sock *tp,
+				  const struct sk_buff *skb,
+				  unsigned mss_now, int nonagle)
+{
+	return (skb->len < mss_now &&
+		((nonagle&TCP_NAGLE_CORK) ||
+		 (!nonagle &&
+		  tp->packets_out &&
+		  tcp_minshall_check(tp))));
+}
+
+/* Return non-zero if the Nagle test allows this packet to be
+ * sent now.
+ */
+static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	/* Nagle rule does not apply to frames, which sit in the middle of the
+	 * write_queue (they have no chances to get new data).
+	 *
+	 * This is implemented in the callers, where they modify the 'nonagle'
+	 * argument based upon the location of SKB in the send queue.
+	 */
+	if (nonagle & TCP_NAGLE_PUSH)
+		return 1;
+
+	/* Don't use the nagle rule for urgent data (or for the final FIN). */
+	if (tp->urg_mode ||
+	    (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
+		return 1;
+
+	if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
+		return 1;
+
+	return 0;
+}
+
+/* Does at least the first segment of SKB fit into the send window? */
+static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
+{
+	u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+
+	if (skb->len > cur_mss)
+		end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
+
+	return !after(end_seq, tp->snd_una + tp->snd_wnd);
+}
+
+/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
+ * should be put on the wire right now. If so, it returns the number of
+ * packets allowed by the congestion window.
+ */
+static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
+				 unsigned int cur_mss, int nonagle)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	unsigned int cwnd_quota;
+
+	tcp_init_tso_segs(sk, skb);
+
+	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
+		return 0;
+
+	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (cwnd_quota &&
+	    !tcp_snd_wnd_test(tp, skb, cur_mss))
+		cwnd_quota = 0;
+
+	return cwnd_quota;
+}
+
+static inline int tcp_skb_is_last(const struct sock *sk,
+				  const struct sk_buff *skb)
+{
+	return skb->next == (struct sk_buff *)&sk->sk_write_queue;
+}
+
+int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
+{
+	struct sk_buff *skb = sk->sk_send_head;
+
+	return (skb &&
+		tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
+			     (tcp_skb_is_last(sk, skb) ?
+			      TCP_NAGLE_PUSH :
+			      tp->nonagle)));
+}
+
+/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
+ * which is put after SKB on the list. It is very much like
+ * tcp_fragment() except that it may make several kinds of assumptions
+ * in order to speed up the splitting operation. In particular, we
+ * know that all the data is in scatter-gather pages, and that the
+ * packet has never been sent out before (and thus is not cloned).
+ */
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+{
+	struct sk_buff *buff;
+	int nlen = skb->len - len;
+	u16 flags;
+
+	/* All of a TSO frame must be composed of paged data. */
+	BUG_ON(skb->len != skb->data_len);
+
+	buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
+	if (unlikely(buff == NULL))
+		return -ENOMEM;
+
+	buff->truesize = nlen;
+	skb->truesize -= nlen;
+
+	/* Correct the sequence numbers. */
+	TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
+	TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
+	TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
+
+	/* PSH and FIN should only be set in the second packet. */
+	flags = TCP_SKB_CB(skb)->flags;
+	TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
+	TCP_SKB_CB(buff)->flags = flags;
+
+	/* This packet was never sent out yet, so no SACK bits. */
+	TCP_SKB_CB(buff)->sacked = 0;
+
+	buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
+	skb_split(skb, buff, len);
+
+	/* Fix up tso_factor for both original and new SKB. */
+	tcp_set_skb_tso_segs(sk, skb);
+	tcp_set_skb_tso_segs(sk, buff);
+
+	/* Link BUFF into the send queue. */
+	skb_header_release(buff);
+	__skb_append(skb, buff);
+
+	return 0;
+}
+
+/* Try to defer sending, if possible, in order to minimize the amount
+ * of TSO splitting we do. View it as a kind of TSO Nagle test.
+ *
+ * This algorithm is from John Heffner.
+ */
+static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
+{
+	u32 send_win, cong_win, limit, in_flight;
+
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
+		return 0;
+
+	if (tp->ca_state != TCP_CA_Open)
+		return 0;
+
+	in_flight = tcp_packets_in_flight(tp);
+
+	BUG_ON(tcp_skb_pcount(skb) <= 1 ||
+	       (tp->snd_cwnd <= in_flight));
+
+	send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
+
+	/* From in_flight test above, we know that cwnd > in_flight. */
+	cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
+
+	limit = min(send_win, cong_win);
+
+	/* If sk_send_head can be sent fully now, just do it. */
+	if (skb->len <= limit)
+		return 0;
+
+	if (sysctl_tcp_tso_win_divisor) {
+		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
+
+		/* If at least some fraction of a window is available,
+		 * just use it.
+		 */
+		chunk /= sysctl_tcp_tso_win_divisor;
+		if (limit >= chunk)
+			return 0;
+	} else {
+		/* Different approach, try not to defer past a single
+		 * ACK. Receiver should ACK every other full sized
+		 * frame, so if we have space for more than 3 frames
+		 * then send now.
+		 */
+		if (limit > tcp_max_burst(tp) * tp->mss_cache)
+			return 0;
+	}
+
+	/* Ok, it looks like it is advisable to defer. */
+	return 1;
 }
 
 /* This routine writes packets to the network. It advances the
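[Annotation, not part of the patch.] The deferral heuristic above reduces to: send if the whole frame fits in the usable window, or if the usable window is already a large enough share of the smaller of the send and congestion windows. A rough userspace restatement, assuming tcp_max_burst() is 3 as in this kernel; all names below are illustrative:

#include <stdio.h>

static unsigned int min_u32(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

/* Rough restatement of tcp_tso_should_defer(): returns 1 to defer.
 * skb_len, send_win, cong_win, snd_wnd and cwnd_bytes are in bytes;
 * a divisor of 0 selects the "at most ~3 frames" fallback.
 */
static int should_defer(unsigned int skb_len, unsigned int send_win,
			unsigned int cong_win, unsigned int snd_wnd,
			unsigned int cwnd_bytes, unsigned int mss,
			unsigned int divisor)
{
	unsigned int limit = min_u32(send_win, cong_win);

	if (skb_len <= limit)		/* whole frame fits: send now */
		return 0;
	if (divisor)
		return limit < min_u32(snd_wnd, cwnd_bytes) / divisor;
	return limit <= 3 * mss;	/* don't defer past roughly one ACK */
}

int main(void)
{
	/* Prints 0 (send now): about a third of the ~43 KB congestion
	 * window is already usable, so deferring would not help.
	 */
	printf("%d\n", should_defer(65536, 20480, 40960,
				    131072, 43440, 1448, 3));
	return 0;
}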
@@ -729,57 +958,158 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
  * Returns 1, if no segments are in flight and we have queued segments, but
  * cannot send anything now because of SWS or another problem.
  */
-int tcp_write_xmit(struct sock *sk, int nonagle)
+static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int mss_now;
+	struct sk_buff *skb;
+	unsigned int tso_segs, sent_pkts;
+	int cwnd_quota;
 
 	/* If we are closed, the bytes will have to remain here.
 	 * In time closedown will finish, we empty the write queue and all
 	 * will be happy.
 	 */
-	if (sk->sk_state != TCP_CLOSE) {
-		struct sk_buff *skb;
-		int sent_pkts = 0;
+	if (unlikely(sk->sk_state == TCP_CLOSE))
+		return 0;
+
+	skb = sk->sk_send_head;
+	if (unlikely(!skb))
+		return 0;
+
+	tso_segs = tcp_init_tso_segs(sk, skb);
+	cwnd_quota = tcp_cwnd_test(tp, skb);
+	if (unlikely(!cwnd_quota))
+		goto out;
+
+	sent_pkts = 0;
+	while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+		BUG_ON(!tso_segs);
+
+		if (tso_segs == 1) {
+			if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
+						     (tcp_skb_is_last(sk, skb) ?
+						      nonagle : TCP_NAGLE_PUSH))))
+				break;
+		} else {
+			if (tcp_tso_should_defer(sk, tp, skb))
+				break;
+		}
 
-		/* Account for SACKS, we may need to fragment due to this.
-		 * It is just like the real MSS changing on us midstream.
-		 * We also handle things correctly when the user adds some
-		 * IP options mid-stream. Silly to do, but cover it.
-		 */
-		mss_now = tcp_current_mss(sk, 1);
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
 
-		while ((skb = sk->sk_send_head) &&
-		       tcp_snd_test(sk, skb, mss_now,
-				    tcp_skb_is_last(sk, skb) ? nonagle :
-							       TCP_NAGLE_PUSH)) {
-			if (skb->len > mss_now) {
-				if (tcp_fragment(sk, skb, mss_now))
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (tso_fragment(sk, skb, limit))
 					break;
 			}
-
-			TCP_SKB_CB(skb)->when = tcp_time_stamp;
-			tcp_tso_set_push(skb);
-			if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
+		} else if (unlikely(skb->len > mss_now)) {
+			if (unlikely(tcp_fragment(sk, skb, mss_now)))
 				break;
+		}
 
-			/* Advance the send_head. This one is sent out.
-			 * This call will increment packets_out.
-			 */
-			update_send_head(sk, tp, skb);
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+		if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
+			break;
+
+		/* Advance the send_head. This one is sent out.
+		 * This call will increment packets_out.
+		 */
+		update_send_head(sk, tp, skb);
+
+		tcp_minshall_update(tp, mss_now, skb);
+		sent_pkts++;
+
+		/* Do not optimize this to use tso_segs. If we chopped up
+		 * the packet above, tso_segs will no longer be valid.
+		 */
+		cwnd_quota -= tcp_skb_pcount(skb);
+
+		BUG_ON(cwnd_quota < 0);
+		if (!cwnd_quota)
+			break;
+
+		skb = sk->sk_send_head;
+		if (!skb)
+			break;
+		tso_segs = tcp_init_tso_segs(sk, skb);
+	}
+
+	if (likely(sent_pkts)) {
+		tcp_cwnd_validate(sk, tp);
+		return 0;
+	}
+out:
+	return !tp->packets_out && sk->sk_send_head;
+}
+
+/* Push out any pending frames which were held back due to
+ * TCP_CORK or attempt at coalescing tiny packets.
+ * The socket must be locked by the caller.
+ */
+void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
+			       unsigned int cur_mss, int nonagle)
+{
+	struct sk_buff *skb = sk->sk_send_head;
 
-			tcp_minshall_update(tp, mss_now, skb);
-			sent_pkts = 1;
+	if (skb) {
+		if (tcp_write_xmit(sk, cur_mss, nonagle))
+			tcp_check_probe_timer(sk, tp);
+	}
+}
+
+/* Send _single_ skb sitting at the send head. This function requires
+ * true push pending frames to setup probe timer etc.
+ */
+void tcp_push_one(struct sock *sk, unsigned int mss_now)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb = sk->sk_send_head;
+	unsigned int tso_segs, cwnd_quota;
+
+	BUG_ON(!skb || skb->len < mss_now);
+
+	tso_segs = tcp_init_tso_segs(sk, skb);
+	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
+
+	if (likely(cwnd_quota)) {
+		BUG_ON(!tso_segs);
+
+		if (tso_segs > 1) {
+			u32 limit = tcp_window_allows(tp, skb,
+						      mss_now, cwnd_quota);
+
+			if (skb->len < limit) {
+				unsigned int trim = skb->len % mss_now;
+
+				if (trim)
+					limit = skb->len - trim;
+			}
+			if (skb->len > limit) {
+				if (unlikely(tso_fragment(sk, skb, limit)))
+					return;
+			}
+		} else if (unlikely(skb->len > mss_now)) {
+			if (unlikely(tcp_fragment(sk, skb, mss_now)))
+				return;
 		}
 
-		if (sent_pkts) {
+		/* Send it out now. */
+		TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+		if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
+			update_send_head(sk, tp, skb);
 			tcp_cwnd_validate(sk, tp);
-			return 0;
+			return;
 		}
-
-		return !tp->packets_out && sk->sk_send_head;
 	}
-	return 0;
 }
 
 /* This function returns the amount that we can raise the
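[Annotation, not part of the patch.] Both tcp_write_xmit() and tcp_push_one() clamp what they hand to tso_fragment() to a whole number of MSS-sized segments whenever the window allows more than the skb holds, so a split TSO frame does not leave a runt segment in the middle of the stream. A small sketch of just that trimming step (tso_send_limit is an invented name):

#include <stdio.h>

/* Mirror of the "limit" trimming above: cap the send at the window
 * allowance, then round down to a whole number of MSS-sized segments.
 */
static unsigned int tso_send_limit(unsigned int skb_len,
				   unsigned int window_allows,
				   unsigned int mss)
{
	unsigned int limit = window_allows;

	if (skb_len < limit) {
		unsigned int trim = skb_len % mss;

		if (trim)
			limit = skb_len - trim;
	}
	return limit;
}

int main(void)
{
	/* 10000-byte frame, generous window, 1448-byte MSS:
	 * send 8688 bytes (6 full segments) now, keep the tail queued.
	 */
	printf("%u\n", tso_send_limit(10000, 64 * 1024, 1448));
	return 0;
}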
@@ -1039,7 +1369,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (sk->sk_route_caps & NETIF_F_TSO) {
 		sk->sk_route_caps &= ~NETIF_F_TSO;
 		sock_set_flag(sk, SOCK_NO_LARGESEND);
-		tp->mss_cache = tp->mss_cache_std;
 	}
 
 	if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
@@ -1101,7 +1430,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 * is still in somebody's hands, else make a clone.
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_tso_set_push(skb);
 
 	err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
 				    pskb_copy(skb, GFP_ATOMIC):
@@ -1670,14 +1998,12 @@ int tcp_write_wakeup(struct sock *sk)
 		if (sk->sk_route_caps & NETIF_F_TSO) {
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
 			sk->sk_route_caps &= ~NETIF_F_TSO;
-			tp->mss_cache = tp->mss_cache_std;
 		}
 	} else if (!tcp_skb_pcount(skb))
 		tcp_set_skb_tso_segs(sk, skb);
 
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
-	tcp_tso_set_push(skb);
 	err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
 	if (!err) {
 		update_send_head(sk, tp, skb);