author    Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
committer Jonathan Herman <hermanjl@cs.unc.edu>  2013-01-17 16:15:55 -0500
commit    8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
tree      a8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /net/ipv4/tcp_output.c
parent    406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  |  742
1 file changed, 176 insertions, 566 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5d451593ef1..faf257b9415 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -34,8 +34,6 @@
34 * 34 *
35 */ 35 */
36 36
37#define pr_fmt(fmt) "TCP: " fmt
38
39#include <net/tcp.h> 37#include <net/tcp.h>
40 38
41#include <linux/compiler.h> 39#include <linux/compiler.h>
@@ -50,9 +48,6 @@ int sysctl_tcp_retrans_collapse __read_mostly = 1;
50 */ 48 */
51int sysctl_tcp_workaround_signed_windows __read_mostly = 0; 49int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
52 50
53/* Default TSQ limit of two TSO segments */
54int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
55
56/* This limits the percentage of the congestion window which we 51/* This limits the percentage of the congestion window which we
57 * will allow a single TSO frame to consume. Building TSO frames 52 * will allow a single TSO frame to consume. Building TSO frames
58 * which are too large can cause TCP streams to be bursty. 53 * which are too large can cause TCP streams to be bursty.
@@ -68,11 +63,9 @@ int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
68int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */ 63int sysctl_tcp_cookie_size __read_mostly = 0; /* TCP_COOKIE_MAX */
69EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); 64EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size);
70 65
71static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
72 int push_one, gfp_t gfp);
73 66
74/* Account for new data that has been sent to the network. */ 67/* Account for new data that has been sent to the network. */
75static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) 68static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
76{ 69{
77 struct tcp_sock *tp = tcp_sk(sk); 70 struct tcp_sock *tp = tcp_sk(sk);
78 unsigned int prior_packets = tp->packets_out; 71 unsigned int prior_packets = tp->packets_out;
@@ -85,8 +78,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
85 tp->frto_counter = 3; 78 tp->frto_counter = 3;
86 79
87 tp->packets_out += tcp_skb_pcount(skb); 80 tp->packets_out += tcp_skb_pcount(skb);
88 if (!prior_packets || tp->early_retrans_delayed) 81 if (!prior_packets)
89 tcp_rearm_rto(sk); 82 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
83 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
90} 84}
91 85
92/* SND.NXT, if window was not shrunk. 86/* SND.NXT, if window was not shrunk.
@@ -95,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
95 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already 89 * Anything in between SND.UNA...SND.UNA+SND.WND also can be already
96 * invalid. OK, let's make this for now: 90 * invalid. OK, let's make this for now:
97 */ 91 */
98static inline __u32 tcp_acceptable_seq(const struct sock *sk) 92static inline __u32 tcp_acceptable_seq(struct sock *sk)
99{ 93{
100 const struct tcp_sock *tp = tcp_sk(sk); 94 struct tcp_sock *tp = tcp_sk(sk);
101 95
102 if (!before(tcp_wnd_end(tp), tp->snd_nxt)) 96 if (!before(tcp_wnd_end(tp), tp->snd_nxt))
103 return tp->snd_nxt; 97 return tp->snd_nxt;
@@ -122,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(const struct sock *sk)
122static __u16 tcp_advertise_mss(struct sock *sk) 116static __u16 tcp_advertise_mss(struct sock *sk)
123{ 117{
124 struct tcp_sock *tp = tcp_sk(sk); 118 struct tcp_sock *tp = tcp_sk(sk);
125 const struct dst_entry *dst = __sk_dst_get(sk); 119 struct dst_entry *dst = __sk_dst_get(sk);
126 int mss = tp->advmss; 120 int mss = tp->advmss;
127 121
128 if (dst) { 122 if (dst) {
@@ -139,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk)
139 133
140/* RFC2861. Reset CWND after idle period longer RTO to "restart window". 134/* RFC2861. Reset CWND after idle period longer RTO to "restart window".
141 * This is the first part of cwnd validation mechanism. */ 135 * This is the first part of cwnd validation mechanism. */
142static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) 136static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
143{ 137{
144 struct tcp_sock *tp = tcp_sk(sk); 138 struct tcp_sock *tp = tcp_sk(sk);
145 s32 delta = tcp_time_stamp - tp->lsndtime; 139 s32 delta = tcp_time_stamp - tp->lsndtime;
@@ -160,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst)
160 154
161/* Congestion state accounting after a packet has been sent. */ 155/* Congestion state accounting after a packet has been sent. */
162static void tcp_event_data_sent(struct tcp_sock *tp, 156static void tcp_event_data_sent(struct tcp_sock *tp,
163 struct sock *sk) 157 struct sk_buff *skb, struct sock *sk)
164{ 158{
165 struct inet_connection_sock *icsk = inet_csk(sk); 159 struct inet_connection_sock *icsk = inet_csk(sk);
166 const u32 now = tcp_time_stamp; 160 const u32 now = tcp_time_stamp;
@@ -301,11 +295,11 @@ static u16 tcp_select_window(struct sock *sk)
301} 295}
302 296
303/* Packet ECN state for a SYN-ACK */ 297/* Packet ECN state for a SYN-ACK */
304static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) 298static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb)
305{ 299{
306 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; 300 TCP_SKB_CB(skb)->flags &= ~TCPHDR_CWR;
307 if (!(tp->ecn_flags & TCP_ECN_OK)) 301 if (!(tp->ecn_flags & TCP_ECN_OK))
308 TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; 302 TCP_SKB_CB(skb)->flags &= ~TCPHDR_ECE;
309} 303}
310 304
311/* Packet ECN state for a SYN. */ 305/* Packet ECN state for a SYN. */
@@ -315,13 +309,13 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb)
315 309
316 tp->ecn_flags = 0; 310 tp->ecn_flags = 0;
317 if (sysctl_tcp_ecn == 1) { 311 if (sysctl_tcp_ecn == 1) {
318 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; 312 TCP_SKB_CB(skb)->flags |= TCPHDR_ECE | TCPHDR_CWR;
319 tp->ecn_flags = TCP_ECN_OK; 313 tp->ecn_flags = TCP_ECN_OK;
320 } 314 }
321} 315}
322 316
323static __inline__ void 317static __inline__ void
324TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) 318TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th)
325{ 319{
326 if (inet_rsk(req)->ecn_ok) 320 if (inet_rsk(req)->ecn_ok)
327 th->ece = 1; 321 th->ece = 1;
@@ -362,7 +356,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
362 skb->ip_summed = CHECKSUM_PARTIAL; 356 skb->ip_summed = CHECKSUM_PARTIAL;
363 skb->csum = 0; 357 skb->csum = 0;
364 358
365 TCP_SKB_CB(skb)->tcp_flags = flags; 359 TCP_SKB_CB(skb)->flags = flags;
366 TCP_SKB_CB(skb)->sacked = 0; 360 TCP_SKB_CB(skb)->sacked = 0;
367 361
368 skb_shinfo(skb)->gso_segs = 1; 362 skb_shinfo(skb)->gso_segs = 1;
@@ -375,7 +369,7 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
375 TCP_SKB_CB(skb)->end_seq = seq; 369 TCP_SKB_CB(skb)->end_seq = seq;
376} 370}
377 371
378static inline bool tcp_urg_mode(const struct tcp_sock *tp) 372static inline int tcp_urg_mode(const struct tcp_sock *tp)
379{ 373{
380 return tp->snd_una != tp->snd_up; 374 return tp->snd_una != tp->snd_up;
381} 375}
@@ -385,17 +379,15 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp)
385#define OPTION_MD5 (1 << 2) 379#define OPTION_MD5 (1 << 2)
386#define OPTION_WSCALE (1 << 3) 380#define OPTION_WSCALE (1 << 3)
387#define OPTION_COOKIE_EXTENSION (1 << 4) 381#define OPTION_COOKIE_EXTENSION (1 << 4)
388#define OPTION_FAST_OPEN_COOKIE (1 << 8)
389 382
390struct tcp_out_options { 383struct tcp_out_options {
391 u16 options; /* bit field of OPTION_* */ 384 u8 options; /* bit field of OPTION_* */
392 u16 mss; /* 0 to disable */
393 u8 ws; /* window scale, 0 to disable */ 385 u8 ws; /* window scale, 0 to disable */
394 u8 num_sack_blocks; /* number of SACK blocks to include */ 386 u8 num_sack_blocks; /* number of SACK blocks to include */
395 u8 hash_size; /* bytes in hash_location */ 387 u8 hash_size; /* bytes in hash_location */
396 __u8 *hash_location; /* temporary pointer, overloaded */ 388 u16 mss; /* 0 to disable */
397 __u32 tsval, tsecr; /* need to include OPTION_TS */ 389 __u32 tsval, tsecr; /* need to include OPTION_TS */
398 struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ 390 __u8 *hash_location; /* temporary pointer, overloaded */
399}; 391};
400 392
401/* The sysctl int routines are generic, so check consistency here. 393/* The sysctl int routines are generic, so check consistency here.
@@ -444,7 +436,7 @@ static u8 tcp_cookie_size_check(u8 desired)
444static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, 436static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
445 struct tcp_out_options *opts) 437 struct tcp_out_options *opts)
446{ 438{
447 u16 options = opts->options; /* mungable copy */ 439 u8 options = opts->options; /* mungable copy */
448 440
449 /* Having both authentication and cookies for security is redundant, 441 /* Having both authentication and cookies for security is redundant,
450 * and there's certainly not enough room. Instead, the cookie-less 442 * and there's certainly not enough room. Instead, the cookie-less
@@ -566,37 +558,20 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
566 558
567 tp->rx_opt.dsack = 0; 559 tp->rx_opt.dsack = 0;
568 } 560 }
569
570 if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
571 struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
572
573 *ptr++ = htonl((TCPOPT_EXP << 24) |
574 ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) |
575 TCPOPT_FASTOPEN_MAGIC);
576
577 memcpy(ptr, foc->val, foc->len);
578 if ((foc->len & 3) == 2) {
579 u8 *align = ((u8 *)ptr) + foc->len;
580 align[0] = align[1] = TCPOPT_NOP;
581 }
582 ptr += (foc->len + 3) >> 2;
583 }
584} 561}
585 562
586/* Compute TCP options for SYN packets. This is not the final 563/* Compute TCP options for SYN packets. This is not the final
587 * network wire format yet. 564 * network wire format yet.
588 */ 565 */
589static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, 566static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
590 struct tcp_out_options *opts, 567 struct tcp_out_options *opts,
591 struct tcp_md5sig_key **md5) 568 struct tcp_md5sig_key **md5) {
592{
593 struct tcp_sock *tp = tcp_sk(sk); 569 struct tcp_sock *tp = tcp_sk(sk);
594 struct tcp_cookie_values *cvp = tp->cookie_values; 570 struct tcp_cookie_values *cvp = tp->cookie_values;
595 unsigned int remaining = MAX_TCP_OPTION_SPACE; 571 unsigned remaining = MAX_TCP_OPTION_SPACE;
596 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ? 572 u8 cookie_size = (!tp->rx_opt.cookie_out_never && cvp != NULL) ?
597 tcp_cookie_size_check(cvp->cookie_desired) : 573 tcp_cookie_size_check(cvp->cookie_desired) :
598 0; 574 0;
599 struct tcp_fastopen_request *fastopen = tp->fastopen_req;
600 575
601#ifdef CONFIG_TCP_MD5SIG 576#ifdef CONFIG_TCP_MD5SIG
602 *md5 = tp->af_specific->md5_lookup(sk, sk); 577 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -637,16 +612,6 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
637 remaining -= TCPOLEN_SACKPERM_ALIGNED; 612 remaining -= TCPOLEN_SACKPERM_ALIGNED;
638 } 613 }
639 614
640 if (fastopen && fastopen->cookie.len >= 0) {
641 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len;
642 need = (need + 3) & ~3U; /* Align to 32 bits */
643 if (remaining >= need) {
644 opts->options |= OPTION_FAST_OPEN_COOKIE;
645 opts->fastopen_cookie = &fastopen->cookie;
646 remaining -= need;
647 tp->syn_fastopen = 1;
648 }
649 }
650 /* Note that timestamps are required by the specification. 615 /* Note that timestamps are required by the specification.
651 * 616 *
652 * Odd numbers of bytes are prohibited by the specification, ensuring 617 * Odd numbers of bytes are prohibited by the specification, ensuring
@@ -697,16 +662,15 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
697} 662}
698 663
699/* Set up TCP options for SYN-ACKs. */ 664/* Set up TCP options for SYN-ACKs. */
700static unsigned int tcp_synack_options(struct sock *sk, 665static unsigned tcp_synack_options(struct sock *sk,
701 struct request_sock *req, 666 struct request_sock *req,
702 unsigned int mss, struct sk_buff *skb, 667 unsigned mss, struct sk_buff *skb,
703 struct tcp_out_options *opts, 668 struct tcp_out_options *opts,
704 struct tcp_md5sig_key **md5, 669 struct tcp_md5sig_key **md5,
705 struct tcp_extend_values *xvp, 670 struct tcp_extend_values *xvp)
706 struct tcp_fastopen_cookie *foc)
707{ 671{
708 struct inet_request_sock *ireq = inet_rsk(req); 672 struct inet_request_sock *ireq = inet_rsk(req);
709 unsigned int remaining = MAX_TCP_OPTION_SPACE; 673 unsigned remaining = MAX_TCP_OPTION_SPACE;
710 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ? 674 u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
711 xvp->cookie_plus : 675 xvp->cookie_plus :
712 0; 676 0;
@@ -748,15 +712,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
748 if (unlikely(!ireq->tstamp_ok)) 712 if (unlikely(!ireq->tstamp_ok))
749 remaining -= TCPOLEN_SACKPERM_ALIGNED; 713 remaining -= TCPOLEN_SACKPERM_ALIGNED;
750 } 714 }
751 if (foc != NULL) { 715
752 u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
753 need = (need + 3) & ~3U; /* Align to 32 bits */
754 if (remaining >= need) {
755 opts->options |= OPTION_FAST_OPEN_COOKIE;
756 opts->fastopen_cookie = foc;
757 remaining -= need;
758 }
759 }
760 /* Similar rationale to tcp_syn_options() applies here, too. 716 /* Similar rationale to tcp_syn_options() applies here, too.
761 * If the <SYN> options fit, the same options should fit now! 717 * If the <SYN> options fit, the same options should fit now!
762 */ 718 */
@@ -785,13 +741,12 @@ static unsigned int tcp_synack_options(struct sock *sk,
785/* Compute TCP options for ESTABLISHED sockets. This is not the 741/* Compute TCP options for ESTABLISHED sockets. This is not the
786 * final wire format yet. 742 * final wire format yet.
787 */ 743 */
788static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, 744static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
789 struct tcp_out_options *opts, 745 struct tcp_out_options *opts,
790 struct tcp_md5sig_key **md5) 746 struct tcp_md5sig_key **md5) {
791{
792 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; 747 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
793 struct tcp_sock *tp = tcp_sk(sk); 748 struct tcp_sock *tp = tcp_sk(sk);
794 unsigned int size = 0; 749 unsigned size = 0;
795 unsigned int eff_sacks; 750 unsigned int eff_sacks;
796 751
797#ifdef CONFIG_TCP_MD5SIG 752#ifdef CONFIG_TCP_MD5SIG
@@ -813,9 +768,9 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
813 768
814 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; 769 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
815 if (unlikely(eff_sacks)) { 770 if (unlikely(eff_sacks)) {
816 const unsigned int remaining = MAX_TCP_OPTION_SPACE - size; 771 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
817 opts->num_sack_blocks = 772 opts->num_sack_blocks =
818 min_t(unsigned int, eff_sacks, 773 min_t(unsigned, eff_sacks,
819 (remaining - TCPOLEN_SACK_BASE_ALIGNED) / 774 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
820 TCPOLEN_SACK_PERBLOCK); 775 TCPOLEN_SACK_PERBLOCK);
821 size += TCPOLEN_SACK_BASE_ALIGNED + 776 size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -825,160 +780,6 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
825 return size; 780 return size;
826} 781}
827 782
828
829/* TCP SMALL QUEUES (TSQ)
830 *
831 * TSQ goal is to keep small amount of skbs per tcp flow in tx queues (qdisc+dev)
832 * to reduce RTT and bufferbloat.
833 * We do this using a special skb destructor (tcp_wfree).
834 *
835 * Its important tcp_wfree() can be replaced by sock_wfree() in the event skb
836 * needs to be reallocated in a driver.
837 * The invariant being skb->truesize substracted from sk->sk_wmem_alloc
838 *
839 * Since transmit from skb destructor is forbidden, we use a tasklet
840 * to process all sockets that eventually need to send more skbs.
841 * We use one tasklet per cpu, with its own queue of sockets.
842 */
843struct tsq_tasklet {
844 struct tasklet_struct tasklet;
845 struct list_head head; /* queue of tcp sockets */
846};
847static DEFINE_PER_CPU(struct tsq_tasklet, tsq_tasklet);
848
849static void tcp_tsq_handler(struct sock *sk)
850{
851 if ((1 << sk->sk_state) &
852 (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
853 TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
854 tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
855}
856/*
857 * One tasklest per cpu tries to send more skbs.
858 * We run in tasklet context but need to disable irqs when
859 * transfering tsq->head because tcp_wfree() might
860 * interrupt us (non NAPI drivers)
861 */
862static void tcp_tasklet_func(unsigned long data)
863{
864 struct tsq_tasklet *tsq = (struct tsq_tasklet *)data;
865 LIST_HEAD(list);
866 unsigned long flags;
867 struct list_head *q, *n;
868 struct tcp_sock *tp;
869 struct sock *sk;
870
871 local_irq_save(flags);
872 list_splice_init(&tsq->head, &list);
873 local_irq_restore(flags);
874
875 list_for_each_safe(q, n, &list) {
876 tp = list_entry(q, struct tcp_sock, tsq_node);
877 list_del(&tp->tsq_node);
878
879 sk = (struct sock *)tp;
880 bh_lock_sock(sk);
881
882 if (!sock_owned_by_user(sk)) {
883 tcp_tsq_handler(sk);
884 } else {
885 /* defer the work to tcp_release_cb() */
886 set_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
887 }
888 bh_unlock_sock(sk);
889
890 clear_bit(TSQ_QUEUED, &tp->tsq_flags);
891 sk_free(sk);
892 }
893}
894
895#define TCP_DEFERRED_ALL ((1UL << TCP_TSQ_DEFERRED) | \
896 (1UL << TCP_WRITE_TIMER_DEFERRED) | \
897 (1UL << TCP_DELACK_TIMER_DEFERRED) | \
898 (1UL << TCP_MTU_REDUCED_DEFERRED))
899/**
900 * tcp_release_cb - tcp release_sock() callback
901 * @sk: socket
902 *
903 * called from release_sock() to perform protocol dependent
904 * actions before socket release.
905 */
906void tcp_release_cb(struct sock *sk)
907{
908 struct tcp_sock *tp = tcp_sk(sk);
909 unsigned long flags, nflags;
910
911 /* perform an atomic operation only if at least one flag is set */
912 do {
913 flags = tp->tsq_flags;
914 if (!(flags & TCP_DEFERRED_ALL))
915 return;
916 nflags = flags & ~TCP_DEFERRED_ALL;
917 } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
918
919 if (flags & (1UL << TCP_TSQ_DEFERRED))
920 tcp_tsq_handler(sk);
921
922 if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
923 tcp_write_timer_handler(sk);
924 __sock_put(sk);
925 }
926 if (flags & (1UL << TCP_DELACK_TIMER_DEFERRED)) {
927 tcp_delack_timer_handler(sk);
928 __sock_put(sk);
929 }
930 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
931 sk->sk_prot->mtu_reduced(sk);
932 __sock_put(sk);
933 }
934}
935EXPORT_SYMBOL(tcp_release_cb);
936
937void __init tcp_tasklet_init(void)
938{
939 int i;
940
941 for_each_possible_cpu(i) {
942 struct tsq_tasklet *tsq = &per_cpu(tsq_tasklet, i);
943
944 INIT_LIST_HEAD(&tsq->head);
945 tasklet_init(&tsq->tasklet,
946 tcp_tasklet_func,
947 (unsigned long)tsq);
948 }
949}
950
951/*
952 * Write buffer destructor automatically called from kfree_skb.
953 * We cant xmit new skbs from this context, as we might already
954 * hold qdisc lock.
955 */
956static void tcp_wfree(struct sk_buff *skb)
957{
958 struct sock *sk = skb->sk;
959 struct tcp_sock *tp = tcp_sk(sk);
960
961 if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags) &&
962 !test_and_set_bit(TSQ_QUEUED, &tp->tsq_flags)) {
963 unsigned long flags;
964 struct tsq_tasklet *tsq;
965
966 /* Keep a ref on socket.
967 * This last ref will be released in tcp_tasklet_func()
968 */
969 atomic_sub(skb->truesize - 1, &sk->sk_wmem_alloc);
970
971 /* queue this socket to tasklet queue */
972 local_irq_save(flags);
973 tsq = &__get_cpu_var(tsq_tasklet);
974 list_add(&tp->tsq_node, &tsq->head);
975 tasklet_schedule(&tsq->tasklet);
976 local_irq_restore(flags);
977 } else {
978 sock_wfree(skb);
979 }
980}
981
982/* This routine actually transmits TCP packets queued in by 783/* This routine actually transmits TCP packets queued in by
983 * tcp_do_sendmsg(). This is used by both the initial 784 * tcp_do_sendmsg(). This is used by both the initial
984 * transmission and possible later retransmissions. 785 * transmission and possible later retransmissions.
@@ -998,7 +799,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
998 struct tcp_sock *tp; 799 struct tcp_sock *tp;
999 struct tcp_skb_cb *tcb; 800 struct tcp_skb_cb *tcb;
1000 struct tcp_out_options opts; 801 struct tcp_out_options opts;
1001 unsigned int tcp_options_size, tcp_header_size; 802 unsigned tcp_options_size, tcp_header_size;
1002 struct tcp_md5sig_key *md5; 803 struct tcp_md5sig_key *md5;
1003 struct tcphdr *th; 804 struct tcphdr *th;
1004 int err; 805 int err;
@@ -1025,7 +826,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1025 tcb = TCP_SKB_CB(skb); 826 tcb = TCP_SKB_CB(skb);
1026 memset(&opts, 0, sizeof(opts)); 827 memset(&opts, 0, sizeof(opts));
1027 828
1028 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) 829 if (unlikely(tcb->flags & TCPHDR_SYN))
1029 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); 830 tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
1030 else 831 else
1031 tcp_options_size = tcp_established_options(sk, skb, &opts, 832 tcp_options_size = tcp_established_options(sk, skb, &opts,
@@ -1040,12 +841,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1040 841
1041 skb_push(skb, tcp_header_size); 842 skb_push(skb, tcp_header_size);
1042 skb_reset_transport_header(skb); 843 skb_reset_transport_header(skb);
1043 844 skb_set_owner_w(skb, sk);
1044 skb_orphan(skb);
1045 skb->sk = sk;
1046 skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
1047 tcp_wfree : sock_wfree;
1048 atomic_add(skb->truesize, &sk->sk_wmem_alloc);
1049 845
1050 /* Build TCP header and checksum it. */ 846 /* Build TCP header and checksum it. */
1051 th = tcp_hdr(skb); 847 th = tcp_hdr(skb);
@@ -1054,9 +850,9 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1054 th->seq = htonl(tcb->seq); 850 th->seq = htonl(tcb->seq);
1055 th->ack_seq = htonl(tp->rcv_nxt); 851 th->ack_seq = htonl(tp->rcv_nxt);
1056 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) | 852 *(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
1057 tcb->tcp_flags); 853 tcb->flags);
1058 854
1059 if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { 855 if (unlikely(tcb->flags & TCPHDR_SYN)) {
1060 /* RFC1323: The window in SYN & SYN/ACK segments 856 /* RFC1323: The window in SYN & SYN/ACK segments
1061 * is never scaled. 857 * is never scaled.
1062 */ 858 */
@@ -1079,7 +875,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1079 } 875 }
1080 876
1081 tcp_options_write((__be32 *)(th + 1), tp, &opts); 877 tcp_options_write((__be32 *)(th + 1), tp, &opts);
1082 if (likely((tcb->tcp_flags & TCPHDR_SYN) == 0)) 878 if (likely((tcb->flags & TCPHDR_SYN) == 0))
1083 TCP_ECN_send(sk, skb, tcp_header_size); 879 TCP_ECN_send(sk, skb, tcp_header_size);
1084 880
1085#ifdef CONFIG_TCP_MD5SIG 881#ifdef CONFIG_TCP_MD5SIG
@@ -1093,11 +889,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
1093 889
1094 icsk->icsk_af_ops->send_check(sk, skb); 890 icsk->icsk_af_ops->send_check(sk, skb);
1095 891
1096 if (likely(tcb->tcp_flags & TCPHDR_ACK)) 892 if (likely(tcb->flags & TCPHDR_ACK))
1097 tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); 893 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
1098 894
1099 if (skb->len != tcp_header_size) 895 if (skb->len != tcp_header_size)
1100 tcp_event_data_sent(tp, sk); 896 tcp_event_data_sent(tp, skb, sk);
1101 897
1102 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) 898 if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
1103 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, 899 TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
@@ -1130,7 +926,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
1130} 926}
1131 927
1132/* Initialize TSO segments for a packet. */ 928/* Initialize TSO segments for a packet. */
1133static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, 929static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb,
1134 unsigned int mss_now) 930 unsigned int mss_now)
1135{ 931{
1136 if (skb->len <= mss_now || !sk_can_gso(sk) || 932 if (skb->len <= mss_now || !sk_can_gso(sk) ||
@@ -1151,7 +947,7 @@ static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
1151/* When a modification to fackets out becomes necessary, we need to check 947/* When a modification to fackets out becomes necessary, we need to check
1152 * skb is counted to fackets_out or not. 948 * skb is counted to fackets_out or not.
1153 */ 949 */
1154static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, 950static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
1155 int decr) 951 int decr)
1156{ 952{
1157 struct tcp_sock *tp = tcp_sk(sk); 953 struct tcp_sock *tp = tcp_sk(sk);
@@ -1166,7 +962,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb,
1166/* Pcount in the middle of the write queue got changed, we need to do various 962/* Pcount in the middle of the write queue got changed, we need to do various
1167 * tweaks to fix counters 963 * tweaks to fix counters
1168 */ 964 */
1169static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) 965static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
1170{ 966{
1171 struct tcp_sock *tp = tcp_sk(sk); 967 struct tcp_sock *tp = tcp_sk(sk);
1172 968
@@ -1236,9 +1032,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1236 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1032 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1237 1033
1238 /* PSH and FIN should only be set in the second packet. */ 1034 /* PSH and FIN should only be set in the second packet. */
1239 flags = TCP_SKB_CB(skb)->tcp_flags; 1035 flags = TCP_SKB_CB(skb)->flags;
1240 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1036 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1241 TCP_SKB_CB(buff)->tcp_flags = flags; 1037 TCP_SKB_CB(buff)->flags = flags;
1242 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; 1038 TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked;
1243 1039
1244 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { 1040 if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -1295,27 +1091,17 @@ static void __pskb_trim_head(struct sk_buff *skb, int len)
1295{ 1091{
1296 int i, k, eat; 1092 int i, k, eat;
1297 1093
1298 eat = min_t(int, len, skb_headlen(skb));
1299 if (eat) {
1300 __skb_pull(skb, eat);
1301 skb->avail_size -= eat;
1302 len -= eat;
1303 if (!len)
1304 return;
1305 }
1306 eat = len; 1094 eat = len;
1307 k = 0; 1095 k = 0;
1308 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 1096 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1309 int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); 1097 if (skb_shinfo(skb)->frags[i].size <= eat) {
1310 1098 put_page(skb_shinfo(skb)->frags[i].page);
1311 if (size <= eat) { 1099 eat -= skb_shinfo(skb)->frags[i].size;
1312 skb_frag_unref(skb, i);
1313 eat -= size;
1314 } else { 1100 } else {
1315 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; 1101 skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i];
1316 if (eat) { 1102 if (eat) {
1317 skb_shinfo(skb)->frags[k].page_offset += eat; 1103 skb_shinfo(skb)->frags[k].page_offset += eat;
1318 skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); 1104 skb_shinfo(skb)->frags[k].size -= eat;
1319 eat = 0; 1105 eat = 0;
1320 } 1106 }
1321 k++; 1107 k++;
@@ -1334,7 +1120,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1334 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) 1120 if (skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
1335 return -ENOMEM; 1121 return -ENOMEM;
1336 1122
1337 __pskb_trim_head(skb, len); 1123 /* If len == headlen, we avoid __skb_pull to preserve alignment. */
1124 if (unlikely(len < skb_headlen(skb)))
1125 __skb_pull(skb, len);
1126 else
1127 __pskb_trim_head(skb, len - skb_headlen(skb));
1338 1128
1339 TCP_SKB_CB(skb)->seq += len; 1129 TCP_SKB_CB(skb)->seq += len;
1340 skb->ip_summed = CHECKSUM_PARTIAL; 1130 skb->ip_summed = CHECKSUM_PARTIAL;
@@ -1354,8 +1144,8 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
1354/* Calculate MSS. Not accounting for SACKs here. */ 1144/* Calculate MSS. Not accounting for SACKs here. */
1355int tcp_mtu_to_mss(struct sock *sk, int pmtu) 1145int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1356{ 1146{
1357 const struct tcp_sock *tp = tcp_sk(sk); 1147 struct tcp_sock *tp = tcp_sk(sk);
1358 const struct inet_connection_sock *icsk = inet_csk(sk); 1148 struct inet_connection_sock *icsk = inet_csk(sk);
1359 int mss_now; 1149 int mss_now;
1360 1150
1361 /* Calculate base mss without TCP options: 1151 /* Calculate base mss without TCP options:
@@ -1363,14 +1153,6 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1363 */ 1153 */
1364 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr); 1154 mss_now = pmtu - icsk->icsk_af_ops->net_header_len - sizeof(struct tcphdr);
1365 1155
1366 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1367 if (icsk->icsk_af_ops->net_frag_header_len) {
1368 const struct dst_entry *dst = __sk_dst_get(sk);
1369
1370 if (dst && dst_allfrag(dst))
1371 mss_now -= icsk->icsk_af_ops->net_frag_header_len;
1372 }
1373
1374 /* Clamp it (mss_clamp does not include tcp options) */ 1156 /* Clamp it (mss_clamp does not include tcp options) */
1375 if (mss_now > tp->rx_opt.mss_clamp) 1157 if (mss_now > tp->rx_opt.mss_clamp)
1376 mss_now = tp->rx_opt.mss_clamp; 1158 mss_now = tp->rx_opt.mss_clamp;
@@ -1391,8 +1173,8 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu)
1391/* Inverse of above */ 1173/* Inverse of above */
1392int tcp_mss_to_mtu(struct sock *sk, int mss) 1174int tcp_mss_to_mtu(struct sock *sk, int mss)
1393{ 1175{
1394 const struct tcp_sock *tp = tcp_sk(sk); 1176 struct tcp_sock *tp = tcp_sk(sk);
1395 const struct inet_connection_sock *icsk = inet_csk(sk); 1177 struct inet_connection_sock *icsk = inet_csk(sk);
1396 int mtu; 1178 int mtu;
1397 1179
1398 mtu = mss + 1180 mtu = mss +
@@ -1400,13 +1182,6 @@ int tcp_mss_to_mtu(struct sock *sk, int mss)
1400 icsk->icsk_ext_hdr_len + 1182 icsk->icsk_ext_hdr_len +
1401 icsk->icsk_af_ops->net_header_len; 1183 icsk->icsk_af_ops->net_header_len;
1402 1184
1403 /* IPv6 adds a frag_hdr in case RTAX_FEATURE_ALLFRAG is set */
1404 if (icsk->icsk_af_ops->net_frag_header_len) {
1405 const struct dst_entry *dst = __sk_dst_get(sk);
1406
1407 if (dst && dst_allfrag(dst))
1408 mtu += icsk->icsk_af_ops->net_frag_header_len;
1409 }
1410 return mtu; 1185 return mtu;
1411} 1186}
1412 1187
@@ -1473,10 +1248,10 @@ EXPORT_SYMBOL(tcp_sync_mss);
1473 */ 1248 */
1474unsigned int tcp_current_mss(struct sock *sk) 1249unsigned int tcp_current_mss(struct sock *sk)
1475{ 1250{
1476 const struct tcp_sock *tp = tcp_sk(sk); 1251 struct tcp_sock *tp = tcp_sk(sk);
1477 const struct dst_entry *dst = __sk_dst_get(sk); 1252 struct dst_entry *dst = __sk_dst_get(sk);
1478 u32 mss_now; 1253 u32 mss_now;
1479 unsigned int header_len; 1254 unsigned header_len;
1480 struct tcp_out_options opts; 1255 struct tcp_out_options opts;
1481 struct tcp_md5sig_key *md5; 1256 struct tcp_md5sig_key *md5;
1482 1257
@@ -1534,22 +1309,22 @@ static void tcp_cwnd_validate(struct sock *sk)
1534 * modulo only when the receiver window alone is the limiting factor or 1309 * modulo only when the receiver window alone is the limiting factor or
1535 * when we would be allowed to send the split-due-to-Nagle skb fully. 1310 * when we would be allowed to send the split-due-to-Nagle skb fully.
1536 */ 1311 */
1537static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, 1312static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
1538 unsigned int mss_now, unsigned int max_segs) 1313 unsigned int mss_now, unsigned int cwnd)
1539{ 1314{
1540 const struct tcp_sock *tp = tcp_sk(sk); 1315 struct tcp_sock *tp = tcp_sk(sk);
1541 u32 needed, window, max_len; 1316 u32 needed, window, cwnd_len;
1542 1317
1543 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 1318 window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
1544 max_len = mss_now * max_segs; 1319 cwnd_len = mss_now * cwnd;
1545 1320
1546 if (likely(max_len <= window && skb != tcp_write_queue_tail(sk))) 1321 if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
1547 return max_len; 1322 return cwnd_len;
1548 1323
1549 needed = min(skb->len, window); 1324 needed = min(skb->len, window);
1550 1325
1551 if (max_len <= needed) 1326 if (cwnd_len <= needed)
1552 return max_len; 1327 return cwnd_len;
1553 1328
1554 return needed - needed % mss_now; 1329 return needed - needed % mss_now;
1555} 1330}
@@ -1557,14 +1332,13 @@ static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_b
1557/* Can at least one segment of SKB be sent right now, according to the 1332/* Can at least one segment of SKB be sent right now, according to the
1558 * congestion window rules? If so, return how many segments are allowed. 1333 * congestion window rules? If so, return how many segments are allowed.
1559 */ 1334 */
1560static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, 1335static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
1561 const struct sk_buff *skb) 1336 struct sk_buff *skb)
1562{ 1337{
1563 u32 in_flight, cwnd; 1338 u32 in_flight, cwnd;
1564 1339
1565 /* Don't be strict about the congestion window for the final FIN. */ 1340 /* Don't be strict about the congestion window for the final FIN. */
1566 if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && 1341 if ((TCP_SKB_CB(skb)->flags & TCPHDR_FIN) && tcp_skb_pcount(skb) == 1)
1567 tcp_skb_pcount(skb) == 1)
1568 return 1; 1342 return 1;
1569 1343
1570 in_flight = tcp_packets_in_flight(tp); 1344 in_flight = tcp_packets_in_flight(tp);
@@ -1579,7 +1353,7 @@ static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp,
1579 * This must be invoked the first time we consider transmitting 1353 * This must be invoked the first time we consider transmitting
1580 * SKB onto the wire. 1354 * SKB onto the wire.
1581 */ 1355 */
1582static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, 1356static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
1583 unsigned int mss_now) 1357 unsigned int mss_now)
1584{ 1358{
1585 int tso_segs = tcp_skb_pcount(skb); 1359 int tso_segs = tcp_skb_pcount(skb);
@@ -1592,33 +1366,33 @@ static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb,
1592} 1366}
1593 1367
1594/* Minshall's variant of the Nagle send check. */ 1368/* Minshall's variant of the Nagle send check. */
1595static inline bool tcp_minshall_check(const struct tcp_sock *tp) 1369static inline int tcp_minshall_check(const struct tcp_sock *tp)
1596{ 1370{
1597 return after(tp->snd_sml, tp->snd_una) && 1371 return after(tp->snd_sml, tp->snd_una) &&
1598 !after(tp->snd_sml, tp->snd_nxt); 1372 !after(tp->snd_sml, tp->snd_nxt);
1599} 1373}
1600 1374
1601/* Return false, if packet can be sent now without violation Nagle's rules: 1375/* Return 0, if packet can be sent now without violation Nagle's rules:
1602 * 1. It is full sized. 1376 * 1. It is full sized.
1603 * 2. Or it contains FIN. (already checked by caller) 1377 * 2. Or it contains FIN. (already checked by caller)
1604 * 3. Or TCP_CORK is not set, and TCP_NODELAY is set. 1378 * 3. Or TCP_NODELAY was set.
1605 * 4. Or TCP_CORK is not set, and all sent packets are ACKed. 1379 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1606 * With Minshall's modification: all sent small packets are ACKed. 1380 * With Minshall's modification: all sent small packets are ACKed.
1607 */ 1381 */
1608static inline bool tcp_nagle_check(const struct tcp_sock *tp, 1382static inline int tcp_nagle_check(const struct tcp_sock *tp,
1609 const struct sk_buff *skb, 1383 const struct sk_buff *skb,
1610 unsigned int mss_now, int nonagle) 1384 unsigned mss_now, int nonagle)
1611{ 1385{
1612 return skb->len < mss_now && 1386 return skb->len < mss_now &&
1613 ((nonagle & TCP_NAGLE_CORK) || 1387 ((nonagle & TCP_NAGLE_CORK) ||
1614 (!nonagle && tp->packets_out && tcp_minshall_check(tp))); 1388 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
1615} 1389}
1616 1390
1617/* Return true if the Nagle test allows this packet to be 1391/* Return non-zero if the Nagle test allows this packet to be
1618 * sent now. 1392 * sent now.
1619 */ 1393 */
1620static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, 1394static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
1621 unsigned int cur_mss, int nonagle) 1395 unsigned int cur_mss, int nonagle)
1622{ 1396{
1623 /* Nagle rule does not apply to frames, which sit in the middle of the 1397 /* Nagle rule does not apply to frames, which sit in the middle of the
1624 * write_queue (they have no chances to get new data). 1398 * write_queue (they have no chances to get new data).
@@ -1627,25 +1401,24 @@ static inline bool tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buf
1627 * argument based upon the location of SKB in the send queue. 1401 * argument based upon the location of SKB in the send queue.
1628 */ 1402 */
1629 if (nonagle & TCP_NAGLE_PUSH) 1403 if (nonagle & TCP_NAGLE_PUSH)
1630 return true; 1404 return 1;
1631 1405
1632 /* Don't use the nagle rule for urgent data (or for the final FIN). 1406 /* Don't use the nagle rule for urgent data (or for the final FIN).
1633 * Nagle can be ignored during F-RTO too (see RFC4138). 1407 * Nagle can be ignored during F-RTO too (see RFC4138).
1634 */ 1408 */
1635 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || 1409 if (tcp_urg_mode(tp) || (tp->frto_counter == 2) ||
1636 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) 1410 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN))
1637 return true; 1411 return 1;
1638 1412
1639 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle)) 1413 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
1640 return true; 1414 return 1;
1641 1415
1642 return false; 1416 return 0;
1643} 1417}
1644 1418
1645/* Does at least the first segment of SKB fit into the send window? */ 1419/* Does at least the first segment of SKB fit into the send window? */
1646static bool tcp_snd_wnd_test(const struct tcp_sock *tp, 1420static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb,
1647 const struct sk_buff *skb, 1421 unsigned int cur_mss)
1648 unsigned int cur_mss)
1649{ 1422{
1650 u32 end_seq = TCP_SKB_CB(skb)->end_seq; 1423 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
1651 1424
@@ -1659,10 +1432,10 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
1659 * should be put on the wire right now. If so, it returns the number of 1432 * should be put on the wire right now. If so, it returns the number of
1660 * packets allowed by the congestion window. 1433 * packets allowed by the congestion window.
1661 */ 1434 */
1662static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, 1435static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
1663 unsigned int cur_mss, int nonagle) 1436 unsigned int cur_mss, int nonagle)
1664{ 1437{
1665 const struct tcp_sock *tp = tcp_sk(sk); 1438 struct tcp_sock *tp = tcp_sk(sk);
1666 unsigned int cwnd_quota; 1439 unsigned int cwnd_quota;
1667 1440
1668 tcp_init_tso_segs(sk, skb, cur_mss); 1441 tcp_init_tso_segs(sk, skb, cur_mss);
@@ -1678,9 +1451,9 @@ static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
1678} 1451}
1679 1452
1680/* Test if sending is allowed right now. */ 1453/* Test if sending is allowed right now. */
1681bool tcp_may_send_now(struct sock *sk) 1454int tcp_may_send_now(struct sock *sk)
1682{ 1455{
1683 const struct tcp_sock *tp = tcp_sk(sk); 1456 struct tcp_sock *tp = tcp_sk(sk);
1684 struct sk_buff *skb = tcp_send_head(sk); 1457 struct sk_buff *skb = tcp_send_head(sk);
1685 1458
1686 return skb && 1459 return skb &&
@@ -1722,9 +1495,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1722 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq; 1495 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
1723 1496
1724 /* PSH and FIN should only be set in the second packet. */ 1497 /* PSH and FIN should only be set in the second packet. */
1725 flags = TCP_SKB_CB(skb)->tcp_flags; 1498 flags = TCP_SKB_CB(skb)->flags;
1726 TCP_SKB_CB(skb)->tcp_flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH); 1499 TCP_SKB_CB(skb)->flags = flags & ~(TCPHDR_FIN | TCPHDR_PSH);
1727 TCP_SKB_CB(buff)->tcp_flags = flags; 1500 TCP_SKB_CB(buff)->flags = flags;
1728 1501
1729 /* This packet was never sent out yet, so no SACK bits. */ 1502 /* This packet was never sent out yet, so no SACK bits. */
1730 TCP_SKB_CB(buff)->sacked = 0; 1503 TCP_SKB_CB(buff)->sacked = 0;
@@ -1748,14 +1521,14 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1748 * 1521 *
1749 * This algorithm is from John Heffner. 1522 * This algorithm is from John Heffner.
1750 */ 1523 */
1751static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) 1524static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1752{ 1525{
1753 struct tcp_sock *tp = tcp_sk(sk); 1526 struct tcp_sock *tp = tcp_sk(sk);
1754 const struct inet_connection_sock *icsk = inet_csk(sk); 1527 const struct inet_connection_sock *icsk = inet_csk(sk);
1755 u32 send_win, cong_win, limit, in_flight; 1528 u32 send_win, cong_win, limit, in_flight;
1756 int win_divisor; 1529 int win_divisor;
1757 1530
1758 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1531 if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
1759 goto send_now; 1532 goto send_now;
1760 1533
1761 if (icsk->icsk_ca_state != TCP_CA_Open) 1534 if (icsk->icsk_ca_state != TCP_CA_Open)
@@ -1778,8 +1551,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1778 limit = min(send_win, cong_win); 1551 limit = min(send_win, cong_win);
1779 1552
1780 /* If a full-sized TSO skb can be sent, do it. */ 1553 /* If a full-sized TSO skb can be sent, do it. */
1781 if (limit >= min_t(unsigned int, sk->sk_gso_max_size, 1554 if (limit >= sk->sk_gso_max_size)
1782 sk->sk_gso_max_segs * tp->mss_cache))
1783 goto send_now; 1555 goto send_now;
1784 1556
1785 /* Middle in queue won't get any more data, full sendable already? */ 1557 /* Middle in queue won't get any more data, full sendable already? */
@@ -1802,18 +1574,18 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1802 * frame, so if we have space for more than 3 frames 1574 * frame, so if we have space for more than 3 frames
1803 * then send now. 1575 * then send now.
1804 */ 1576 */
1805 if (limit > tcp_max_tso_deferred_mss(tp) * tp->mss_cache) 1577 if (limit > tcp_max_burst(tp) * tp->mss_cache)
1806 goto send_now; 1578 goto send_now;
1807 } 1579 }
1808 1580
1809 /* Ok, it looks like it is advisable to defer. */ 1581 /* Ok, it looks like it is advisable to defer. */
1810 tp->tso_deferred = 1 | (jiffies << 1); 1582 tp->tso_deferred = 1 | (jiffies << 1);
1811 1583
1812 return true; 1584 return 1;
1813 1585
1814send_now: 1586send_now:
1815 tp->tso_deferred = 0; 1587 tp->tso_deferred = 0;
1816 return false; 1588 return 0;
1817} 1589}
1818 1590
1819/* Create a new MTU probe if we are ready. 1591/* Create a new MTU probe if we are ready.
@@ -1883,7 +1655,7 @@ static int tcp_mtu_probe(struct sock *sk)
1883 1655
1884 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; 1656 TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq;
1885 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; 1657 TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size;
1886 TCP_SKB_CB(nskb)->tcp_flags = TCPHDR_ACK; 1658 TCP_SKB_CB(nskb)->flags = TCPHDR_ACK;
1887 TCP_SKB_CB(nskb)->sacked = 0; 1659 TCP_SKB_CB(nskb)->sacked = 0;
1888 nskb->csum = 0; 1660 nskb->csum = 0;
1889 nskb->ip_summed = skb->ip_summed; 1661 nskb->ip_summed = skb->ip_summed;
@@ -1903,11 +1675,11 @@ static int tcp_mtu_probe(struct sock *sk)
1903 if (skb->len <= copy) { 1675 if (skb->len <= copy) {
1904 /* We've eaten all the data from this skb. 1676 /* We've eaten all the data from this skb.
1905 * Throw it away. */ 1677 * Throw it away. */
1906 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; 1678 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags;
1907 tcp_unlink_write_queue(skb, sk); 1679 tcp_unlink_write_queue(skb, sk);
1908 sk_wmem_free_skb(sk, skb); 1680 sk_wmem_free_skb(sk, skb);
1909 } else { 1681 } else {
1910 TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags & 1682 TCP_SKB_CB(nskb)->flags |= TCP_SKB_CB(skb)->flags &
1911 ~(TCPHDR_FIN|TCPHDR_PSH); 1683 ~(TCPHDR_FIN|TCPHDR_PSH);
1912 if (!skb_shinfo(skb)->nr_frags) { 1684 if (!skb_shinfo(skb)->nr_frags) {
1913 skb_pull(skb, copy); 1685 skb_pull(skb, copy);
@@ -1955,11 +1727,11 @@ static int tcp_mtu_probe(struct sock *sk)
1955 * snd_up-64k-mss .. snd_up cannot be large. However, taking into 1727 * snd_up-64k-mss .. snd_up cannot be large. However, taking into
1956 * account rare use of URG, this is not a big flaw. 1728 * account rare use of URG, this is not a big flaw.
1957 * 1729 *
1958 * Returns true, if no segments are in flight and we have queued segments, 1730 * Returns 1, if no segments are in flight and we have queued segments, but
1959 * but cannot send anything now because of SWS or another problem. 1731 * cannot send anything now because of SWS or another problem.
1960 */ 1732 */
1961static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, 1733static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1962 int push_one, gfp_t gfp) 1734 int push_one, gfp_t gfp)
1963{ 1735{
1964 struct tcp_sock *tp = tcp_sk(sk); 1736 struct tcp_sock *tp = tcp_sk(sk);
1965 struct sk_buff *skb; 1737 struct sk_buff *skb;
@@ -1973,7 +1745,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1973 /* Do MTU probing. */ 1745 /* Do MTU probing. */
1974 result = tcp_mtu_probe(sk); 1746 result = tcp_mtu_probe(sk);
1975 if (!result) { 1747 if (!result) {
1976 return false; 1748 return 0;
1977 } else if (result > 0) { 1749 } else if (result > 0) {
1978 sent_pkts = 1; 1750 sent_pkts = 1;
1979 } 1751 }
@@ -1982,13 +1754,9 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1982 while ((skb = tcp_send_head(sk))) { 1754 while ((skb = tcp_send_head(sk))) {
1983 unsigned int limit; 1755 unsigned int limit;
1984 1756
1985
1986 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1757 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1987 BUG_ON(!tso_segs); 1758 BUG_ON(!tso_segs);
1988 1759
1989 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE)
1990 goto repair; /* Skip network transmission */
1991
1992 cwnd_quota = tcp_cwnd_test(tp, skb); 1760 cwnd_quota = tcp_cwnd_test(tp, skb);
1993 if (!cwnd_quota) 1761 if (!cwnd_quota)
1994 break; 1762 break;
@@ -2006,19 +1774,10 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2006 break; 1774 break;
2007 } 1775 }
2008 1776
2009 /* TSQ : sk_wmem_alloc accounts skb truesize,
2010 * including skb overhead. But thats OK.
2011 */
2012 if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
2013 set_bit(TSQ_THROTTLED, &tp->tsq_flags);
2014 break;
2015 }
2016 limit = mss_now; 1777 limit = mss_now;
2017 if (tso_segs > 1 && !tcp_urg_mode(tp)) 1778 if (tso_segs > 1 && !tcp_urg_mode(tp))
2018 limit = tcp_mss_split_point(sk, skb, mss_now, 1779 limit = tcp_mss_split_point(sk, skb, mss_now,
2019 min_t(unsigned int, 1780 cwnd_quota);
2020 cwnd_quota,
2021 sk->sk_gso_max_segs));
2022 1781
2023 if (skb->len > limit && 1782 if (skb->len > limit &&
2024 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) 1783 unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
@@ -2029,24 +1788,21 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
2029 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) 1788 if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
2030 break; 1789 break;
2031 1790
2032repair:
2033 /* Advance the send_head. This one is sent out. 1791 /* Advance the send_head. This one is sent out.
2034 * This call will increment packets_out. 1792 * This call will increment packets_out.
2035 */ 1793 */
2036 tcp_event_new_data_sent(sk, skb); 1794 tcp_event_new_data_sent(sk, skb);
2037 1795
2038 tcp_minshall_update(tp, mss_now, skb); 1796 tcp_minshall_update(tp, mss_now, skb);
2039 sent_pkts += tcp_skb_pcount(skb); 1797 sent_pkts++;
2040 1798
2041 if (push_one) 1799 if (push_one)
2042 break; 1800 break;
2043 } 1801 }
2044 1802
2045 if (likely(sent_pkts)) { 1803 if (likely(sent_pkts)) {
2046 if (tcp_in_cwnd_reduction(sk))
2047 tp->prr_out += sent_pkts;
2048 tcp_cwnd_validate(sk); 1804 tcp_cwnd_validate(sk);
2049 return false; 1805 return 0;
2050 } 1806 }
2051 return !tp->packets_out && tcp_send_head(sk); 1807 return !tp->packets_out && tcp_send_head(sk);
2052} 1808}
@@ -2065,8 +1821,7 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
2065 if (unlikely(sk->sk_state == TCP_CLOSE)) 1821 if (unlikely(sk->sk_state == TCP_CLOSE))
2066 return; 1822 return;
2067 1823
2068 if (tcp_write_xmit(sk, cur_mss, nonagle, 0, 1824 if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
2069 sk_gfp_atomic(sk, GFP_ATOMIC)))
2070 tcp_check_probe_timer(sk); 1825 tcp_check_probe_timer(sk);
2071} 1826}
2072 1827
@@ -2155,7 +1910,7 @@ u32 __tcp_select_window(struct sock *sk)
2155 if (free_space < (full_space >> 1)) { 1910 if (free_space < (full_space >> 1)) {
2156 icsk->icsk_ack.quick = 0; 1911 icsk->icsk_ack.quick = 0;
2157 1912
2158 if (sk_under_memory_pressure(sk)) 1913 if (tcp_memory_pressure)
2159 tp->rcv_ssthresh = min(tp->rcv_ssthresh, 1914 tp->rcv_ssthresh = min(tp->rcv_ssthresh,
2160 4U * tp->advmss); 1915 4U * tp->advmss);
2161 1916
@@ -2228,7 +1983,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2228 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1983 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
2229 1984
2230 /* Merge over control information. This moves PSH/FIN etc. over */ 1985 /* Merge over control information. This moves PSH/FIN etc. over */
2231 TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(next_skb)->tcp_flags; 1986 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
2232 1987
2233 /* All done, get rid of second SKB and account for it so 1988 /* All done, get rid of second SKB and account for it so
2234 * packet counting does not break. 1989 * packet counting does not break.
@@ -2246,22 +2001,22 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
2246} 2001}
2247 2002
2248/* Check if coalescing SKBs is legal. */ 2003/* Check if coalescing SKBs is legal. */
2249static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) 2004static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
2250{ 2005{
2251 if (tcp_skb_pcount(skb) > 1) 2006 if (tcp_skb_pcount(skb) > 1)
2252 return false; 2007 return 0;
2253 /* TODO: SACK collapsing could be used to remove this condition */ 2008 /* TODO: SACK collapsing could be used to remove this condition */
2254 if (skb_shinfo(skb)->nr_frags != 0) 2009 if (skb_shinfo(skb)->nr_frags != 0)
2255 return false; 2010 return 0;
2256 if (skb_cloned(skb)) 2011 if (skb_cloned(skb))
2257 return false; 2012 return 0;
2258 if (skb == tcp_send_head(sk)) 2013 if (skb == tcp_send_head(sk))
2259 return false; 2014 return 0;
2260 /* Some heurestics for collapsing over SACK'd could be invented */ 2015 /* Some heurestics for collapsing over SACK'd could be invented */
2261 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) 2016 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
2262 return false; 2017 return 0;
2263 2018
2264 return true; 2019 return 1;
2265} 2020}
2266 2021
2267/* Collapse packets in the retransmit queue to make to create 2022/* Collapse packets in the retransmit queue to make to create
@@ -2272,11 +2027,11 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2272{ 2027{
2273 struct tcp_sock *tp = tcp_sk(sk); 2028 struct tcp_sock *tp = tcp_sk(sk);
2274 struct sk_buff *skb = to, *tmp; 2029 struct sk_buff *skb = to, *tmp;
2275 bool first = true; 2030 int first = 1;
2276 2031
2277 if (!sysctl_tcp_retrans_collapse) 2032 if (!sysctl_tcp_retrans_collapse)
2278 return; 2033 return;
2279 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) 2034 if (TCP_SKB_CB(skb)->flags & TCPHDR_SYN)
2280 return; 2035 return;
2281 2036
2282 tcp_for_write_queue_from_safe(skb, tmp, sk) { 2037 tcp_for_write_queue_from_safe(skb, tmp, sk) {
@@ -2286,7 +2041,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2286 space -= skb->len; 2041 space -= skb->len;
2287 2042
2288 if (first) { 2043 if (first) {
2289 first = false; 2044 first = 0;
2290 continue; 2045 continue;
2291 } 2046 }
2292 2047
@@ -2295,7 +2050,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2295 /* Punt if not enough space exists in the first SKB for 2050 /* Punt if not enough space exists in the first SKB for
2296 * the data in the second 2051 * the data in the second
2297 */ 2052 */
2298 if (skb->len > skb_availroom(to)) 2053 if (skb->len > skb_tailroom(to))
2299 break; 2054 break;
2300 2055
2301 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) 2056 if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
@@ -2309,11 +2064,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
2309 * state updates are done by the caller. Returns non-zero if an 2064 * state updates are done by the caller. Returns non-zero if an
2310 * error occurred which prevented the send. 2065 * error occurred which prevented the send.
2311 */ 2066 */
2312int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) 2067int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2313{ 2068{
2314 struct tcp_sock *tp = tcp_sk(sk); 2069 struct tcp_sock *tp = tcp_sk(sk);
2315 struct inet_connection_sock *icsk = inet_csk(sk); 2070 struct inet_connection_sock *icsk = inet_csk(sk);
2316 unsigned int cur_mss; 2071 unsigned int cur_mss;
2072 int err;
2317 2073
2318 /* Inconslusive MTU probe */ 2074 /* Inconslusive MTU probe */
2319 if (icsk->icsk_mtup.probe_size) { 2075 if (icsk->icsk_mtup.probe_size) {
@@ -2367,12 +2123,12 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2367 * since it is cheap to do so and saves bytes on the network. 2123 * since it is cheap to do so and saves bytes on the network.
2368 */ 2124 */
2369 if (skb->len > 0 && 2125 if (skb->len > 0 &&
2370 (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) && 2126 (TCP_SKB_CB(skb)->flags & TCPHDR_FIN) &&
2371 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { 2127 tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
2372 if (!pskb_trim(skb, 0)) { 2128 if (!pskb_trim(skb, 0)) {
2373 /* Reuse, even though it does some unnecessary work */ 2129 /* Reuse, even though it does some unnecessary work */
2374 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1, 2130 tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
2375 TCP_SKB_CB(skb)->tcp_flags); 2131 TCP_SKB_CB(skb)->flags);
2376 skb->ip_summed = CHECKSUM_NONE; 2132 skb->ip_summed = CHECKSUM_NONE;
2377 } 2133 }
2378 } 2134 }
@@ -2382,21 +2138,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2382 */ 2138 */
2383 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2139 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2384 2140
2385 /* make sure skb->data is aligned on arches that require it */ 2141 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2386 if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
2387 struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
2388 GFP_ATOMIC);
2389 return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
2390 -ENOBUFS;
2391 } else {
2392 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2393 }
2394}
2395
2396int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2397{
2398 struct tcp_sock *tp = tcp_sk(sk);
2399 int err = __tcp_retransmit_skb(sk, skb);
2400 2142
2401 if (err == 0) { 2143 if (err == 0) {
2402 /* Update global TCP statistics. */ 2144 /* Update global TCP statistics. */
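The branch removed on the left copied the buffer before transmission whenever skb->data was not 4-byte aligned on architectures that require alignment. A user-space sketch of the same test, with malloc/memcpy standing in for the skb copy and NULL playing the role of -ENOBUFS:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

/* Return a pointer safe to hand to an alignment-sensitive transmit path:
 * the original buffer if it is already 4-byte aligned, otherwise a fresh
 * copy (NULL on allocation failure). */
static unsigned char *aligned_or_copy(const unsigned char *data, size_t len)
{
        if (((uintptr_t)data & 3) == 0)
                return (unsigned char *)data;

        unsigned char *copy = malloc(len);
        if (copy)
                memcpy(copy, data, len);
        return copy;
}
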
@@ -2406,7 +2148,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2406 2148
2407#if FASTRETRANS_DEBUG > 0 2149#if FASTRETRANS_DEBUG > 0
2408 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { 2150 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
2409 net_dbg_ratelimited("retrans_out leaked\n"); 2151 if (net_ratelimit())
2152 printk(KERN_DEBUG "retrans_out leaked.\n");
2410 } 2153 }
2411#endif 2154#endif
2412 if (!tp->retrans_out) 2155 if (!tp->retrans_out)
@@ -2431,18 +2174,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
2431/* Check if forward retransmits are possible in the current 2174
2432 * window/congestion state. 2175 * window/congestion state.
2433 */ 2176 */
2434static bool tcp_can_forward_retransmit(struct sock *sk) 2177static int tcp_can_forward_retransmit(struct sock *sk)
2435{ 2178{
2436 const struct inet_connection_sock *icsk = inet_csk(sk); 2179 const struct inet_connection_sock *icsk = inet_csk(sk);
2437 const struct tcp_sock *tp = tcp_sk(sk); 2180 struct tcp_sock *tp = tcp_sk(sk);
2438 2181
2439 /* Forward retransmissions are possible only during Recovery. */ 2182 /* Forward retransmissions are possible only during Recovery. */
2440 if (icsk->icsk_ca_state != TCP_CA_Recovery) 2183 if (icsk->icsk_ca_state != TCP_CA_Recovery)
2441 return false; 2184 return 0;
2442 2185
2443 /* No forward retransmissions in Reno are possible. */ 2186 /* No forward retransmissions in Reno are possible. */
2444 if (tcp_is_reno(tp)) 2187 if (tcp_is_reno(tp))
2445 return false; 2188 return 0;
2446 2189
2447 /* Yeah, we have to make a difficult choice between forward transmission 2190
2448 * and retransmission... Both ways have their merits... 2191 * and retransmission... Both ways have their merits...
@@ -2453,9 +2196,9 @@ static bool tcp_can_forward_retransmit(struct sock *sk)
2453 */ 2196 */
2454 2197
2455 if (tcp_may_send_now(sk)) 2198 if (tcp_may_send_now(sk))
2456 return false; 2199 return 0;
2457 2200
2458 return true; 2201 return 1;
2459} 2202}
2460 2203
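Condensing the three checks above into one predicate (the enum and parameter names are illustrative only): forward retransmits are attempted only in Recovery, only when SACK is in use (plain Reno cannot tell which later segments are missing), and only when no new segment could be sent instead.

#include <stdbool.h>

enum toy_ca_state { TOY_CA_OPEN, TOY_CA_RECOVERY, TOY_CA_LOSS };

static bool can_forward_retransmit(enum toy_ca_state state,
                                   bool sack_enabled,
                                   bool may_send_new_data)
{
        if (state != TOY_CA_RECOVERY)
                return false;
        if (!sack_enabled)
                return false;
        return !may_send_new_data;
}
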
2461/* This gets called after a retransmit timeout, and the initially 2204/* This gets called after a retransmit timeout, and the initially
@@ -2545,15 +2288,10 @@ begin_fwd:
2545 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) 2288 if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
2546 continue; 2289 continue;
2547 2290
2548 if (tcp_retransmit_skb(sk, skb)) { 2291 if (tcp_retransmit_skb(sk, skb))
2549 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL);
2550 return; 2292 return;
2551 }
2552 NET_INC_STATS_BH(sock_net(sk), mib_idx); 2293 NET_INC_STATS_BH(sock_net(sk), mib_idx);
2553 2294
2554 if (tcp_in_cwnd_reduction(sk))
2555 tp->prr_out += tcp_skb_pcount(skb);
2556
2557 if (skb == tcp_write_queue_head(sk)) 2295 if (skb == tcp_write_queue_head(sk))
2558 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 2296 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
2559 inet_csk(sk)->icsk_rto, 2297 inet_csk(sk)->icsk_rto,
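The loop this hunk belongs to walks the retransmit queue in order. A schematic restatement with an invented array in place of the write queue: segments already SACKed or already retransmitted are skipped, the walk stops at the first send failure, and the RTO timer is rearmed when the head of the queue itself was the one resent.

#include <stdbool.h>
#include <stddef.h>

#define TOY_SACKED_ACKED   0x1u
#define TOY_SACKED_RETRANS 0x2u

struct toy_seg { unsigned int sacked; };

static void retransmit_walk(struct toy_seg *q, size_t n,
                            bool (*resend)(struct toy_seg *),
                            void (*rearm_rto)(void))
{
        for (size_t i = 0; i < n; i++) {
                if (q[i].sacked & (TOY_SACKED_ACKED | TOY_SACKED_RETRANS))
                        continue;
                if (!resend(&q[i]))
                        return;         /* give up for now; the timer retries */
                if (i == 0)
                        rearm_rto();    /* head retransmitted: restart RTO     */
        }
}
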
@@ -2577,7 +2315,7 @@ void tcp_send_fin(struct sock *sk)
2577 mss_now = tcp_current_mss(sk); 2315 mss_now = tcp_current_mss(sk);
2578 2316
2579 if (tcp_send_head(sk) != NULL) { 2317 if (tcp_send_head(sk) != NULL) {
2580 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; 2318 TCP_SKB_CB(skb)->flags |= TCPHDR_FIN;
2581 TCP_SKB_CB(skb)->end_seq++; 2319 TCP_SKB_CB(skb)->end_seq++;
2582 tp->write_seq++; 2320 tp->write_seq++;
2583 } else { 2321 } else {
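tcp_send_fin() above either piggybacks the FIN on the last queued segment or, when nothing is queued, allocates a data-less segment to carry it; either way the FIN consumes one sequence number. A toy version of that decision, with all names invented:

#include <stddef.h>

#define TOY_FIN 0x01u

struct toy_seg { unsigned int flags; unsigned int end_seq; };

static void queue_fin(struct toy_seg *last_unsent,
                      struct toy_seg *(*alloc_empty)(void))
{
        struct toy_seg *seg = last_unsent ? last_unsent : alloc_empty();

        if (!seg)
                return;         /* allocation failed; caller retries later */
        seg->flags |= TOY_FIN;
        seg->end_seq++;         /* FIN occupies one sequence number        */
}
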
@@ -2639,11 +2377,11 @@ int tcp_send_synack(struct sock *sk)
2639 struct sk_buff *skb; 2377 struct sk_buff *skb;
2640 2378
2641 skb = tcp_write_queue_head(sk); 2379 skb = tcp_write_queue_head(sk);
2642 if (skb == NULL || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { 2380 if (skb == NULL || !(TCP_SKB_CB(skb)->flags & TCPHDR_SYN)) {
2643 pr_debug("%s: wrong queue state\n", __func__); 2381 printk(KERN_DEBUG "tcp_send_synack: wrong queue state\n");
2644 return -EFAULT; 2382 return -EFAULT;
2645 } 2383 }
2646 if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { 2384 if (!(TCP_SKB_CB(skb)->flags & TCPHDR_ACK)) {
2647 if (skb_cloned(skb)) { 2385 if (skb_cloned(skb)) {
2648 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); 2386 struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC);
2649 if (nskb == NULL) 2387 if (nskb == NULL)
@@ -2657,27 +2395,17 @@ int tcp_send_synack(struct sock *sk)
2657 skb = nskb; 2395 skb = nskb;
2658 } 2396 }
2659 2397
2660 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ACK; 2398 TCP_SKB_CB(skb)->flags |= TCPHDR_ACK;
2661 TCP_ECN_send_synack(tcp_sk(sk), skb); 2399 TCP_ECN_send_synack(tcp_sk(sk), skb);
2662 } 2400 }
2663 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2401 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2664 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2402 return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
2665} 2403}
2666 2404
2667/** 2405/* Prepare a SYN-ACK. */
2668 * tcp_make_synack - Prepare a SYN-ACK.
2669 * sk: listener socket
2670 * dst: dst entry attached to the SYNACK
2671 * req: request_sock pointer
2672 * rvp: request_values pointer
2673 *
2674 * Allocate one skb and build a SYNACK packet.
2675 * @dst is consumed : Caller should not use it again.
2676 */
2677struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, 2406struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2678 struct request_sock *req, 2407 struct request_sock *req,
2679 struct request_values *rvp, 2408 struct request_values *rvp)
2680 struct tcp_fastopen_cookie *foc)
2681{ 2409{
2682 struct tcp_out_options opts; 2410 struct tcp_out_options opts;
2683 struct tcp_extend_values *xvp = tcp_xv(rvp); 2411 struct tcp_extend_values *xvp = tcp_xv(rvp);
@@ -2693,16 +2421,14 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2693 2421
2694 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) 2422 if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired)
2695 s_data_desired = cvp->s_data_desired; 2423 s_data_desired = cvp->s_data_desired;
2696 skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, 2424 skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15 + s_data_desired, 1, GFP_ATOMIC);
2697 sk_gfp_atomic(sk, GFP_ATOMIC)); 2425 if (skb == NULL)
2698 if (unlikely(!skb)) {
2699 dst_release(dst);
2700 return NULL; 2426 return NULL;
2701 } 2427
2702 /* Reserve space for headers. */ 2428 /* Reserve space for headers. */
2703 skb_reserve(skb, MAX_TCP_HEADER); 2429 skb_reserve(skb, MAX_TCP_HEADER);
2704 2430
2705 skb_dst_set(skb, dst); 2431 skb_dst_set(skb, dst_clone(dst));
2706 2432
2707 mss = dst_metric_advmss(dst); 2433 mss = dst_metric_advmss(dst);
2708 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) 2434 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
@@ -2737,7 +2463,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2737#endif 2463#endif
2738 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2464 TCP_SKB_CB(skb)->when = tcp_time_stamp;
2739 tcp_header_size = tcp_synack_options(sk, req, mss, 2465 tcp_header_size = tcp_synack_options(sk, req, mss,
2740 skb, &opts, &md5, xvp, foc) 2466 skb, &opts, &md5, xvp)
2741 + sizeof(*th); 2467 + sizeof(*th);
2742 2468
2743 skb_push(skb, tcp_header_size); 2469 skb_push(skb, tcp_header_size);
@@ -2791,8 +2517,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2791 } 2517 }
2792 2518
2793 th->seq = htonl(TCP_SKB_CB(skb)->seq); 2519 th->seq = htonl(TCP_SKB_CB(skb)->seq);
2794 /* XXX data is queued and acked as is. No buffer/window check */ 2520 th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1);
2795 th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
2796 2521
2797 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ 2522 /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
2798 th->window = htons(min(req->rcv_wnd, 65535U)); 2523 th->window = htons(min(req->rcv_wnd, 65535U));
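Per the RFC 1323 note above, the window field of a SYN or SYN-ACK is never scaled, so it must be clamped to what the 16-bit header field can hold. A tiny user-space equivalent of that clamp:

#include <stdint.h>
#include <arpa/inet.h>  /* htons() */

static uint16_t synack_window_field(uint32_t rcv_wnd)
{
        /* unscaled: anything above 65535 cannot be expressed in the header */
        return htons(rcv_wnd > 65535U ? 65535U : (uint16_t)rcv_wnd);
}
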
@@ -2813,9 +2538,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
2813EXPORT_SYMBOL(tcp_make_synack); 2538EXPORT_SYMBOL(tcp_make_synack);
2814 2539
2815/* Do all connect socket setups that can be done AF independent. */ 2540/* Do all connect socket setups that can be done AF independent. */
2816void tcp_connect_init(struct sock *sk) 2541static void tcp_connect_init(struct sock *sk)
2817{ 2542{
2818 const struct dst_entry *dst = __sk_dst_get(sk); 2543 struct dst_entry *dst = __sk_dst_get(sk);
2819 struct tcp_sock *tp = tcp_sk(sk); 2544 struct tcp_sock *tp = tcp_sk(sk);
2820 __u8 rcv_wscale; 2545 __u8 rcv_wscale;
2821 2546
@@ -2868,121 +2593,15 @@ void tcp_connect_init(struct sock *sk)
2868 tp->snd_una = tp->write_seq; 2593 tp->snd_una = tp->write_seq;
2869 tp->snd_sml = tp->write_seq; 2594 tp->snd_sml = tp->write_seq;
2870 tp->snd_up = tp->write_seq; 2595 tp->snd_up = tp->write_seq;
2871 tp->snd_nxt = tp->write_seq; 2596 tp->rcv_nxt = 0;
2872 2597 tp->rcv_wup = 0;
2873 if (likely(!tp->repair)) 2598 tp->copied_seq = 0;
2874 tp->rcv_nxt = 0;
2875 tp->rcv_wup = tp->rcv_nxt;
2876 tp->copied_seq = tp->rcv_nxt;
2877 2599
2878 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; 2600 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2879 inet_csk(sk)->icsk_retransmits = 0; 2601 inet_csk(sk)->icsk_retransmits = 0;
2880 tcp_clear_retrans(tp); 2602 tcp_clear_retrans(tp);
2881} 2603}
2882 2604
2883static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb)
2884{
2885 struct tcp_sock *tp = tcp_sk(sk);
2886 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
2887
2888 tcb->end_seq += skb->len;
2889 skb_header_release(skb);
2890 __tcp_add_write_queue_tail(sk, skb);
2891 sk->sk_wmem_queued += skb->truesize;
2892 sk_mem_charge(sk, skb->truesize);
2893 tp->write_seq = tcb->end_seq;
2894 tp->packets_out += tcp_skb_pcount(skb);
2895}
2896
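The removed helper above centralizes the bookkeeping done when a segment is appended to the write queue: the segment's end advances by its length, the socket is charged for the buffer, write_seq catches up, and the packets are counted as outstanding. A simplified restatement with invented structures:

#include <stddef.h>
#include <stdint.h>

struct toy_sock { uint32_t write_seq; unsigned int packets_out; size_t wmem_queued; };
struct toy_seg  { uint32_t end_seq; size_t len; size_t truesize; unsigned int pcount; };

static void queue_tail(struct toy_sock *sk, struct toy_seg *seg)
{
        seg->end_seq    += (uint32_t)seg->len;  /* account the payload          */
        sk->wmem_queued += seg->truesize;       /* charge the socket            */
        sk->write_seq    = seg->end_seq;        /* next byte to be written      */
        sk->packets_out += seg->pcount;         /* segments now in flight       */
}
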
2897/* Build and send a SYN with data and (cached) Fast Open cookie. However,
2898 * queue a data-only packet after the regular SYN, such that regular SYNs
2899 * are retransmitted on timeouts. Also if the remote SYN-ACK acknowledges
2900 * only the SYN sequence, the data are retransmitted in the first ACK.
2901 * If the cookie is not cached or another error occurs, fall back to sending a
2902 * regular SYN with the Fast Open cookie request option.
2903 */
2904static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
2905{
2906 struct tcp_sock *tp = tcp_sk(sk);
2907 struct tcp_fastopen_request *fo = tp->fastopen_req;
2908 int syn_loss = 0, space, i, err = 0, iovlen = fo->data->msg_iovlen;
2909 struct sk_buff *syn_data = NULL, *data;
2910 unsigned long last_syn_loss = 0;
2911
2912 tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
2913 tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
2914 &syn_loss, &last_syn_loss);
2915 /* Recurring FO SYN losses: revert to regular handshake temporarily */
2916 if (syn_loss > 1 &&
2917 time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
2918 fo->cookie.len = -1;
2919 goto fallback;
2920 }
2921
2922 if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
2923 fo->cookie.len = -1;
2924 else if (fo->cookie.len <= 0)
2925 goto fallback;
2926
2927 /* MSS for SYN-data is based on cached MSS and bounded by PMTU and
2928 * user-MSS. Reserve maximum option space for middleboxes that add
2929 * private TCP options. The cost is reduced data space in SYN :(
2930 */
2931 if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->rx_opt.mss_clamp)
2932 tp->rx_opt.mss_clamp = tp->rx_opt.user_mss;
2933 space = tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
2934 MAX_TCP_OPTION_SPACE;
2935
2936 syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
2937 sk->sk_allocation);
2938 if (syn_data == NULL)
2939 goto fallback;
2940
2941 for (i = 0; i < iovlen && syn_data->len < space; ++i) {
2942 struct iovec *iov = &fo->data->msg_iov[i];
2943 unsigned char __user *from = iov->iov_base;
2944 int len = iov->iov_len;
2945
2946 if (syn_data->len + len > space)
2947 len = space - syn_data->len;
2948 else if (i + 1 == iovlen)
2949 /* No more data pending in inet_wait_for_connect() */
2950 fo->data = NULL;
2951
2952 if (skb_add_data(syn_data, from, len))
2953 goto fallback;
2954 }
2955
2956 /* Queue a data-only packet after the regular SYN for retransmission */
2957 data = pskb_copy(syn_data, sk->sk_allocation);
2958 if (data == NULL)
2959 goto fallback;
2960 TCP_SKB_CB(data)->seq++;
2961 TCP_SKB_CB(data)->tcp_flags &= ~TCPHDR_SYN;
2962 TCP_SKB_CB(data)->tcp_flags = (TCPHDR_ACK|TCPHDR_PSH);
2963 tcp_connect_queue_skb(sk, data);
2964 fo->copied = data->len;
2965
2966 if (tcp_transmit_skb(sk, syn_data, 0, sk->sk_allocation) == 0) {
2967 tp->syn_data = (fo->copied > 0);
2968 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE);
2969 goto done;
2970 }
2971 syn_data = NULL;
2972
2973fallback:
2974 /* Send a regular SYN with Fast Open cookie request option */
2975 if (fo->cookie.len > 0)
2976 fo->cookie.len = 0;
2977 err = tcp_transmit_skb(sk, syn, 1, sk->sk_allocation);
2978 if (err)
2979 tp->syn_fastopen = 0;
2980 kfree_skb(syn_data);
2981done:
2982 fo->cookie.len = -1; /* Exclude Fast Open option for SYN retries */
2983 return err;
2984}
2985
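The removed tcp_send_syn_data() explains when the client may put data in the SYN: only with a usable cached cookie, and not while backing off after repeated recent SYN losses (the back-off window is 60 seconds doubled per loss). A hedged sketch of just that decision, using jiffies-style time arithmetic and invented parameter names:

#include <stdbool.h>

static bool use_syn_data(int cookie_len, int syn_losses,
                         unsigned long now, unsigned long last_loss,
                         unsigned long hz)
{
        if (cookie_len <= 0)
                return false;           /* no cached cookie: request one instead */
        if (syn_losses > 1 &&
            (long)(now - (last_loss + (60 * hz << syn_losses))) < 0)
                return false;           /* still inside the back-off window      */
        return true;
}
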
2986/* Build a SYN and send it off. */ 2605/* Build a SYN and send it off. */
2987int tcp_connect(struct sock *sk) 2606int tcp_connect(struct sock *sk)
2988{ 2607{
@@ -2992,11 +2611,6 @@ int tcp_connect(struct sock *sk)
2992 2611
2993 tcp_connect_init(sk); 2612 tcp_connect_init(sk);
2994 2613
2995 if (unlikely(tp->repair)) {
2996 tcp_finish_connect(sk, NULL);
2997 return 0;
2998 }
2999
3000 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); 2614 buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation);
3001 if (unlikely(buff == NULL)) 2615 if (unlikely(buff == NULL))
3002 return -ENOBUFS; 2616 return -ENOBUFS;
@@ -3004,14 +2618,19 @@ int tcp_connect(struct sock *sk)
3004 /* Reserve space for headers. */ 2618 /* Reserve space for headers. */
3005 skb_reserve(buff, MAX_TCP_HEADER); 2619 skb_reserve(buff, MAX_TCP_HEADER);
3006 2620
2621 tp->snd_nxt = tp->write_seq;
3007 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN); 2622 tcp_init_nondata_skb(buff, tp->write_seq++, TCPHDR_SYN);
3008 tp->retrans_stamp = TCP_SKB_CB(buff)->when = tcp_time_stamp;
3009 tcp_connect_queue_skb(sk, buff);
3010 TCP_ECN_send_syn(sk, buff); 2623 TCP_ECN_send_syn(sk, buff);
3011 2624
3012 /* Send off SYN; include data in Fast Open. */ 2625 /* Send it off. */
3013 err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : 2626 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3014 tcp_transmit_skb(sk, buff, 1, sk->sk_allocation); 2627 tp->retrans_stamp = TCP_SKB_CB(buff)->when;
2628 skb_header_release(buff);
2629 __tcp_add_write_queue_tail(sk, buff);
2630 sk->sk_wmem_queued += buff->truesize;
2631 sk_mem_charge(sk, buff->truesize);
2632 tp->packets_out += tcp_skb_pcount(buff);
2633 err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
3015 if (err == -ECONNREFUSED) 2634 if (err == -ECONNREFUSED)
3016 return err; 2635 return err;
3017 2636
@@ -3098,7 +2717,7 @@ void tcp_send_ack(struct sock *sk)
3098 * tcp_transmit_skb() will set the ownership to this 2717 * tcp_transmit_skb() will set the ownership to this
3099 * sock. 2718 * sock.
3100 */ 2719 */
3101 buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); 2720 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3102 if (buff == NULL) { 2721 if (buff == NULL) {
3103 inet_csk_schedule_ack(sk); 2722 inet_csk_schedule_ack(sk);
3104 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; 2723 inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
@@ -3113,7 +2732,7 @@ void tcp_send_ack(struct sock *sk)
3113 2732
3114 /* Send it off, this clears delayed acks for us. */ 2733 /* Send it off, this clears delayed acks for us. */
3115 TCP_SKB_CB(buff)->when = tcp_time_stamp; 2734 TCP_SKB_CB(buff)->when = tcp_time_stamp;
3116 tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); 2735 tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC);
3117} 2736}
3118 2737
3119/* This routine sends a packet with an out of date sequence 2738/* This routine sends a packet with an out of date sequence
@@ -3133,7 +2752,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3133 struct sk_buff *skb; 2752 struct sk_buff *skb;
3134 2753
3135 /* We don't queue it, tcp_transmit_skb() sets ownership. */ 2754 /* We don't queue it, tcp_transmit_skb() sets ownership. */
3136 skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); 2755 skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
3137 if (skb == NULL) 2756 if (skb == NULL)
3138 return -1; 2757 return -1;
3139 2758
@@ -3148,15 +2767,6 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
3148 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC); 2767 return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
3149} 2768}
3150 2769
3151void tcp_send_window_probe(struct sock *sk)
3152{
3153 if (sk->sk_state == TCP_ESTABLISHED) {
3154 tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
3155 tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
3156 tcp_xmit_probe_skb(sk, 0);
3157 }
3158}
3159
3160/* Initiate keepalive or window probe from timer. */ 2770/* Initiate keepalive or window probe from timer. */
3161int tcp_write_wakeup(struct sock *sk) 2771int tcp_write_wakeup(struct sock *sk)
3162{ 2772{
@@ -3182,13 +2792,13 @@ int tcp_write_wakeup(struct sock *sk)
3182 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq || 2792 if (seg_size < TCP_SKB_CB(skb)->end_seq - TCP_SKB_CB(skb)->seq ||
3183 skb->len > mss) { 2793 skb->len > mss) {
3184 seg_size = min(seg_size, mss); 2794 seg_size = min(seg_size, mss);
3185 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 2795 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
3186 if (tcp_fragment(sk, skb, seg_size, mss)) 2796 if (tcp_fragment(sk, skb, seg_size, mss))
3187 return -1; 2797 return -1;
3188 } else if (!tcp_skb_pcount(skb)) 2798 } else if (!tcp_skb_pcount(skb))
3189 tcp_set_skb_tso_segs(sk, skb, mss); 2799 tcp_set_skb_tso_segs(sk, skb, mss);
3190 2800
3191 TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; 2801 TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
3192 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2802 TCP_SKB_CB(skb)->when = tcp_time_stamp;
3193 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); 2803 err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
3194 if (!err) 2804 if (!err)