about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/tcp.h 193
-rw-r--r-- net/ipv4/tcp_cong.c 28
-rw-r--r-- net/ipv4/tcp_input.c 82
-rw-r--r-- net/ipv4/tcp_ipv4.c 9
-rw-r--r-- net/ipv4/tcp_output.c 87
5 files changed, 198 insertions, 201 deletions
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 369930497401..77f21c65bbca 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -445,34 +445,16 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
445extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, 445extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
446 sk_read_actor_t recv_actor); 446 sk_read_actor_t recv_actor);
447 447
448/* Initialize RCV_MSS value. 448extern void tcp_initialize_rcv_mss(struct sock *sk);
449 * RCV_MSS is our guess about the MSS used by the peer.
450 * We have no direct information about the MSS.
451 * It is better to underestimate RCV_MSS than to overestimate it.
452 * Overestimation makes us ACK less frequently than needed.
453 * Underestimation is easier to detect and fix via tcp_measure_rcv_mss().
454 */
455 449
456static inline void tcp_initialize_rcv_mss(struct sock *sk) 450static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
457{
458 struct tcp_sock *tp = tcp_sk(sk);
459 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
460
461 hint = min(hint, tp->rcv_wnd/2);
462 hint = min(hint, TCP_MIN_RCVMSS);
463 hint = max(hint, TCP_MIN_MSS);
464
465 inet_csk(sk)->icsk_ack.rcv_mss = hint;
466}
467
468static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
469{ 451{
470 tp->pred_flags = htonl((tp->tcp_header_len << 26) | 452 tp->pred_flags = htonl((tp->tcp_header_len << 26) |
471 ntohl(TCP_FLAG_ACK) | 453 ntohl(TCP_FLAG_ACK) |
472 snd_wnd); 454 snd_wnd);
473} 455}
474 456
475static __inline__ void tcp_fast_path_on(struct tcp_sock *tp) 457static inline void tcp_fast_path_on(struct tcp_sock *tp)
476{ 458{
477 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); 459 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
478} 460}
@@ -490,7 +472,7 @@ static inline void tcp_fast_path_check(struct sock *sk, struct tcp_sock *tp)
490 * Rcv_nxt can be after the window if our peer pushes more data 472 * Rcv_nxt can be after the window if our peer pushes more data
491 * than the offered window. 473 * than the offered window.
492 */ 474 */
493static __inline__ u32 tcp_receive_window(const struct tcp_sock *tp) 475static inline u32 tcp_receive_window(const struct tcp_sock *tp)
494{ 476{
495 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; 477 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt;
496 478
@@ -662,6 +644,7 @@ extern void tcp_cleanup_congestion_control(struct sock *sk);
662extern int tcp_set_default_congestion_control(const char *name); 644extern int tcp_set_default_congestion_control(const char *name);
663extern void tcp_get_default_congestion_control(char *name); 645extern void tcp_get_default_congestion_control(char *name);
664extern int tcp_set_congestion_control(struct sock *sk, const char *name); 646extern int tcp_set_congestion_control(struct sock *sk, const char *name);
647extern void tcp_slow_start(struct tcp_sock *tp);
665 648
666extern struct tcp_congestion_ops tcp_init_congestion_ops; 649extern struct tcp_congestion_ops tcp_init_congestion_ops;
667extern u32 tcp_reno_ssthresh(struct sock *sk); 650extern u32 tcp_reno_ssthresh(struct sock *sk);
@@ -701,7 +684,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
701 * "Packets left network, but not honestly ACKed yet" PLUS 684 * "Packets left network, but not honestly ACKed yet" PLUS
702 * "Packets fast retransmitted" 685 * "Packets fast retransmitted"
703 */ 686 */
704static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) 687static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
705{ 688{
706 return (tp->packets_out - tp->left_out + tp->retrans_out); 689 return (tp->packets_out - tp->left_out + tp->retrans_out);
707} 690}
@@ -721,33 +704,6 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
721 (tp->snd_cwnd >> 2))); 704 (tp->snd_cwnd >> 2)));
722} 705}
723 706
724/*
725 * Linear increase during slow start
726 */
727static inline void tcp_slow_start(struct tcp_sock *tp)
728{
729 if (sysctl_tcp_abc) {
730 /* RFC3465: Slow Start
731 * TCP sender SHOULD increase cwnd by the number of
732 * previously unacknowledged bytes ACKed by each incoming
733 * acknowledgment, provided the increase is not more than L
734 */
735 if (tp->bytes_acked < tp->mss_cache)
736 return;
737
738 /* We MAY increase by 2 if discovered delayed ack */
739 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
740 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
741 tp->snd_cwnd++;
742 }
743 }
744 tp->bytes_acked = 0;
745
746 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
747 tp->snd_cwnd++;
748}
749
750
751static inline void tcp_sync_left_out(struct tcp_sock *tp) 707static inline void tcp_sync_left_out(struct tcp_sock *tp)
752{ 708{
753 if (tp->rx_opt.sack_ok && 709 if (tp->rx_opt.sack_ok &&
@@ -756,34 +712,7 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
756 tp->left_out = tp->sacked_out + tp->lost_out; 712 tp->left_out = tp->sacked_out + tp->lost_out;
757} 713}
758 714
759/* Set slow start threshold and cwnd not falling to slow start */ 715extern void tcp_enter_cwr(struct sock *sk);
760static inline void __tcp_enter_cwr(struct sock *sk)
761{
762 const struct inet_connection_sock *icsk = inet_csk(sk);
763 struct tcp_sock *tp = tcp_sk(sk);
764
765 tp->undo_marker = 0;
766 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
767 tp->snd_cwnd = min(tp->snd_cwnd,
768 tcp_packets_in_flight(tp) + 1U);
769 tp->snd_cwnd_cnt = 0;
770 tp->high_seq = tp->snd_nxt;
771 tp->snd_cwnd_stamp = tcp_time_stamp;
772 TCP_ECN_queue_cwr(tp);
773}
774
775static inline void tcp_enter_cwr(struct sock *sk)
776{
777 struct tcp_sock *tp = tcp_sk(sk);
778
779 tp->prior_ssthresh = 0;
780 tp->bytes_acked = 0;
781 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
782 __tcp_enter_cwr(sk);
783 tcp_set_ca_state(sk, TCP_CA_CWR);
784 }
785}
786
787extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); 716extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst);
788 717
789/* Slow start with delack produces 3 packets of burst, so that 718/* Slow start with delack produces 3 packets of burst, so that
@@ -815,14 +744,14 @@ static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
815 return left <= tcp_max_burst(tp); 744 return left <= tcp_max_burst(tp);
816} 745}
817 746
818static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, 747static inline void tcp_minshall_update(struct tcp_sock *tp, int mss,
819 const struct sk_buff *skb) 748 const struct sk_buff *skb)
820{ 749{
821 if (skb->len < mss) 750 if (skb->len < mss)
822 tp->snd_sml = TCP_SKB_CB(skb)->end_seq; 751 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
823} 752}
824 753
825static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 754static inline void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
826{ 755{
827 const struct inet_connection_sock *icsk = inet_csk(sk); 756 const struct inet_connection_sock *icsk = inet_csk(sk);
828 if (!tp->packets_out && !icsk->icsk_pending) 757 if (!tp->packets_out && !icsk->icsk_pending)
@@ -830,18 +759,18 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t
830 icsk->icsk_rto, TCP_RTO_MAX); 759 icsk->icsk_rto, TCP_RTO_MAX);
831} 760}
832 761
833static __inline__ void tcp_push_pending_frames(struct sock *sk, 762static inline void tcp_push_pending_frames(struct sock *sk,
834 struct tcp_sock *tp) 763 struct tcp_sock *tp)
835{ 764{
836 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); 765 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
837} 766}
838 767
839static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) 768static inline void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
840{ 769{
841 tp->snd_wl1 = seq; 770 tp->snd_wl1 = seq;
842} 771}
843 772
844static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) 773static inline void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
845{ 774{
846 tp->snd_wl1 = seq; 775 tp->snd_wl1 = seq;
847} 776}
@@ -849,19 +778,19 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq)
849/* 778/*
850 * Calculate(/check) TCP checksum 779 * Calculate(/check) TCP checksum
851 */ 780 */
852static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len, 781static inline u16 tcp_v4_check(struct tcphdr *th, int len,
853 unsigned long saddr, unsigned long daddr, 782 unsigned long saddr, unsigned long daddr,
854 unsigned long base) 783 unsigned long base)
855{ 784{
856 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base); 785 return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
857} 786}
858 787
859static __inline__ int __tcp_checksum_complete(struct sk_buff *skb) 788static inline int __tcp_checksum_complete(struct sk_buff *skb)
860{ 789{
861 return __skb_checksum_complete(skb); 790 return __skb_checksum_complete(skb);
862} 791}
863 792
864static __inline__ int tcp_checksum_complete(struct sk_buff *skb) 793static inline int tcp_checksum_complete(struct sk_buff *skb)
865{ 794{
866 return skb->ip_summed != CHECKSUM_UNNECESSARY && 795 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
867 __tcp_checksum_complete(skb); 796 __tcp_checksum_complete(skb);
@@ -869,7 +798,7 @@ static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
869 798
870/* Prequeue for VJ style copy to user, combined with checksumming. */ 799/* Prequeue for VJ style copy to user, combined with checksumming. */
871 800
872static __inline__ void tcp_prequeue_init(struct tcp_sock *tp) 801static inline void tcp_prequeue_init(struct tcp_sock *tp)
873{ 802{
874 tp->ucopy.task = NULL; 803 tp->ucopy.task = NULL;
875 tp->ucopy.len = 0; 804 tp->ucopy.len = 0;
@@ -885,7 +814,7 @@ static __inline__ void tcp_prequeue_init(struct tcp_sock *tp)
885 * 814 *
886 * NOTE: is this not too big to inline? 815 * NOTE: is this not too big to inline?
887 */ 816 */
888static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) 817static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
889{ 818{
890 struct tcp_sock *tp = tcp_sk(sk); 819 struct tcp_sock *tp = tcp_sk(sk);
891 820
@@ -926,7 +855,7 @@ static const char *statename[]={
926}; 855};
927#endif 856#endif
928 857
929static __inline__ void tcp_set_state(struct sock *sk, int state) 858static inline void tcp_set_state(struct sock *sk, int state)
930{ 859{
931 int oldstate = sk->sk_state; 860 int oldstate = sk->sk_state;
932 861
@@ -960,7 +889,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state)
960#endif 889#endif
961} 890}
962 891
963static __inline__ void tcp_done(struct sock *sk) 892static inline void tcp_done(struct sock *sk)
964{ 893{
965 tcp_set_state(sk, TCP_CLOSE); 894 tcp_set_state(sk, TCP_CLOSE);
966 tcp_clear_xmit_timers(sk); 895 tcp_clear_xmit_timers(sk);
@@ -973,81 +902,13 @@ static __inline__ void tcp_done(struct sock *sk)
973 inet_csk_destroy_sock(sk); 902 inet_csk_destroy_sock(sk);
974} 903}
975 904
976static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) 905static inline void tcp_sack_reset(struct tcp_options_received *rx_opt)
977{ 906{
978 rx_opt->dsack = 0; 907 rx_opt->dsack = 0;
979 rx_opt->eff_sacks = 0; 908 rx_opt->eff_sacks = 0;
980 rx_opt->num_sacks = 0; 909 rx_opt->num_sacks = 0;
981} 910}
982 911
983static __inline__ void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp, __u32 tstamp)
984{
985 if (tp->rx_opt.tstamp_ok) {
986 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
987 (TCPOPT_NOP << 16) |
988 (TCPOPT_TIMESTAMP << 8) |
989 TCPOLEN_TIMESTAMP);
990 *ptr++ = htonl(tstamp);
991 *ptr++ = htonl(tp->rx_opt.ts_recent);
992 }
993 if (tp->rx_opt.eff_sacks) {
994 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
995 int this_sack;
996
997 *ptr++ = htonl((TCPOPT_NOP << 24) |
998 (TCPOPT_NOP << 16) |
999 (TCPOPT_SACK << 8) |
1000 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
1001 TCPOLEN_SACK_PERBLOCK)));
1002 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
1003 *ptr++ = htonl(sp[this_sack].start_seq);
1004 *ptr++ = htonl(sp[this_sack].end_seq);
1005 }
1006 if (tp->rx_opt.dsack) {
1007 tp->rx_opt.dsack = 0;
1008 tp->rx_opt.eff_sacks--;
1009 }
1010 }
1011}
1012
1013/* Construct a tcp options header for a SYN or SYN_ACK packet.
1014 * If this is ever changed make sure to change the definition of
1015 * MAX_SYN_SIZE to match the new maximum number of options that you
1016 * can generate.
1017 */
1018static inline void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
1019 int offer_wscale, int wscale, __u32 tstamp, __u32 ts_recent)
1020{
1021 /* We always get an MSS option.
1022 * The option bytes which will be seen in normal data
1023 * packets should timestamps be used, must be in the MSS
1024 * advertised. But we subtract them from tp->mss_cache so
1025 * that calculations in tcp_sendmsg are simpler etc.
1026 * So account for this fact here if necessary. If we
1027 * don't do this correctly, as a receiver we won't
1028 * recognize data packets as being full sized when we
1029 * should, and thus we won't abide by the delayed ACK
1030 * rules correctly.
1031 * SACKs don't matter, we never delay an ACK when we
1032 * have any of those going out.
1033 */
1034 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
1035 if (ts) {
1036 if(sack)
1037 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
1038 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1039 else
1040 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1041 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1042 *ptr++ = htonl(tstamp); /* TSVAL */
1043 *ptr++ = htonl(ts_recent); /* TSECR */
1044 } else if(sack)
1045 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1046 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
1047 if (offer_wscale)
1048 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
1049}
1050
1051/* Determine a window scaling and initial window to offer. */ 912/* Determine a window scaling and initial window to offer. */
1052extern void tcp_select_initial_window(int __space, __u32 mss, 913extern void tcp_select_initial_window(int __space, __u32 mss,
1053 __u32 *rcv_wnd, __u32 *window_clamp, 914 __u32 *rcv_wnd, __u32 *window_clamp,
@@ -1072,9 +933,9 @@ static inline int tcp_full_space(const struct sock *sk)
1072 return tcp_win_from_space(sk->sk_rcvbuf); 933 return tcp_win_from_space(sk->sk_rcvbuf);
1073} 934}
1074 935
1075static __inline__ void tcp_openreq_init(struct request_sock *req, 936static inline void tcp_openreq_init(struct request_sock *req,
1076 struct tcp_options_received *rx_opt, 937 struct tcp_options_received *rx_opt,
1077 struct sk_buff *skb) 938 struct sk_buff *skb)
1078{ 939{
1079 struct inet_request_sock *ireq = inet_rsk(req); 940 struct inet_request_sock *ireq = inet_rsk(req);
1080 941
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c7cc62c8dc12..e688c687d62d 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -174,6 +174,34 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
174 return err; 174 return err;
175} 175}
176 176
177
178/*
179 * Linear increase during slow start
 *
 * Raises tp->snd_cwnd by one per call, never beyond tp->snd_cwnd_clamp,
 * and resets tp->bytes_acked to 0 whenever the increase is attempted.
 *
 * When sysctl_tcp_abc is non-zero the call returns without changing
 * anything until tp->bytes_acked has reached tp->mss_cache.  When
 * sysctl_tcp_abc is greater than 1, a second increment (subject to the
 * same clamp) is applied if tp->bytes_acked is strictly greater than
 * 2 * tp->mss_cache.
 *
 * NOTE(review): the comparison against 2 * mss_cache is strict, so
 * exactly two segments' worth of acked bytes yields a single increment;
 * confirm against RFC 3465 that this is intended.
180 */
181void tcp_slow_start(struct tcp_sock *tp)
182{
183 if (sysctl_tcp_abc) {
184 /* RFC3465: Slow Start
185 * TCP sender SHOULD increase cwnd by the number of
186 * previously unacknowledged bytes ACKed by each incoming
187 * acknowledgment, provided the increase is not more than L
188 */
189 if (tp->bytes_acked < tp->mss_cache)
190 return;
191
192 /* We MAY increase by 2 if a delayed ACK was detected */
193 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
194 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
195 tp->snd_cwnd++;
196 }
197 }
198 tp->bytes_acked = 0;
199
200 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
201 tp->snd_cwnd++;
202}
203EXPORT_SYMBOL_GPL(tcp_slow_start);
204
177/* 205/*
178 * TCP Reno congestion control 206 * TCP Reno congestion control
179 * This is special case used for fallback as well. 207 * This is special case used for fallback as well.
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 981d1203b152..0a461232329f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -115,8 +115,8 @@ int sysctl_tcp_abc = 1;
115/* Adapt the MSS value used to make delayed ack decision to the 115/* Adapt the MSS value used to make delayed ack decision to the
116 * real world. 116 * real world.
117 */ 117 */
118static inline void tcp_measure_rcv_mss(struct sock *sk, 118static void tcp_measure_rcv_mss(struct sock *sk,
119 const struct sk_buff *skb) 119 const struct sk_buff *skb)
120{ 120{
121 struct inet_connection_sock *icsk = inet_csk(sk); 121 struct inet_connection_sock *icsk = inet_csk(sk);
122 const unsigned int lss = icsk->icsk_ack.last_seg_size; 122 const unsigned int lss = icsk->icsk_ack.last_seg_size;
@@ -246,8 +246,8 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
246 return 0; 246 return 0;
247} 247}
248 248
249static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, 249static void tcp_grow_window(struct sock *sk, struct tcp_sock *tp,
250 struct sk_buff *skb) 250 struct sk_buff *skb)
251{ 251{
252 /* Check #1 */ 252 /* Check #1 */
253 if (tp->rcv_ssthresh < tp->window_clamp && 253 if (tp->rcv_ssthresh < tp->window_clamp &&
@@ -341,6 +341,26 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
342} 342}
343 343
344
345/* Initialize RCV_MSS value.
346 * RCV_MSS is our guess about the MSS used by the peer.
347 * We have no direct information about the MSS.
348 * It is better to underestimate RCV_MSS than to overestimate it.
349 * Overestimation makes us ACK less frequently than needed.
350 * Underestimation is easier to detect and fix via tcp_measure_rcv_mss().
 *
 * The initial guess is the smaller of tp->advmss and tp->mss_cache,
 * capped at half of tp->rcv_wnd and at TCP_MIN_RCVMSS, and then raised
 * to at least TCP_MIN_MSS.  The result is stored in
 * inet_csk(sk)->icsk_ack.rcv_mss.
351 */
352void tcp_initialize_rcv_mss(struct sock *sk)
353{
354 struct tcp_sock *tp = tcp_sk(sk);
355 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
356
357 hint = min(hint, tp->rcv_wnd/2);
358 hint = min(hint, TCP_MIN_RCVMSS); /* upper bound on the guess */
359 hint = max(hint, TCP_MIN_MSS); /* lower bound, applied last so it wins */
360
361 inet_csk(sk)->icsk_ack.rcv_mss = hint;
362}
363
344/* Receiver "autotuning" code. 364/* Receiver "autotuning" code.
345 * 365 *
346 * The algorithm for RTT estimation w/o timestamps is based on 366 * The algorithm for RTT estimation w/o timestamps is based on
@@ -735,6 +755,27 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
735 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 755 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
736} 756}
737 757
758/* Set slow start threshold and cwnd not falling to slow start
 *
 * tp->prior_ssthresh and tp->bytes_acked are cleared on every call.
 * The remaining work is done only while icsk_ca_state compares below
 * TCP_CA_CWR:
 *  - tp->undo_marker is cleared;
 *  - tp->snd_ssthresh is taken from the congestion-control ops'
 *    ssthresh() hook;
 *  - tp->snd_cwnd is limited to tcp_packets_in_flight(tp) + 1;
 *  - tp->snd_cwnd_cnt is reset, tp->high_seq is set to tp->snd_nxt and
 *    tp->snd_cwnd_stamp to tcp_time_stamp;
 *  - TCP_ECN_queue_cwr() is called and the state becomes TCP_CA_CWR.
 */
759void tcp_enter_cwr(struct sock *sk)
760{
761 struct tcp_sock *tp = tcp_sk(sk);
762
763 tp->prior_ssthresh = 0;
764 tp->bytes_acked = 0;
765 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
766 tp->undo_marker = 0;
767 tp->snd_ssthresh = inet_csk(sk)->icsk_ca_ops->ssthresh(sk);
768 tp->snd_cwnd = min(tp->snd_cwnd,
769 tcp_packets_in_flight(tp) + 1U);
770 tp->snd_cwnd_cnt = 0;
771 tp->high_seq = tp->snd_nxt;
772 tp->snd_cwnd_stamp = tcp_time_stamp;
773 TCP_ECN_queue_cwr(tp);
774
775 tcp_set_ca_state(sk, TCP_CA_CWR);
776 }
777}
778
738/* Initialize metrics on socket. */ 779/* Initialize metrics on socket. */
739 780
740static void tcp_init_metrics(struct sock *sk) 781static void tcp_init_metrics(struct sock *sk)
@@ -2070,8 +2111,8 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
2070 tcp_ack_no_tstamp(sk, seq_rtt, flag); 2111 tcp_ack_no_tstamp(sk, seq_rtt, flag);
2071} 2112}
2072 2113
2073static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, 2114static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2074 u32 in_flight, int good) 2115 u32 in_flight, int good)
2075{ 2116{
2076 const struct inet_connection_sock *icsk = inet_csk(sk); 2117 const struct inet_connection_sock *icsk = inet_csk(sk);
2077 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); 2118 icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
@@ -2082,7 +2123,7 @@ static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
2082 * RFC2988 recommends to restart timer to now+rto. 2123 * RFC2988 recommends to restart timer to now+rto.
2083 */ 2124 */
2084 2125
2085static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) 2126static void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
2086{ 2127{
2087 if (!tp->packets_out) { 2128 if (!tp->packets_out) {
2088 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 2129 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
@@ -2147,7 +2188,7 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2147 return acked; 2188 return acked;
2148} 2189}
2149 2190
2150static inline u32 tcp_usrtt(const struct sk_buff *skb) 2191static u32 tcp_usrtt(const struct sk_buff *skb)
2151{ 2192{
2152 struct timeval tv, now; 2193 struct timeval tv, now;
2153 2194
@@ -2583,8 +2624,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
2583/* Fast parse options. This hopes to only see timestamps. 2624/* Fast parse options. This hopes to only see timestamps.
2584 * If it is wrong it falls back on tcp_parse_options(). 2625 * If it is wrong it falls back on tcp_parse_options().
2585 */ 2626 */
2586static inline int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, 2627static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
2587 struct tcp_sock *tp) 2628 struct tcp_sock *tp)
2588{ 2629{
2589 if (th->doff == sizeof(struct tcphdr)>>2) { 2630 if (th->doff == sizeof(struct tcphdr)>>2) {
2590 tp->rx_opt.saw_tstamp = 0; 2631 tp->rx_opt.saw_tstamp = 0;
@@ -2804,8 +2845,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
2804 } 2845 }
2805} 2846}
2806 2847
2807static __inline__ int 2848static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2808tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2809{ 2849{
2810 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) { 2850 if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
2811 if (before(seq, sp->start_seq)) 2851 if (before(seq, sp->start_seq))
@@ -2817,7 +2857,7 @@ tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
2817 return 0; 2857 return 0;
2818} 2858}
2819 2859
2820static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq) 2860static void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2821{ 2861{
2822 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { 2862 if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) {
2823 if (before(seq, tp->rcv_nxt)) 2863 if (before(seq, tp->rcv_nxt))
@@ -2832,7 +2872,7 @@ static inline void tcp_dsack_set(struct tcp_sock *tp, u32 seq, u32 end_seq)
2832 } 2872 }
2833} 2873}
2834 2874
2835static inline void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq) 2875static void tcp_dsack_extend(struct tcp_sock *tp, u32 seq, u32 end_seq)
2836{ 2876{
2837 if (!tp->rx_opt.dsack) 2877 if (!tp->rx_opt.dsack)
2838 tcp_dsack_set(tp, seq, end_seq); 2878 tcp_dsack_set(tp, seq, end_seq);
@@ -2890,7 +2930,7 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
2890 } 2930 }
2891} 2931}
2892 2932
2893static __inline__ void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2) 2933static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
2894{ 2934{
2895 __u32 tmp; 2935 __u32 tmp;
2896 2936
@@ -3455,7 +3495,7 @@ void tcp_cwnd_application_limited(struct sock *sk)
3455 tp->snd_cwnd_stamp = tcp_time_stamp; 3495 tp->snd_cwnd_stamp = tcp_time_stamp;
3456} 3496}
3457 3497
3458static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp) 3498static int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
3459{ 3499{
3460 /* If the user specified a specific send buffer setting, do 3500 /* If the user specified a specific send buffer setting, do
3461 * not modify it. 3501 * not modify it.
@@ -3502,7 +3542,7 @@ static void tcp_new_space(struct sock *sk)
3502 sk->sk_write_space(sk); 3542 sk->sk_write_space(sk);
3503} 3543}
3504 3544
3505static inline void tcp_check_space(struct sock *sk) 3545static void tcp_check_space(struct sock *sk)
3506{ 3546{
3507 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { 3547 if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) {
3508 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); 3548 sock_reset_flag(sk, SOCK_QUEUE_SHRUNK);
@@ -3512,7 +3552,7 @@ static inline void tcp_check_space(struct sock *sk)
3512 } 3552 }
3513} 3553}
3514 3554
3515static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp) 3555static inline void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
3516{ 3556{
3517 tcp_push_pending_frames(sk, tp); 3557 tcp_push_pending_frames(sk, tp);
3518 tcp_check_space(sk); 3558 tcp_check_space(sk);
@@ -3544,7 +3584,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
3544 } 3584 }
3545} 3585}
3546 3586
3547static __inline__ void tcp_ack_snd_check(struct sock *sk) 3587static inline void tcp_ack_snd_check(struct sock *sk)
3548{ 3588{
3549 if (!inet_csk_ack_scheduled(sk)) { 3589 if (!inet_csk_ack_scheduled(sk)) {
3550 /* We sent a data segment already. */ 3590 /* We sent a data segment already. */
@@ -3692,8 +3732,7 @@ static int __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3692 return result; 3732 return result;
3693} 3733}
3694 3734
3695static __inline__ int 3735static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3696tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
3697{ 3736{
3698 return skb->ip_summed != CHECKSUM_UNNECESSARY && 3737 return skb->ip_summed != CHECKSUM_UNNECESSARY &&
3699 __tcp_checksum_complete_user(sk, skb); 3738 __tcp_checksum_complete_user(sk, skb);
@@ -4474,3 +4513,4 @@ EXPORT_SYMBOL(sysctl_tcp_abc);
4474EXPORT_SYMBOL(tcp_parse_options); 4513EXPORT_SYMBOL(tcp_parse_options);
4475EXPORT_SYMBOL(tcp_rcv_established); 4514EXPORT_SYMBOL(tcp_rcv_established);
4476EXPORT_SYMBOL(tcp_rcv_state_process); 4515EXPORT_SYMBOL(tcp_rcv_state_process);
4516EXPORT_SYMBOL(tcp_initialize_rcv_mss);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9b62d80bb20f..5c70493dff02 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -270,8 +270,7 @@ failure:
270/* 270/*
271 * This routine does path mtu discovery as defined in RFC1191. 271 * This routine does path mtu discovery as defined in RFC1191.
272 */ 272 */
273static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, 273static void do_pmtu_discovery(struct sock *sk, struct iphdr *iph, u32 mtu)
274 u32 mtu)
275{ 274{
276 struct dst_entry *dst; 275 struct dst_entry *dst;
277 struct inet_sock *inet = inet_sk(sk); 276 struct inet_sock *inet = inet_sk(sk);
@@ -662,7 +661,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
662 kfree(inet_rsk(req)->opt); 661 kfree(inet_rsk(req)->opt);
663} 662}
664 663
665static inline void syn_flood_warning(struct sk_buff *skb) 664static void syn_flood_warning(struct sk_buff *skb)
666{ 665{
667 static unsigned long warntime; 666 static unsigned long warntime;
668 667
@@ -677,8 +676,8 @@ static inline void syn_flood_warning(struct sk_buff *skb)
677/* 676/*
678 * Save and compile IPv4 options into the request_sock if needed. 677 * Save and compile IPv4 options into the request_sock if needed.
679 */ 678 */
680static inline struct ip_options *tcp_v4_save_options(struct sock *sk, 679static struct ip_options *tcp_v4_save_options(struct sock *sk,
681 struct sk_buff *skb) 680 struct sk_buff *skb)
682{ 681{
683 struct ip_options *opt = &(IPCB(skb)->opt); 682 struct ip_options *opt = &(IPCB(skb)->opt);
684 struct ip_options *dopt = NULL; 683 struct ip_options *dopt = NULL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3a0a914de917..a7623ead39a8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -51,8 +51,8 @@ int sysctl_tcp_retrans_collapse = 1;
51 */ 51 */
52int sysctl_tcp_tso_win_divisor = 3; 52int sysctl_tcp_tso_win_divisor = 3;
53 53
54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, 54static void update_send_head(struct sock *sk, struct tcp_sock *tp,
55 struct sk_buff *skb) 55 struct sk_buff *skb)
56{ 56{
57 sk->sk_send_head = skb->next; 57 sk->sk_send_head = skb->next;
58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) 58 if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
@@ -124,8 +124,8 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst)
124 tp->snd_cwnd_used = 0; 124 tp->snd_cwnd_used = 0;
125} 125}
126 126
127static inline void tcp_event_data_sent(struct tcp_sock *tp, 127static void tcp_event_data_sent(struct tcp_sock *tp,
128 struct sk_buff *skb, struct sock *sk) 128 struct sk_buff *skb, struct sock *sk)
129{ 129{
130 struct inet_connection_sock *icsk = inet_csk(sk); 130 struct inet_connection_sock *icsk = inet_csk(sk);
131 const u32 now = tcp_time_stamp; 131 const u32 now = tcp_time_stamp;
@@ -142,7 +142,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
142 icsk->icsk_ack.pingpong = 1; 142 icsk->icsk_ack.pingpong = 1;
143} 143}
144 144
145static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) 145static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
146{ 146{
147 tcp_dec_quickack_mode(sk, pkts); 147 tcp_dec_quickack_mode(sk, pkts);
148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); 148 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
@@ -212,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss,
212 * value can be stuffed directly into th->window for an outgoing 212 * value can be stuffed directly into th->window for an outgoing
213 * frame. 213 * frame.
214 */ 214 */
215static __inline__ u16 tcp_select_window(struct sock *sk) 215static u16 tcp_select_window(struct sock *sk)
216{ 216{
217 struct tcp_sock *tp = tcp_sk(sk); 217 struct tcp_sock *tp = tcp_sk(sk);
218 u32 cur_win = tcp_receive_window(tp); 218 u32 cur_win = tcp_receive_window(tp);
@@ -250,6 +250,75 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
250 return new_win; 250 return new_win;
251} 251}
252 252
/* Write the timestamp and SACK options for an outgoing segment.
 *
 * ptr    - destination for the options, written as 32-bit words in
 *          network byte order
 * tp     - socket whose rx_opt state selects what is emitted
 * tstamp - value written as the timestamp option's first field
 *
 * If tp->rx_opt.tstamp_ok is set, three words are written: a
 * NOP/NOP/TIMESTAMP/length header, tstamp, and tp->rx_opt.ts_recent.
 *
 * If tp->rx_opt.eff_sacks is non-zero, a NOP/NOP/SACK/length header is
 * written followed by the start and end sequence of eff_sacks blocks,
 * read from tp->duplicate_sack when tp->rx_opt.dsack is set and from
 * tp->selective_acks otherwise.
 *
 * The "update" part: after a D-SACK has been written, dsack is cleared
 * and eff_sacks is decremented.
 *
 * NOTE(review): with dsack set and eff_sacks > 1 the loop indexes past
 * the first element of tp->duplicate_sack; presumably this relies on
 * selective_acks following it in struct tcp_sock - confirm against the
 * struct definition.
 */
253static void tcp_build_and_update_options(__u32 *ptr, struct tcp_sock *tp,
254 __u32 tstamp)
255{
256 if (tp->rx_opt.tstamp_ok) {
257 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) |
258 (TCPOPT_NOP << 16) |
259 (TCPOPT_TIMESTAMP << 8) |
260 TCPOLEN_TIMESTAMP);
261 *ptr++ = htonl(tstamp);
262 *ptr++ = htonl(tp->rx_opt.ts_recent);
263 }
264 if (tp->rx_opt.eff_sacks) {
265 struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks;
266 int this_sack;
267
268 *ptr++ = htonl((TCPOPT_NOP << 24) |
269 (TCPOPT_NOP << 16) |
270 (TCPOPT_SACK << 8) |
271 (TCPOLEN_SACK_BASE + (tp->rx_opt.eff_sacks *
272 TCPOLEN_SACK_PERBLOCK)));
273 for(this_sack = 0; this_sack < tp->rx_opt.eff_sacks; this_sack++) {
274 *ptr++ = htonl(sp[this_sack].start_seq);
275 *ptr++ = htonl(sp[this_sack].end_seq);
276 }
277 if (tp->rx_opt.dsack) { /* D-SACK has now been written: drop it from the count */
278 tp->rx_opt.dsack = 0;
279 tp->rx_opt.eff_sacks--;
280 }
281 }
282}
283
284/* Construct a tcp options header for a SYN or SYN_ACK packet.
285 * If this is ever changed make sure to change the definition of
286 * MAX_SYN_SIZE to match the new maximum number of options that you
287 * can generate.
 *
 * ptr          - destination, written as 32-bit words in network order
 * mss          - value placed in the MSS option (always written)
 * ts           - non-zero to write a timestamp option
 * sack         - non-zero to write SACK-permitted
 * offer_wscale - non-zero to write a window-scale option
 * wscale       - shift value placed in the window-scale option
 * tstamp       - timestamp value (TSVAL)
 * ts_recent    - timestamp echo (TSECR)
 *
 * With ts set, SACK-permitted shares one word with the timestamp
 * header in place of the two NOPs; without ts it gets its own
 * NOP/NOP-padded word.
288 */
289static void tcp_syn_build_options(__u32 *ptr, int mss, int ts, int sack,
290 int offer_wscale, int wscale, __u32 tstamp,
291 __u32 ts_recent)
292{
293 /* We always get an MSS option.
294 * The option bytes which will be seen in normal data
295 * packets should timestamps be used, must be in the MSS
296 * advertised. But we subtract them from tp->mss_cache so
297 * that calculations in tcp_sendmsg are simpler etc.
298 * So account for this fact here if necessary. If we
299 * don't do this correctly, as a receiver we won't
300 * recognize data packets as being full sized when we
301 * should, and thus we won't abide by the delayed ACK
302 * rules correctly.
303 * SACKs don't matter, we never delay an ACK when we
304 * have any of those going out.
305 */
306 *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
307 if (ts) {
308 if(sack)
309 *ptr++ = __constant_htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) |
310 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
311 else
312 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
313 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
314 *ptr++ = htonl(tstamp); /* TSVAL */
315 *ptr++ = htonl(ts_recent); /* TSECR */
316 } else if(sack)
317 *ptr++ = __constant_htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
318 (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM);
319 if (offer_wscale)
320 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | (wscale));
321}
253 322
254/* This routine actually transmits TCP packets queued in by 323/* This routine actually transmits TCP packets queued in by
255 * tcp_do_sendmsg(). This is used by both the initial 324 * tcp_do_sendmsg(). This is used by both the initial
@@ -724,7 +793,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
724 793
725/* Congestion window validation. (RFC2861) */ 794/* Congestion window validation. (RFC2861) */
726 795
727static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) 796static void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
728{ 797{
729 __u32 packets_out = tp->packets_out; 798 __u32 packets_out = tp->packets_out;
730 799
@@ -773,7 +842,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
773/* This must be invoked the first time we consider transmitting 842/* This must be invoked the first time we consider transmitting
774 * SKB onto the wire. 843 * SKB onto the wire.
775 */ 844 */
776static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) 845static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
777{ 846{
778 int tso_segs = tcp_skb_pcount(skb); 847 int tso_segs = tcp_skb_pcount(skb);
779 848
@@ -1794,7 +1863,7 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
1794/* 1863/*
1795 * Do all connect socket setups that can be done AF independent. 1864 * Do all connect socket setups that can be done AF independent.
1796 */ 1865 */
1797static inline void tcp_connect_init(struct sock *sk) 1866static void tcp_connect_init(struct sock *sk)
1798{ 1867{
1799 struct dst_entry *dst = __sk_dst_get(sk); 1868 struct dst_entry *dst = __sk_dst_get(sk);
1800 struct tcp_sock *tp = tcp_sk(sk); 1869 struct tcp_sock *tp = tcp_sk(sk);