 Documentation/networking/ip-sysctl.txt |   5
 drivers/sbus/char/display7seg.c         |   2
 drivers/scsi/lpfc/lpfc_init.c           |   1
 include/linux/sysctl.h                  |   1
 include/linux/tcp.h                     |  16
 include/net/sock.h                      |   6
 include/net/tcp.h                       |  71
 net/ipv4/sysctl_net_ipv4.c              |   8
 net/ipv4/tcp.c                          |   3
 net/ipv4/tcp_bic.c                      |  12
 net/ipv4/tcp_cong.c                     |  40
 net/ipv4/tcp_highspeed.c                |  11
 net/ipv4/tcp_htcp.c                     |  13
 net/ipv4/tcp_hybla.c                    |   6
 net/ipv4/tcp_input.c                    | 288
 net/ipv4/tcp_ipv4.c                     |   4
 net/ipv4/tcp_minisocks.c                |   7
 net/ipv4/tcp_output.c                   |  61
 net/ipv4/tcp_scalable.c                 |  14
 net/ipv4/tcp_timer.c                    |   4
 net/ipv4/tcp_vegas.c                    |  42
 21 files changed, 414 insertions, 201 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 65895bb51414..ebc09a159f62 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -78,6 +78,11 @@ inet_peer_gc_maxtime - INTEGER
78 78
79TCP variables: 79TCP variables:
80 80
81tcp_abc - INTEGER
82 Controls Appropriate Byte Count defined in RFC3465. If set to
83 0 then does congestion avoid once per ack. 1 is conservative
84 value, and 2 is more agressive.
85
81tcp_syn_retries - INTEGER 86tcp_syn_retries - INTEGER
82 Number of times initial SYNs for an active TCP connection attempt 87 Number of times initial SYNs for an active TCP connection attempt
83 will be retransmitted. Should not be higher than 255. Default value 88 will be retransmitted. Should not be higher than 255. Default value
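
For illustration only, and not part of the patch: the knob documented above is exposed as /proc/sys/net/ipv4/tcp_abc by the sysctl table entry added in net/ipv4/sysctl_net_ipv4.c further down. A minimal userspace sketch for reading and setting it could look like the following; only the proc path follows from the patch, the rest is an assumption for demonstration.

/*
 * Illustrative only, not part of the patch: toggle the new knob from
 * userspace via the proc path created by the sysctl entry below.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv4/tcp_abc";
	FILE *f;
	int val;

	f = fopen(path, "r");
	if (!f)
		return 1;
	if (fscanf(f, "%d", &val) == 1)
		printf("tcp_abc is currently %d\n", val);
	fclose(f);

	f = fopen(path, "w");		/* needs root privileges */
	if (!f)
		return 1;
	fprintf(f, "1\n");		/* 1 = conservative RFC3465 counting */
	fclose(f);
	return 0;
}
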
diff --git a/drivers/sbus/char/display7seg.c b/drivers/sbus/char/display7seg.c
index 2c86a4b809cd..c3a51d1fae5d 100644
--- a/drivers/sbus/char/display7seg.c
+++ b/drivers/sbus/char/display7seg.c
@@ -119,7 +119,7 @@ static long d7s_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
119{ 119{
120 __u8 regs = readb(d7s_regs); 120 __u8 regs = readb(d7s_regs);
121 __u8 ireg = 0; 121 __u8 ireg = 0;
122 int error = 0 122 int error = 0;
123 123
124 if (D7S_MINOR != iminor(file->f_dentry->d_inode)) 124 if (D7S_MINOR != iminor(file->f_dentry->d_inode))
125 return -ENODEV; 125 return -ENODEV;
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index c90723860a04..07498118359d 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -1704,7 +1704,6 @@ MODULE_DEVICE_TABLE(pci, lpfc_id_table);
1704 1704
1705static struct pci_driver lpfc_driver = { 1705static struct pci_driver lpfc_driver = {
1706 .name = LPFC_DRIVER_NAME, 1706 .name = LPFC_DRIVER_NAME,
1707 .owner = THIS_MODULE,
1708 .id_table = lpfc_id_table, 1707 .id_table = lpfc_id_table,
1709 .probe = lpfc_pci_probe_one, 1708 .probe = lpfc_pci_probe_one,
1710 .remove = __devexit_p(lpfc_pci_remove_one), 1709 .remove = __devexit_p(lpfc_pci_remove_one),
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 22cf5e1ac987..ab2791b3189d 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -390,6 +390,7 @@ enum
390 NET_TCP_BIC_BETA=108, 390 NET_TCP_BIC_BETA=108,
391 NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109, 391 NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
392 NET_TCP_CONG_CONTROL=110, 392 NET_TCP_CONG_CONTROL=110,
393 NET_TCP_ABC=111,
393}; 394};
394 395
395enum { 396enum {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ac4ca44c75ca..0e1da6602e05 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -307,6 +307,21 @@ struct tcp_sock {
307 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ 307 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
308 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ 308 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
309 309
310 struct tcp_sack_block recv_sack_cache[4];
311
312 /* from STCP, retrans queue hinting */
313 struct sk_buff* lost_skb_hint;
314
315 struct sk_buff *scoreboard_skb_hint;
316 struct sk_buff *retransmit_skb_hint;
317 struct sk_buff *forward_skb_hint;
318 struct sk_buff *fastpath_skb_hint;
319
320 int fastpath_cnt_hint;
321 int lost_cnt_hint;
322 int retransmit_cnt_hint;
323 int forward_cnt_hint;
324
310 __u16 advmss; /* Advertised MSS */ 325 __u16 advmss; /* Advertised MSS */
311 __u16 prior_ssthresh; /* ssthresh saved at recovery start */ 326 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
312 __u32 lost_out; /* Lost packets */ 327 __u32 lost_out; /* Lost packets */
@@ -326,6 +341,7 @@ struct tcp_sock {
326 __u32 snd_up; /* Urgent pointer */ 341 __u32 snd_up; /* Urgent pointer */
327 342
328 __u32 total_retrans; /* Total retransmits for entire connection */ 343 __u32 total_retrans; /* Total retransmits for entire connection */
344 __u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */
329 345
330 unsigned int keepalive_time; /* time before keep alive takes place */ 346 unsigned int keepalive_time; /* time before keep alive takes place */
331 unsigned int keepalive_intvl; /* time interval between keep alive probes */ 347 unsigned int keepalive_intvl; /* time interval between keep alive probes */
diff --git a/include/net/sock.h b/include/net/sock.h
index ff13c4cc287a..982b4ecd187b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1247,6 +1247,12 @@ static inline struct page *sk_stream_alloc_page(struct sock *sk)
1247 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \ 1247 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
1248 skb = skb->next) 1248 skb = skb->next)
1249 1249
1250/*from STCP for fast SACK Process*/
1251#define sk_stream_for_retrans_queue_from(skb, sk) \
1252 for (; (skb != (sk)->sk_send_head) && \
1253 (skb != (struct sk_buff *)&(sk)->sk_write_queue); \
1254 skb = skb->next)
1255
1250/* 1256/*
1251 * Default write policy as shown to user space via poll/select/SIGIO 1257 * Default write policy as shown to user space via poll/select/SIGIO
1252 */ 1258 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 96cc3b434e40..0f9848011972 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -89,10 +89,10 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
89 */ 89 */
90 90
91#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a 91#define TCP_SYN_RETRIES 5 /* number of times to retry active opening a
92 * connection: ~180sec is RFC minumum */ 92 * connection: ~180sec is RFC minimum */
93 93
94#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a 94#define TCP_SYNACK_RETRIES 5 /* number of times to retry passive opening a
95 * connection: ~180sec is RFC minumum */ 95 * connection: ~180sec is RFC minimum */
96 96
97 97
98#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned 98#define TCP_ORPHAN_RETRIES 7 /* number of times to retry on an orphaned
@@ -180,7 +180,7 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
180/* Flags in tp->nonagle */ 180/* Flags in tp->nonagle */
181#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ 181#define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */
182#define TCP_NAGLE_CORK 2 /* Socket is corked */ 182#define TCP_NAGLE_CORK 2 /* Socket is corked */
183#define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ 183#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
184 184
185extern struct inet_timewait_death_row tcp_death_row; 185extern struct inet_timewait_death_row tcp_death_row;
186 186
@@ -218,6 +218,7 @@ extern int sysctl_tcp_low_latency;
218extern int sysctl_tcp_nometrics_save; 218extern int sysctl_tcp_nometrics_save;
219extern int sysctl_tcp_moderate_rcvbuf; 219extern int sysctl_tcp_moderate_rcvbuf;
220extern int sysctl_tcp_tso_win_divisor; 220extern int sysctl_tcp_tso_win_divisor;
221extern int sysctl_tcp_abc;
221 222
222extern atomic_t tcp_memory_allocated; 223extern atomic_t tcp_memory_allocated;
223extern atomic_t tcp_sockets_allocated; 224extern atomic_t tcp_sockets_allocated;
@@ -551,13 +552,13 @@ extern u32 __tcp_select_window(struct sock *sk);
551 552
552/* TCP timestamps are only 32-bits, this causes a slight 553/* TCP timestamps are only 32-bits, this causes a slight
553 * complication on 64-bit systems since we store a snapshot 554 * complication on 64-bit systems since we store a snapshot
554 * of jiffies in the buffer control blocks below. We decidely 555 * of jiffies in the buffer control blocks below. We decidedly
555 * only use of the low 32-bits of jiffies and hide the ugly 556 * only use of the low 32-bits of jiffies and hide the ugly
556 * casts with the following macro. 557 * casts with the following macro.
557 */ 558 */
558#define tcp_time_stamp ((__u32)(jiffies)) 559#define tcp_time_stamp ((__u32)(jiffies))
559 560
560/* This is what the send packet queueing engine uses to pass 561/* This is what the send packet queuing engine uses to pass
561 * TCP per-packet control information to the transmission 562 * TCP per-packet control information to the transmission
562 * code. We also store the host-order sequence numbers in 563 * code. We also store the host-order sequence numbers in
563 * here too. This is 36 bytes on 32-bit architectures, 564 * here too. This is 36 bytes on 32-bit architectures,
@@ -597,7 +598,7 @@ struct tcp_skb_cb {
597#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ 598#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
598#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) 599#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS)
599 600
600#define TCPCB_URG 0x20 /* Urgent pointer advenced here */ 601#define TCPCB_URG 0x20 /* Urgent pointer advanced here */
601 602
602#define TCPCB_AT_TAIL (TCPCB_URG) 603#define TCPCB_AT_TAIL (TCPCB_URG)
603 604
@@ -765,6 +766,33 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk)
765 (tp->snd_cwnd >> 2))); 766 (tp->snd_cwnd >> 2)));
766} 767}
767 768
769/*
770 * Linear increase during slow start
771 */
772static inline void tcp_slow_start(struct tcp_sock *tp)
773{
774 if (sysctl_tcp_abc) {
775 /* RFC3465: Slow Start
776 * TCP sender SHOULD increase cwnd by the number of
777 * previously unacknowledged bytes ACKed by each incoming
778 * acknowledgment, provided the increase is not more than L
779 */
780 if (tp->bytes_acked < tp->mss_cache)
781 return;
782
783 /* We MAY increase by 2 if discovered delayed ack */
784 if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2*tp->mss_cache) {
785 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
786 tp->snd_cwnd++;
787 }
788 }
789 tp->bytes_acked = 0;
790
791 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
792 tp->snd_cwnd++;
793}
794
795
768static inline void tcp_sync_left_out(struct tcp_sock *tp) 796static inline void tcp_sync_left_out(struct tcp_sock *tp)
769{ 797{
770 if (tp->rx_opt.sack_ok && 798 if (tp->rx_opt.sack_ok &&
@@ -794,6 +822,7 @@ static inline void tcp_enter_cwr(struct sock *sk)
794 struct tcp_sock *tp = tcp_sk(sk); 822 struct tcp_sock *tp = tcp_sk(sk);
795 823
796 tp->prior_ssthresh = 0; 824 tp->prior_ssthresh = 0;
825 tp->bytes_acked = 0;
797 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { 826 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
798 __tcp_enter_cwr(sk); 827 __tcp_enter_cwr(sk);
799 tcp_set_ca_state(sk, TCP_CA_CWR); 828 tcp_set_ca_state(sk, TCP_CA_CWR);
@@ -810,6 +839,27 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
810 return 3; 839 return 3;
811} 840}
812 841
842/* RFC2861 Check whether we are limited by application or congestion window
843 * This is the inverse of cwnd check in tcp_tso_should_defer
844 */
845static inline int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
846{
847 const struct tcp_sock *tp = tcp_sk(sk);
848 u32 left;
849
850 if (in_flight >= tp->snd_cwnd)
851 return 1;
852
853 if (!(sk->sk_route_caps & NETIF_F_TSO))
854 return 0;
855
856 left = tp->snd_cwnd - in_flight;
857 if (sysctl_tcp_tso_win_divisor)
858 return left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd;
859 else
860 return left <= tcp_max_burst(tp);
861}
862
813static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, 863static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
814 const struct sk_buff *skb) 864 const struct sk_buff *skb)
815{ 865{
@@ -1157,6 +1207,15 @@ static inline void tcp_mib_init(void)
1157 TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1); 1207 TCP_ADD_STATS_USER(TCP_MIB_MAXCONN, -1);
1158} 1208}
1159 1209
1210/*from STCP */
1211static inline void clear_all_retrans_hints(struct tcp_sock *tp){
1212 tp->lost_skb_hint = NULL;
1213 tp->scoreboard_skb_hint = NULL;
1214 tp->retransmit_skb_hint = NULL;
1215 tp->forward_skb_hint = NULL;
1216 tp->fastpath_skb_hint = NULL;
1217}
1218
1160/* /proc */ 1219/* /proc */
1161enum tcp_seq_states { 1220enum tcp_seq_states {
1162 TCP_SEQ_STATE_LISTENING, 1221 TCP_SEQ_STATE_LISTENING,
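
For illustration only, and not part of the change above: a standalone userspace sketch of the byte-counting slow start that the new tcp_slow_start() helper implements (RFC3465). The struct and the sysctl variable here are simplified stand-ins rather than kernel API; the point is that cwnd grows only once at least one MSS of new data has been acknowledged, with a possible second segment when tcp_abc > 1 and the ACK covered more than two MSS.

/*
 * Illustrative userspace sketch of the RFC3465 slow start added above.
 * Simplified stand-in types, not kernel API.
 */
#include <stdio.h>

static int tcp_abc = 1;			/* 0: per ACK, 1: per MSS, 2: allow +2 */

struct conn {
	unsigned int snd_cwnd;		/* congestion window, in segments */
	unsigned int snd_cwnd_clamp;
	unsigned int mss_cache;
	unsigned int bytes_acked;	/* bytes newly acked since last growth */
};

static void abc_slow_start(struct conn *c)
{
	if (tcp_abc) {
		if (c->bytes_acked < c->mss_cache)
			return;		/* less than one MSS acked so far */
		/* credit a delayed ACK covering > 2*MSS with a second segment */
		if (tcp_abc > 1 && c->bytes_acked > 2 * c->mss_cache &&
		    c->snd_cwnd < c->snd_cwnd_clamp)
			c->snd_cwnd++;
	}
	c->bytes_acked = 0;
	if (c->snd_cwnd < c->snd_cwnd_clamp)
		c->snd_cwnd++;
}

int main(void)
{
	struct conn c = { .snd_cwnd = 2, .snd_cwnd_clamp = 100,
			  .mss_cache = 1448, .bytes_acked = 0 };
	unsigned int acked[] = { 1448, 2896, 500, 1000 };
	unsigned int i;

	for (i = 0; i < sizeof(acked) / sizeof(acked[0]); i++) {
		c.bytes_acked += acked[i];	/* done in tcp_ack() in the patch */
		abc_slow_start(&c);
		printf("ACK of %4u bytes -> cwnd %u\n", acked[i], c.snd_cwnd);
	}
	return 0;
}

With tcp_abc=1 the 500-byte ACK produces no growth; the window grows again only once the accumulated newly-acked bytes reach one MSS.
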
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 652685623519..01444a02b48b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -645,6 +645,14 @@ ctl_table ipv4_table[] = {
645 .proc_handler = &proc_tcp_congestion_control, 645 .proc_handler = &proc_tcp_congestion_control,
646 .strategy = &sysctl_tcp_congestion_control, 646 .strategy = &sysctl_tcp_congestion_control,
647 }, 647 },
648 {
649 .ctl_name = NET_TCP_ABC,
650 .procname = "tcp_abc",
651 .data = &sysctl_tcp_abc,
652 .maxlen = sizeof(int),
653 .mode = 0644,
654 .proc_handler = &proc_dointvec,
655 },
648 656
649 { .ctl_name = 0 } 657 { .ctl_name = 0 }
650}; 658};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 72b7c22e1ea5..9ac7a4f46bd8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1640,7 +1640,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1640 } else if (tcp_need_reset(old_state) || 1640 } else if (tcp_need_reset(old_state) ||
1641 (tp->snd_nxt != tp->write_seq && 1641 (tp->snd_nxt != tp->write_seq &&
1642 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { 1642 (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) {
1643 /* The last check adjusts for discrepance of Linux wrt. RFC 1643 /* The last check adjusts for discrepancy of Linux wrt. RFC
1644 * states 1644 * states
1645 */ 1645 */
1646 tcp_send_active_reset(sk, gfp_any()); 1646 tcp_send_active_reset(sk, gfp_any());
@@ -1669,6 +1669,7 @@ int tcp_disconnect(struct sock *sk, int flags)
1669 tp->packets_out = 0; 1669 tp->packets_out = 0;
1670 tp->snd_ssthresh = 0x7fffffff; 1670 tp->snd_ssthresh = 0x7fffffff;
1671 tp->snd_cwnd_cnt = 0; 1671 tp->snd_cwnd_cnt = 0;
1672 tp->bytes_acked = 0;
1672 tcp_set_ca_state(sk, TCP_CA_Open); 1673 tcp_set_ca_state(sk, TCP_CA_Open);
1673 tcp_clear_retrans(tp); 1674 tcp_clear_retrans(tp);
1674 inet_csk_delack_init(sk); 1675 inet_csk_delack_init(sk);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index ae35e0609047..1d0cd86621b1 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -217,17 +217,15 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack,
217 217
218 bictcp_low_utilization(sk, data_acked); 218 bictcp_low_utilization(sk, data_acked);
219 219
220 if (in_flight < tp->snd_cwnd) 220 if (!tcp_is_cwnd_limited(sk, in_flight))
221 return; 221 return;
222 222
223 if (tp->snd_cwnd <= tp->snd_ssthresh) { 223 if (tp->snd_cwnd <= tp->snd_ssthresh)
224 /* In "safe" area, increase. */ 224 tcp_slow_start(tp);
225 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 225 else {
226 tp->snd_cwnd++;
227 } else {
228 bictcp_update(ca, tp->snd_cwnd); 226 bictcp_update(ca, tp->snd_cwnd);
229 227
230 /* In dangerous area, increase slowly. 228 /* In dangerous area, increase slowly.
231 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 229 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
232 */ 230 */
233 if (tp->snd_cwnd_cnt >= ca->cnt) { 231 if (tp->snd_cwnd_cnt >= ca->cnt) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index bbf2d6624e89..c7cc62c8dc12 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -186,24 +186,32 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
186{ 186{
187 struct tcp_sock *tp = tcp_sk(sk); 187 struct tcp_sock *tp = tcp_sk(sk);
188 188
189 if (in_flight < tp->snd_cwnd) 189 if (!tcp_is_cwnd_limited(sk, in_flight))
190 return; 190 return;
191 191
192 if (tp->snd_cwnd <= tp->snd_ssthresh) { 192 /* In "safe" area, increase. */
193 /* In "safe" area, increase. */ 193 if (tp->snd_cwnd <= tp->snd_ssthresh)
194 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 194 tcp_slow_start(tp);
195 tp->snd_cwnd++; 195
196 } else { 196 /* In dangerous area, increase slowly. */
197 /* In dangerous area, increase slowly. 197 else if (sysctl_tcp_abc) {
198 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd 198 /* RFC3465: Apppriate Byte Count
199 */ 199 * increase once for each full cwnd acked
200 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 200 */
201 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 201 if (tp->bytes_acked >= tp->snd_cwnd*tp->mss_cache) {
202 tp->snd_cwnd++; 202 tp->bytes_acked -= tp->snd_cwnd*tp->mss_cache;
203 tp->snd_cwnd_cnt = 0; 203 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
204 } else 204 tp->snd_cwnd++;
205 tp->snd_cwnd_cnt++; 205 }
206 } 206 } else {
207 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */
208 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
209 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
210 tp->snd_cwnd++;
211 tp->snd_cwnd_cnt = 0;
212 } else
213 tp->snd_cwnd_cnt++;
214 }
207} 215}
208EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 216EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
209 217
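
Again purely illustrative, with simplified stand-in types rather than kernel code: the ABC branch that tcp_reno_cong_avoid() gains above opens the window by one segment per full congestion window of acknowledged bytes, instead of counting ACKs in snd_cwnd_cnt. For brevity this sketch folds the bytes_acked accumulation, which the patch actually performs in tcp_ack(), into the same helper.

/*
 * Illustrative sketch of the ABC congestion-avoidance branch above:
 * at most one segment of growth per full cwnd of acknowledged bytes.
 */
#include <stdio.h>

struct ca_state {
	unsigned int snd_cwnd;
	unsigned int snd_cwnd_clamp;
	unsigned int mss_cache;
	unsigned int bytes_acked;
};

static void abc_cong_avoid(struct ca_state *c, unsigned int newly_acked)
{
	c->bytes_acked += newly_acked;
	if (c->bytes_acked >= c->snd_cwnd * c->mss_cache) {
		c->bytes_acked -= c->snd_cwnd * c->mss_cache;
		if (c->snd_cwnd < c->snd_cwnd_clamp)
			c->snd_cwnd++;	/* one full window acked: grow by one */
	}
}

int main(void)
{
	struct ca_state c = { .snd_cwnd = 10, .snd_cwnd_clamp = 100,
			      .mss_cache = 1448, .bytes_acked = 0 };
	int i;

	/* ten full-sized ACKs: exactly one congestion window of data */
	for (i = 0; i < 10; i++)
		abc_cong_avoid(&c, 1448);
	printf("cwnd after one cwnd of acked bytes: %u\n", c.snd_cwnd);	/* 11 */
	return 0;
}
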
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 6acc04bde080..82b3c189bd7d 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -111,18 +111,17 @@ static void hstcp_init(struct sock *sk)
111} 111}
112 112
113static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, 113static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt,
114 u32 in_flight, int good) 114 u32 in_flight, u32 pkts_acked)
115{ 115{
116 struct tcp_sock *tp = tcp_sk(sk); 116 struct tcp_sock *tp = tcp_sk(sk);
117 struct hstcp *ca = inet_csk_ca(sk); 117 struct hstcp *ca = inet_csk_ca(sk);
118 118
119 if (in_flight < tp->snd_cwnd) 119 if (!tcp_is_cwnd_limited(sk, in_flight))
120 return; 120 return;
121 121
122 if (tp->snd_cwnd <= tp->snd_ssthresh) { 122 if (tp->snd_cwnd <= tp->snd_ssthresh)
123 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 123 tcp_slow_start(tp);
124 tp->snd_cwnd++; 124 else {
125 } else {
126 /* Update AIMD parameters */ 125 /* Update AIMD parameters */
127 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) { 126 if (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd) {
128 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd && 127 while (tp->snd_cwnd > hstcp_aimd_vals[ca->ai].cwnd &&
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index e47b37984e95..3284cfb993e6 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -207,14 +207,13 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
207 struct tcp_sock *tp = tcp_sk(sk); 207 struct tcp_sock *tp = tcp_sk(sk);
208 struct htcp *ca = inet_csk_ca(sk); 208 struct htcp *ca = inet_csk_ca(sk);
209 209
210 if (in_flight < tp->snd_cwnd) 210 if (!tcp_is_cwnd_limited(sk, in_flight))
211 return; 211 return;
212 212
213 if (tp->snd_cwnd <= tp->snd_ssthresh) { 213 if (tp->snd_cwnd <= tp->snd_ssthresh)
214 /* In "safe" area, increase. */ 214 tcp_slow_start(tp);
215 if (tp->snd_cwnd < tp->snd_cwnd_clamp) 215 else {
216 tp->snd_cwnd++; 216
217 } else {
218 measure_rtt(sk); 217 measure_rtt(sk);
219 218
220 /* keep track of number of round-trip times since last backoff event */ 219 /* keep track of number of round-trip times since last backoff event */
@@ -224,7 +223,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
224 htcp_alpha_update(ca); 223 htcp_alpha_update(ca);
225 } 224 }
226 225
227 /* In dangerous area, increase slowly. 226 /* In dangerous area, increase slowly.
228 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd 227 * In theory this is tp->snd_cwnd += alpha / tp->snd_cwnd
229 */ 228 */
230 if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) { 229 if ((tp->snd_cwnd_cnt++ * ca->alpha)>>7 >= tp->snd_cwnd) {
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index 77add63623df..40dbb3877510 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -100,12 +100,12 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
100 ca->minrtt = tp->srtt; 100 ca->minrtt = tp->srtt;
101 } 101 }
102 102
103 if (!tcp_is_cwnd_limited(sk, in_flight))
104 return;
105
103 if (!ca->hybla_en) 106 if (!ca->hybla_en)
104 return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); 107 return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
105 108
106 if (in_flight < tp->snd_cwnd)
107 return;
108
109 if (ca->rho == 0) 109 if (ca->rho == 0)
110 hybla_recalc_param(sk); 110 hybla_recalc_param(sk);
111 111
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57578dc..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
42 * Andi Kleen : Moved open_request checking here 42 * Andi Kleen : Moved open_request checking here
43 * and process RSTs for open_requests. 43 * and process RSTs for open_requests.
44 * Andi Kleen : Better prune_queue, and other fixes. 44 * Andi Kleen : Better prune_queue, and other fixes.
45 * Andrey Savochkin: Fix RTT measurements in the presnce of 45 * Andrey Savochkin: Fix RTT measurements in the presence of
46 * timestamps. 46 * timestamps.
47 * Andrey Savochkin: Check sequence numbers correctly when 47 * Andrey Savochkin: Check sequence numbers correctly when
48 * removing SACKs due to in sequence incoming 48 * removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
89int sysctl_tcp_nometrics_save; 89int sysctl_tcp_nometrics_save;
90 90
91int sysctl_tcp_moderate_rcvbuf = 1; 91int sysctl_tcp_moderate_rcvbuf = 1;
92int sysctl_tcp_abc = 1;
92 93
93#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 94#define FLAG_DATA 0x01 /* Incoming frame contained data. */
94#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 95#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
223 * of receiver window. Check #2. 224 * of receiver window. Check #2.
224 * 225 *
225 * The scheme does not work when sender sends good segments opening 226 * The scheme does not work when sender sends good segments opening
226 * window and then starts to feed us spagetti. But it should work 227 * window and then starts to feed us spaghetti. But it should work
227 * in common situations. Otherwise, we have to rely on queue collapsing. 228 * in common situations. Otherwise, we have to rely on queue collapsing.
228 */ 229 */
229 230
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
233{ 234{
234 /* Optimize this! */ 235 /* Optimize this! */
235 int truesize = tcp_win_from_space(skb->truesize)/2; 236 int truesize = tcp_win_from_space(skb->truesize)/2;
236 int window = tcp_full_space(sk)/2; 237 int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
237 238
238 while (tp->rcv_ssthresh <= window) { 239 while (tp->rcv_ssthresh <= window) {
239 if (truesize <= skb->len) 240 if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
277 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); 278 int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);
278 279
279 /* Try to select rcvbuf so that 4 mss-sized segments 280 /* Try to select rcvbuf so that 4 mss-sized segments
280 * will fit to window and correspoding skbs will fit to our rcvbuf. 281 * will fit to window and corresponding skbs will fit to our rcvbuf.
281 * (was 3; 4 is minimum to allow fast retransmit to work.) 282 * (was 3; 4 is minimum to allow fast retransmit to work.)
282 */ 283 */
283 while (tcp_win_from_space(rcvmem) < tp->advmss) 284 while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
286 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); 287 sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
287} 288}
288 289
289/* 4. Try to fixup all. It is made iimediately after connection enters 290/* 4. Try to fixup all. It is made immediately after connection enters
290 * established state. 291 * established state.
291 */ 292 */
292static void tcp_init_buffer_space(struct sock *sk) 293static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
326static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) 327static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
327{ 328{
328 struct inet_connection_sock *icsk = inet_csk(sk); 329 struct inet_connection_sock *icsk = inet_csk(sk);
329 struct sk_buff *skb;
330 unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
331 int ofo_win = 0;
332 330
333 icsk->icsk_ack.quick = 0; 331 icsk->icsk_ack.quick = 0;
334 332
335 skb_queue_walk(&tp->out_of_order_queue, skb) { 333 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
336 ofo_win += skb->len; 334 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
337 } 335 !tcp_memory_pressure &&
338 336 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
339 /* If overcommit is due to out of order segments, 337 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
340 * do not clamp window. Try to expand rcvbuf instead. 338 sysctl_tcp_rmem[2]);
341 */
342 if (ofo_win) {
343 if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
344 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
345 !tcp_memory_pressure &&
346 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
347 sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
348 sysctl_tcp_rmem[2]);
349 } 339 }
350 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { 340 if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
351 app_win += ofo_win;
352 if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
353 app_win >>= 1;
354 if (app_win > icsk->icsk_ack.rcv_mss)
355 app_win -= icsk->icsk_ack.rcv_mss;
356 app_win = max(app_win, 2U*tp->advmss);
357
358 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); 341 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
359 }
360} 342}
361 343
362/* Receiver "autotuning" code. 344/* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
385 * are stalled on filesystem I/O. 367 * are stalled on filesystem I/O.
386 * 368 *
387 * Also, since we are only going for a minimum in the 369 * Also, since we are only going for a minimum in the
388 * non-timestamp case, we do not smoothe things out 370 * non-timestamp case, we do not smoother things out
389 * else with timestamps disabled convergance takes too 371 * else with timestamps disabled convergence takes too
390 * long. 372 * long.
391 */ 373 */
392 if (!win_dep) { 374 if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
395 } else if (m < new_sample) 377 } else if (m < new_sample)
396 new_sample = m << 3; 378 new_sample = m << 3;
397 } else { 379 } else {
398 /* No previous mesaure. */ 380 /* No previous measure. */
399 new_sample = m << 3; 381 new_sample = m << 3;
400 } 382 }
401 383
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
524 if (icsk->icsk_ack.ato > icsk->icsk_rto) 506 if (icsk->icsk_ack.ato > icsk->icsk_rto)
525 icsk->icsk_ack.ato = icsk->icsk_rto; 507 icsk->icsk_ack.ato = icsk->icsk_rto;
526 } else if (m > icsk->icsk_rto) { 508 } else if (m > icsk->icsk_rto) {
527 /* Too long gap. Apparently sender falled to 509 /* Too long gap. Apparently sender failed to
528 * restart window, so that we send ACKs quickly. 510 * restart window, so that we send ACKs quickly.
529 */ 511 */
530 tcp_incr_quickack(sk); 512 tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
548 * To save cycles in the RFC 1323 implementation it was better to break 530 * To save cycles in the RFC 1323 implementation it was better to break
549 * it up into three procedures. -- erics 531 * it up into three procedures. -- erics
550 */ 532 */
551static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) 533static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
552{ 534{
553 struct tcp_sock *tp = tcp_sk(sk); 535 struct tcp_sock *tp = tcp_sk(sk);
554 const struct inet_connection_sock *icsk = inet_csk(sk);
555 long m = mrtt; /* RTT */ 536 long m = mrtt; /* RTT */
556 537
557 /* The following amusing code comes from Jacobson's 538 /* The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
565 * 546 *
566 * Funny. This algorithm seems to be very broken. 547 * Funny. This algorithm seems to be very broken.
567 * These formulae increase RTO, when it should be decreased, increase 548 * These formulae increase RTO, when it should be decreased, increase
568 * too slowly, when it should be incresed fastly, decrease too fastly 549 * too slowly, when it should be increased fastly, decrease too fastly
569 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely 550 * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
570 * does not matter how to _calculate_ it. Seems, it was trap 551 * does not matter how to _calculate_ it. Seems, it was trap
571 * that VJ failed to avoid. 8) 552 * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
610 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); 591 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
611 tp->rtt_seq = tp->snd_nxt; 592 tp->rtt_seq = tp->snd_nxt;
612 } 593 }
613
614 if (icsk->icsk_ca_ops->rtt_sample)
615 icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
616} 594}
617 595
618/* Calculate rto without backoff. This is the second half of Van Jacobson's 596/* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
629 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ 607 * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
630 * to do with delayed acks, because at cwnd>2 true delack timeout 608 * to do with delayed acks, because at cwnd>2 true delack timeout
631 * is invisible. Actually, Linux-2.4 also generates erratic 609 * is invisible. Actually, Linux-2.4 also generates erratic
632 * ACKs in some curcumstances. 610 * ACKs in some circumstances.
633 */ 611 */
634 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; 612 inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;
635 613
636 /* 2. Fixups made earlier cannot be right. 614 /* 2. Fixups made earlier cannot be right.
637 * If we do not estimate RTO correctly without them, 615 * If we do not estimate RTO correctly without them,
638 * all the algo is pure shit and should be replaced 616 * all the algo is pure shit and should be replaced
639 * with correct one. It is exaclty, which we pretend to do. 617 * with correct one. It is exactly, which we pretend to do.
640 */ 618 */
641} 619}
642 620
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
794 * to make it more realistic. 772 * to make it more realistic.
795 * 773 *
796 * A bit of theory. RTT is time passed after "normal" sized packet 774 * A bit of theory. RTT is time passed after "normal" sized packet
797 * is sent until it is ACKed. In normal curcumstances sending small 775 * is sent until it is ACKed. In normal circumstances sending small
798 * packets force peer to delay ACKs and calculation is correct too. 776 * packets force peer to delay ACKs and calculation is correct too.
799 * The algorithm is adaptive and, provided we follow specs, it 777 * The algorithm is adaptive and, provided we follow specs, it
800 * NEVER underestimate RTT. BUT! If peer tries to make some clever 778 * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
919 int prior_fackets; 897 int prior_fackets;
920 u32 lost_retrans = 0; 898 u32 lost_retrans = 0;
921 int flag = 0; 899 int flag = 0;
900 int dup_sack = 0;
922 int i; 901 int i;
923 902
924 if (!tp->sacked_out) 903 if (!tp->sacked_out)
925 tp->fackets_out = 0; 904 tp->fackets_out = 0;
926 prior_fackets = tp->fackets_out; 905 prior_fackets = tp->fackets_out;
927 906
928 for (i=0; i<num_sacks; i++, sp++) { 907 /* SACK fastpath:
929 struct sk_buff *skb; 908 * if the only SACK change is the increase of the end_seq of
930 __u32 start_seq = ntohl(sp->start_seq); 909 * the first block then only apply that SACK block
931 __u32 end_seq = ntohl(sp->end_seq); 910 * and use retrans queue hinting otherwise slowpath */
932 int fack_count = 0; 911 flag = 1;
933 int dup_sack = 0; 912 for (i = 0; i< num_sacks; i++) {
913 __u32 start_seq = ntohl(sp[i].start_seq);
914 __u32 end_seq = ntohl(sp[i].end_seq);
915
916 if (i == 0){
917 if (tp->recv_sack_cache[i].start_seq != start_seq)
918 flag = 0;
919 } else {
920 if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
921 (tp->recv_sack_cache[i].end_seq != end_seq))
922 flag = 0;
923 }
924 tp->recv_sack_cache[i].start_seq = start_seq;
925 tp->recv_sack_cache[i].end_seq = end_seq;
934 926
935 /* Check for D-SACK. */ 927 /* Check for D-SACK. */
936 if (i == 0) { 928 if (i == 0) {
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
962 if (before(ack, prior_snd_una - tp->max_window)) 954 if (before(ack, prior_snd_una - tp->max_window))
963 return 0; 955 return 0;
964 } 956 }
957 }
958
959 if (flag)
960 num_sacks = 1;
961 else {
962 int j;
963 tp->fastpath_skb_hint = NULL;
964
965 /* order SACK blocks to allow in order walk of the retrans queue */
966 for (i = num_sacks-1; i > 0; i--) {
967 for (j = 0; j < i; j++){
968 if (after(ntohl(sp[j].start_seq),
969 ntohl(sp[j+1].start_seq))){
970 sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
971 sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
972 sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
973 sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
974 }
975
976 }
977 }
978 }
979
980 /* clear flag as used for different purpose in following code */
981 flag = 0;
982
983 for (i=0; i<num_sacks; i++, sp++) {
984 struct sk_buff *skb;
985 __u32 start_seq = ntohl(sp->start_seq);
986 __u32 end_seq = ntohl(sp->end_seq);
987 int fack_count;
988
989 /* Use SACK fastpath hint if valid */
990 if (tp->fastpath_skb_hint) {
991 skb = tp->fastpath_skb_hint;
992 fack_count = tp->fastpath_cnt_hint;
993 } else {
994 skb = sk->sk_write_queue.next;
995 fack_count = 0;
996 }
965 997
966 /* Event "B" in the comment above. */ 998 /* Event "B" in the comment above. */
967 if (after(end_seq, tp->high_seq)) 999 if (after(end_seq, tp->high_seq))
968 flag |= FLAG_DATA_LOST; 1000 flag |= FLAG_DATA_LOST;
969 1001
970 sk_stream_for_retrans_queue(skb, sk) { 1002 sk_stream_for_retrans_queue_from(skb, sk) {
971 int in_sack, pcount; 1003 int in_sack, pcount;
972 u8 sacked; 1004 u8 sacked;
973 1005
1006 tp->fastpath_skb_hint = skb;
1007 tp->fastpath_cnt_hint = fack_count;
1008
974 /* The retransmission queue is always in order, so 1009 /* The retransmission queue is always in order, so
975 * we can short-circuit the walk early. 1010 * we can short-circuit the walk early.
976 */ 1011 */
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1045 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); 1080 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
1046 tp->lost_out -= tcp_skb_pcount(skb); 1081 tp->lost_out -= tcp_skb_pcount(skb);
1047 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083
1084 /* clear lost hint */
1085 tp->retransmit_skb_hint = NULL;
1048 } 1086 }
1049 } else { 1087 } else {
1050 /* New sack for not retransmitted frame, 1088 /* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1057 if (sacked & TCPCB_LOST) { 1095 if (sacked & TCPCB_LOST) {
1058 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1096 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1059 tp->lost_out -= tcp_skb_pcount(skb); 1097 tp->lost_out -= tcp_skb_pcount(skb);
1098
1099 /* clear lost hint */
1100 tp->retransmit_skb_hint = NULL;
1060 } 1101 }
1061 } 1102 }
1062 1103
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1080 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { 1121 (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1122 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1082 tp->retrans_out -= tcp_skb_pcount(skb); 1123 tp->retrans_out -= tcp_skb_pcount(skb);
1124 tp->retransmit_skb_hint = NULL;
1083 } 1125 }
1084 } 1126 }
1085 } 1127 }
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1107 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1149 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1108 tp->retrans_out -= tcp_skb_pcount(skb); 1150 tp->retrans_out -= tcp_skb_pcount(skb);
1109 1151
1152 /* clear lost hint */
1153 tp->retransmit_skb_hint = NULL;
1154
1110 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { 1155 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
1111 tp->lost_out += tcp_skb_pcount(skb); 1156 tp->lost_out += tcp_skb_pcount(skb);
1112 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1157 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
1214 tcp_set_ca_state(sk, TCP_CA_Loss); 1259 tcp_set_ca_state(sk, TCP_CA_Loss);
1215 tp->high_seq = tp->frto_highmark; 1260 tp->high_seq = tp->frto_highmark;
1216 TCP_ECN_queue_cwr(tp); 1261 TCP_ECN_queue_cwr(tp);
1262
1263 clear_all_retrans_hints(tp);
1217} 1264}
1218 1265
1219void tcp_clear_retrans(struct tcp_sock *tp) 1266void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
1251 tp->snd_cwnd_cnt = 0; 1298 tp->snd_cwnd_cnt = 0;
1252 tp->snd_cwnd_stamp = tcp_time_stamp; 1299 tp->snd_cwnd_stamp = tcp_time_stamp;
1253 1300
1301 tp->bytes_acked = 0;
1254 tcp_clear_retrans(tp); 1302 tcp_clear_retrans(tp);
1255 1303
1256 /* Push undo marker, if it was plain RTO and nothing 1304 /* Push undo marker, if it was plain RTO and nothing
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
1279 tcp_set_ca_state(sk, TCP_CA_Loss); 1327 tcp_set_ca_state(sk, TCP_CA_Loss);
1280 tp->high_seq = tp->snd_nxt; 1328 tp->high_seq = tp->snd_nxt;
1281 TCP_ECN_queue_cwr(tp); 1329 TCP_ECN_queue_cwr(tp);
1330
1331 clear_all_retrans_hints(tp);
1282} 1332}
1283 1333
1284static int tcp_check_sack_reneging(struct sock *sk) 1334static int tcp_check_sack_reneging(struct sock *sk)
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
1503 int packets, u32 high_seq) 1553 int packets, u32 high_seq)
1504{ 1554{
1505 struct sk_buff *skb; 1555 struct sk_buff *skb;
1506 int cnt = packets; 1556 int cnt;
1507 1557
1508 BUG_TRAP(cnt <= tp->packets_out); 1558 BUG_TRAP(packets <= tp->packets_out);
1559 if (tp->lost_skb_hint) {
1560 skb = tp->lost_skb_hint;
1561 cnt = tp->lost_cnt_hint;
1562 } else {
1563 skb = sk->sk_write_queue.next;
1564 cnt = 0;
1565 }
1509 1566
1510 sk_stream_for_retrans_queue(skb, sk) { 1567 sk_stream_for_retrans_queue_from(skb, sk) {
1511 cnt -= tcp_skb_pcount(skb); 1568 /* TODO: do this better */
1512 if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) 1569 /* this is not the most efficient way to do this... */
1570 tp->lost_skb_hint = skb;
1571 tp->lost_cnt_hint = cnt;
1572 cnt += tcp_skb_pcount(skb);
1573 if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
1513 break; 1574 break;
1514 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1575 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1515 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1576 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1516 tp->lost_out += tcp_skb_pcount(skb); 1577 tp->lost_out += tcp_skb_pcount(skb);
1578
1579 /* clear xmit_retransmit_queue hints
1580 * if this is beyond hint */
1581 if(tp->retransmit_skb_hint != NULL &&
1582 before(TCP_SKB_CB(skb)->seq,
1583 TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
1584
1585 tp->retransmit_skb_hint = NULL;
1586 }
1517 } 1587 }
1518 } 1588 }
1519 tcp_sync_left_out(tp); 1589 tcp_sync_left_out(tp);
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
1540 if (tcp_head_timedout(sk, tp)) { 1610 if (tcp_head_timedout(sk, tp)) {
1541 struct sk_buff *skb; 1611 struct sk_buff *skb;
1542 1612
1543 sk_stream_for_retrans_queue(skb, sk) { 1613 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
1544 if (tcp_skb_timedout(sk, skb) && 1614 : sk->sk_write_queue.next;
1545 !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { 1615
1616 sk_stream_for_retrans_queue_from(skb, sk) {
1617 if (!tcp_skb_timedout(sk, skb))
1618 break;
1619
1620 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
1546 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; 1621 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
1547 tp->lost_out += tcp_skb_pcount(skb); 1622 tp->lost_out += tcp_skb_pcount(skb);
1623
1624 /* clear xmit_retrans hint */
1625 if (tp->retransmit_skb_hint &&
1626 before(TCP_SKB_CB(skb)->seq,
1627 TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
1628
1629 tp->retransmit_skb_hint = NULL;
1548 } 1630 }
1549 } 1631 }
1632
1633 tp->scoreboard_skb_hint = skb;
1634
1550 tcp_sync_left_out(tp); 1635 tcp_sync_left_out(tp);
1551 } 1636 }
1552} 1637}
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
1626 } 1711 }
1627 tcp_moderate_cwnd(tp); 1712 tcp_moderate_cwnd(tp);
1628 tp->snd_cwnd_stamp = tcp_time_stamp; 1713 tp->snd_cwnd_stamp = tcp_time_stamp;
1714
1715 /* There is something screwy going on with the retrans hints after
1716 an undo */
1717 clear_all_retrans_hints(tp);
1629} 1718}
1630 1719
1631static inline int tcp_may_undo(struct tcp_sock *tp) 1720static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
1709 sk_stream_for_retrans_queue(skb, sk) { 1798 sk_stream_for_retrans_queue(skb, sk) {
1710 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; 1799 TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
1711 } 1800 }
1801
1802 clear_all_retrans_hints(tp);
1803
1712 DBGUNDO(sk, tp, "partial loss"); 1804 DBGUNDO(sk, tp, "partial loss");
1713 tp->lost_out = 0; 1805 tp->lost_out = 0;
1714 tp->left_out = tp->sacked_out; 1806 tp->left_out = tp->sacked_out;
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1908 TCP_ECN_queue_cwr(tp); 2000 TCP_ECN_queue_cwr(tp);
1909 } 2001 }
1910 2002
2003 tp->bytes_acked = 0;
1911 tp->snd_cwnd_cnt = 0; 2004 tp->snd_cwnd_cnt = 0;
1912 tcp_set_ca_state(sk, TCP_CA_Recovery); 2005 tcp_set_ca_state(sk, TCP_CA_Recovery);
1913 } 2006 }
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
1919} 2012}
1920 2013
1921/* Read draft-ietf-tcplw-high-performance before mucking 2014/* Read draft-ietf-tcplw-high-performance before mucking
1922 * with this code. (Superceeds RFC1323) 2015 * with this code. (Supersedes RFC1323)
1923 */ 2016 */
1924static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) 2017static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
1925{ 2018{
1926 /* RTTM Rule: A TSecr value received in a segment is used to 2019 /* RTTM Rule: A TSecr value received in a segment is used to
1927 * update the averaged RTT measurement only if the segment 2020 * update the averaged RTT measurement only if the segment
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
1932 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> 2025 * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
1933 * 2026 *
1934 * Changed: reset backoff as soon as we see the first valid sample. 2027 * Changed: reset backoff as soon as we see the first valid sample.
1935 * If we do not, we get strongly overstimated rto. With timestamps 2028 * If we do not, we get strongly overestimated rto. With timestamps
1936 * samples are accepted even from very old segments: f.e., when rtt=1 2029 * samples are accepted even from very old segments: f.e., when rtt=1
1937 * increases to 8, we retransmit 5 times and after 8 seconds delayed 2030 * increases to 8, we retransmit 5 times and after 8 seconds delayed
1938 * answer arrives rto becomes 120 seconds! If at least one of segments 2031 * answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
1940 */ 2033 */
1941 struct tcp_sock *tp = tcp_sk(sk); 2034 struct tcp_sock *tp = tcp_sk(sk);
1942 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; 2035 const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
1943 tcp_rtt_estimator(sk, seq_rtt, usrtt); 2036 tcp_rtt_estimator(sk, seq_rtt);
1944 tcp_set_rto(sk); 2037 tcp_set_rto(sk);
1945 inet_csk(sk)->icsk_backoff = 0; 2038 inet_csk(sk)->icsk_backoff = 0;
1946 tcp_bound_rto(sk); 2039 tcp_bound_rto(sk);
1947} 2040}
1948 2041
1949static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) 2042static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
1950{ 2043{
1951 /* We don't have a timestamp. Can only use 2044 /* We don't have a timestamp. Can only use
1952 * packets that are not retransmitted to determine 2045 * packets that are not retransmitted to determine
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
1960 if (flag & FLAG_RETRANS_DATA_ACKED) 2053 if (flag & FLAG_RETRANS_DATA_ACKED)
1961 return; 2054 return;
1962 2055
1963 tcp_rtt_estimator(sk, seq_rtt, usrtt); 2056 tcp_rtt_estimator(sk, seq_rtt);
1964 tcp_set_rto(sk); 2057 tcp_set_rto(sk);
1965 inet_csk(sk)->icsk_backoff = 0; 2058 inet_csk(sk)->icsk_backoff = 0;
1966 tcp_bound_rto(sk); 2059 tcp_bound_rto(sk);
1967} 2060}
1968 2061
1969static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, 2062static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
1970 const s32 seq_rtt, u32 *usrtt) 2063 const s32 seq_rtt)
1971{ 2064{
1972 const struct tcp_sock *tp = tcp_sk(sk); 2065 const struct tcp_sock *tp = tcp_sk(sk);
1973 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ 2066 /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
1974 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) 2067 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
1975 tcp_ack_saw_tstamp(sk, usrtt, flag); 2068 tcp_ack_saw_tstamp(sk, flag);
1976 else if (seq_rtt >= 0) 2069 else if (seq_rtt >= 0)
1977 tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); 2070 tcp_ack_no_tstamp(sk, seq_rtt, flag);
1978} 2071}
1979 2072
1980static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, 2073static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
2054 return acked; 2147 return acked;
2055} 2148}
2056 2149
2150static inline u32 tcp_usrtt(const struct sk_buff *skb)
2151{
2152 struct timeval tv, now;
2153
2154 do_gettimeofday(&now);
2155 skb_get_timestamp(skb, &tv);
2156 return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
2157}
2057 2158
2058/* Remove acknowledged frames from the retransmission queue. */ 2159/* Remove acknowledged frames from the retransmission queue. */
2059static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt) 2160static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
2060{ 2161{
2061 struct tcp_sock *tp = tcp_sk(sk); 2162 struct tcp_sock *tp = tcp_sk(sk);
2163 const struct inet_connection_sock *icsk = inet_csk(sk);
2062 struct sk_buff *skb; 2164 struct sk_buff *skb;
2063 __u32 now = tcp_time_stamp; 2165 __u32 now = tcp_time_stamp;
2064 int acked = 0; 2166 int acked = 0;
2065 __s32 seq_rtt = -1; 2167 __s32 seq_rtt = -1;
2066 struct timeval usnow;
2067 u32 pkts_acked = 0; 2168 u32 pkts_acked = 0;
2068 2169 void (*rtt_sample)(struct sock *sk, u32 usrtt)
2069 if (seq_usrtt) 2170 = icsk->icsk_ca_ops->rtt_sample;
2070 do_gettimeofday(&usnow);
2071 2171
2072 while ((skb = skb_peek(&sk->sk_write_queue)) && 2172 while ((skb = skb_peek(&sk->sk_write_queue)) &&
2073 skb != sk->sk_send_head) { 2173 skb != sk->sk_send_head) {
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2107 tp->retrans_out -= tcp_skb_pcount(skb); 2207 tp->retrans_out -= tcp_skb_pcount(skb);
2108 acked |= FLAG_RETRANS_DATA_ACKED; 2208 acked |= FLAG_RETRANS_DATA_ACKED;
2109 seq_rtt = -1; 2209 seq_rtt = -1;
2110 } else if (seq_rtt < 0) 2210 } else if (seq_rtt < 0) {
2111 seq_rtt = now - scb->when; 2211 seq_rtt = now - scb->when;
2112 if (seq_usrtt) { 2212 if (rtt_sample)
2113 struct timeval tv; 2213 (*rtt_sample)(sk, tcp_usrtt(skb));
2114
2115 skb_get_timestamp(skb, &tv);
2116 *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
2117 + (usnow.tv_usec - tv.tv_usec);
2118 } 2214 }
2119
2120 if (sacked & TCPCB_SACKED_ACKED) 2215 if (sacked & TCPCB_SACKED_ACKED)
2121 tp->sacked_out -= tcp_skb_pcount(skb); 2216 tp->sacked_out -= tcp_skb_pcount(skb);
2122 if (sacked & TCPCB_LOST) 2217 if (sacked & TCPCB_LOST)
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2126 !before(scb->end_seq, tp->snd_up)) 2221 !before(scb->end_seq, tp->snd_up))
2127 tp->urg_mode = 0; 2222 tp->urg_mode = 0;
2128 } 2223 }
2129 } else if (seq_rtt < 0) 2224 } else if (seq_rtt < 0) {
2130 seq_rtt = now - scb->when; 2225 seq_rtt = now - scb->when;
2226 if (rtt_sample)
2227 (*rtt_sample)(sk, tcp_usrtt(skb));
2228 }
2131 tcp_dec_pcount_approx(&tp->fackets_out, skb); 2229 tcp_dec_pcount_approx(&tp->fackets_out, skb);
2132 tcp_packets_out_dec(tp, skb); 2230 tcp_packets_out_dec(tp, skb);
2133 __skb_unlink(skb, &sk->sk_write_queue); 2231 __skb_unlink(skb, &sk->sk_write_queue);
2134 sk_stream_free_skb(sk, skb); 2232 sk_stream_free_skb(sk, skb);
2233 clear_all_retrans_hints(tp);
2135 } 2234 }
2136 2235
2137 if (acked&FLAG_ACKED) { 2236 if (acked&FLAG_ACKED) {
2138 const struct inet_connection_sock *icsk = inet_csk(sk); 2237 tcp_ack_update_rtt(sk, acked, seq_rtt);
2139 tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
2140 tcp_ack_packets_out(sk, tp); 2238 tcp_ack_packets_out(sk, tp);
2141 2239
2142 if (icsk->icsk_ca_ops->pkts_acked) 2240 if (icsk->icsk_ca_ops->pkts_acked)
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
2284 } 2382 }
2285 2383
2286 /* F-RTO affects on two new ACKs following RTO. 2384 /* F-RTO affects on two new ACKs following RTO.
2287 * At latest on third ACK the TCP behavor is back to normal. 2385 * At latest on third ACK the TCP behavior is back to normal.
2288 */ 2386 */
2289 tp->frto_counter = (tp->frto_counter + 1) % 3; 2387 tp->frto_counter = (tp->frto_counter + 1) % 3;
2290} 2388}
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2299 u32 ack = TCP_SKB_CB(skb)->ack_seq; 2397 u32 ack = TCP_SKB_CB(skb)->ack_seq;
2300 u32 prior_in_flight; 2398 u32 prior_in_flight;
2301 s32 seq_rtt; 2399 s32 seq_rtt;
2302 s32 seq_usrtt = 0;
2303 int prior_packets; 2400 int prior_packets;
2304 2401
2305 /* If the ack is newer than sent or older than previous acks 2402 /* If the ack is newer than sent or older than previous acks
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2311 if (before(ack, prior_snd_una)) 2408 if (before(ack, prior_snd_una))
2312 goto old_ack; 2409 goto old_ack;
2313 2410
2411 if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
2412 tp->bytes_acked += ack - prior_snd_una;
2413
2314 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { 2414 if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
2315 /* Window is constant, pure forward advance. 2415 /* Window is constant, pure forward advance.
2316 * No more checks are required. 2416 * No more checks are required.
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
2352 prior_in_flight = tcp_packets_in_flight(tp); 2452 prior_in_flight = tcp_packets_in_flight(tp);
2353 2453
2354 /* See if we can take anything off of the retransmit queue. */ 2454 /* See if we can take anything off of the retransmit queue. */
2355 flag |= tcp_clean_rtx_queue(sk, &seq_rtt, 2455 flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
2356 icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
2357 2456
2358 if (tp->frto_counter) 2457 if (tp->frto_counter)
2359 tcp_process_frto(sk, prior_snd_una); 2458 tcp_process_frto(sk, prior_snd_una);
2360 2459
2361 if (tcp_ack_is_dubious(sk, flag)) { 2460 if (tcp_ack_is_dubious(sk, flag)) {
2362 /* Advanve CWND, if state allows this. */ 2461 /* Advance CWND, if state allows this. */
2363 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) 2462 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
2364 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); 2463 tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
2365 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); 2464 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
3148{ 3247{
3149 struct sk_buff *skb; 3248 struct sk_buff *skb;
3150 3249
3151 /* First, check that queue is collapsable and find 3250 /* First, check that queue is collapsible and find
3152 * the point where collapsing can be useful. */ 3251 * the point where collapsing can be useful. */
3153 for (skb = head; skb != tail; ) { 3252 for (skb = head; skb != tail; ) {
3154 /* No new bits? It is possible on ofo queue. */ 3253 /* No new bits? It is possible on ofo queue. */
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)
3456 3555
3457/* 3556/*
3458 * This routine is only called when we have urgent data 3557 * This routine is only called when we have urgent data
3459 * signalled. Its the 'slow' part of tcp_urg. It could be 3558 * signaled. Its the 'slow' part of tcp_urg. It could be
3460 * moved inline now as tcp_urg is only called from one 3559 * moved inline now as tcp_urg is only called from one
3461 * place. We handle URGent data wrong. We have to - as 3560 * place. We handle URGent data wrong. We have to - as
3462 * BSD still doesn't use the correction from RFC961. 3561 * BSD still doesn't use the correction from RFC961.
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
3501 * urgent. To do this requires some care. We cannot just ignore 3600 * urgent. To do this requires some care. We cannot just ignore
3502 * tp->copied_seq since we would read the last urgent byte again 3601 * tp->copied_seq since we would read the last urgent byte again
3503 * as data, nor can we alter copied_seq until this data arrives 3602 * as data, nor can we alter copied_seq until this data arrives
3504 * or we break the sematics of SIOCATMARK (and thus sockatmark()) 3603 * or we break the semantics of SIOCATMARK (and thus sockatmark())
3505 * 3604 *
3506 * NOTE. Double Dutch. Rendering to plain English: author of comment 3605 * NOTE. Double Dutch. Rendering to plain English: author of comment
3507 * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); 3606 * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3646 tp->rx_opt.saw_tstamp = 0; 3745 tp->rx_opt.saw_tstamp = 0;
3647 3746
3648 /* pred_flags is 0xS?10 << 16 + snd_wnd 3747 /* pred_flags is 0xS?10 << 16 + snd_wnd
3649 * if header_predition is to be made 3748 * if header_prediction is to be made
3650 * 'S' will always be tp->tcp_header_len >> 2 3749 * 'S' will always be tp->tcp_header_len >> 2
3651 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to 3750 * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
3652 * turn it off (when there are holes in the receive 3751 * turn it off (when there are holes in the receive
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4242 */ 4341 */
4243 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && 4342 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
4244 !tp->srtt) 4343 !tp->srtt)
4245 tcp_ack_saw_tstamp(sk, NULL, 0); 4344 tcp_ack_saw_tstamp(sk, 0);
4246 4345
4247 if (tp->rx_opt.tstamp_ok) 4346 if (tp->rx_opt.tstamp_ok)
4248 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; 4347 tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4372,6 +4471,7 @@ discard:
4372 4471
4373EXPORT_SYMBOL(sysctl_tcp_ecn); 4472EXPORT_SYMBOL(sysctl_tcp_ecn);
4374EXPORT_SYMBOL(sysctl_tcp_reordering); 4473EXPORT_SYMBOL(sysctl_tcp_reordering);
4474EXPORT_SYMBOL(sysctl_tcp_abc);
4375EXPORT_SYMBOL(tcp_parse_options); 4475EXPORT_SYMBOL(tcp_parse_options);
4376EXPORT_SYMBOL(tcp_rcv_established); 4476EXPORT_SYMBOL(tcp_rcv_established);
4377EXPORT_SYMBOL(tcp_rcv_state_process); 4477EXPORT_SYMBOL(tcp_rcv_state_process);
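
sysctl_tcp_abc, exported above, selects Appropriate Byte Count (RFC 3465): instead of opening the window by a fixed amount per ACK, the sender accumulates the number of bytes each ACK acknowledges (the bytes_acked field initialized in the tcp_minisocks.c hunk further down) and grows snd_cwnd only after a full window's worth of data has been acknowledged. The following is a userspace sketch of that accounting for the congestion-avoidance case, assuming those semantics; it is not the kernel's code and the names are illustrative.

    /* Standalone model of RFC 3465 byte counting in congestion avoidance:
     * one window increase per full cwnd of acknowledged bytes, rather than
     * one per received ACK. */
    struct abc_tp {
        unsigned int snd_cwnd;        /* in segments */
        unsigned int snd_cwnd_clamp;
        unsigned int mss_cache;       /* bytes per segment */
        unsigned int bytes_acked;     /* accumulator, trimmed on each increase */
    };

    static void abc_cong_avoid(struct abc_tp *tp, unsigned int acked_bytes)
    {
        tp->bytes_acked += acked_bytes;

        if (tp->bytes_acked >= tp->snd_cwnd * tp->mss_cache) {
            tp->bytes_acked -= tp->snd_cwnd * tp->mss_cache;
            if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                tp->snd_cwnd++;
        }
    }

With 1448-byte segments and snd_cwnd = 10, roughly 14480 acknowledged bytes are needed before the window grows by one segment, regardless of how many full, partial, or stretch ACKs delivered them.
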
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ac1fcf5b4ebc..4d5021e1929b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -39,7 +39,7 @@
39 * request_sock handling and moved 39 * request_sock handling and moved
40 * most of it into the af independent code. 40 * most of it into the af independent code.
41 * Added tail drop and some other bugfixes. 41 * Added tail drop and some other bugfixes.
42 * Added new listen sematics. 42 * Added new listen semantics.
43 * Mike McLagan : Routing by source 43 * Mike McLagan : Routing by source
44 * Juan Jose Ciarlante: ip_dynaddr bits 44 * Juan Jose Ciarlante: ip_dynaddr bits
45 * Andi Kleen: various fixes. 45 * Andi Kleen: various fixes.
@@ -1210,7 +1210,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
1210 1210
1211 /* An explanation is required here, I think. 1211 /* An explanation is required here, I think.
1212 * Packet length and doff are validated by header prediction, 1212 * Packet length and doff are validated by header prediction,
1213 * provided case of th->doff==0 is elimineted. 1213 * provided case of th->doff==0 is eliminated.
1214 * So, we defer the checks. */ 1214 * So, we defer the checks. */
1215 if ((skb->ip_summed != CHECKSUM_UNNECESSARY && 1215 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1216 tcp_v4_checksum_init(skb))) 1216 tcp_v4_checksum_init(skb)))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index b1a63b2c6b4a..1b66a2ac4321 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -158,7 +158,7 @@ kill_with_rst:
158 /* I am shamed, but failed to make it more elegant. 158 /* I am shamed, but failed to make it more elegant.
159 * Yes, it is direct reference to IP, which is impossible 159 * Yes, it is direct reference to IP, which is impossible
160 * to generalize to IPv6. Taking into account that IPv6 160 * to generalize to IPv6. Taking into account that IPv6
161 * do not undertsnad recycling in any case, it not 161 * do not understand recycling in any case, it not
162 * a big problem in practice. --ANK */ 162 * a big problem in practice. --ANK */
163 if (tw->tw_family == AF_INET && 163 if (tw->tw_family == AF_INET &&
164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && 164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
@@ -194,7 +194,7 @@ kill_with_rst:
194 /* In window segment, it may be only reset or bare ack. */ 194 /* In window segment, it may be only reset or bare ack. */
195 195
196 if (th->rst) { 196 if (th->rst) {
197 /* This is TIME_WAIT assasination, in two flavors. 197 /* This is TIME_WAIT assassination, in two flavors.
198 * Oh well... nobody has a sufficient solution to this 198 * Oh well... nobody has a sufficient solution to this
199 * protocol bug yet. 199 * protocol bug yet.
200 */ 200 */
@@ -380,6 +380,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
380 */ 380 */
381 newtp->snd_cwnd = 2; 381 newtp->snd_cwnd = 2;
382 newtp->snd_cwnd_cnt = 0; 382 newtp->snd_cwnd_cnt = 0;
383 newtp->bytes_acked = 0;
383 384
384 newtp->frto_counter = 0; 385 newtp->frto_counter = 0;
385 newtp->frto_highmark = 0; 386 newtp->frto_highmark = 0;
@@ -550,7 +551,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
550 551
551 /* RFC793 page 36: "If the connection is in any non-synchronized state ... 552 /* RFC793 page 36: "If the connection is in any non-synchronized state ...
552 * and the incoming segment acknowledges something not yet 553 * and the incoming segment acknowledges something not yet
553 * sent (the segment carries an unaccaptable ACK) ... 554 * sent (the segment carries an unacceptable ACK) ...
554 * a reset is sent." 555 * a reset is sent."
555 * 556 *
556 * Invalid ACK: reset will be sent by listening socket 557 * Invalid ACK: reset will be sent by listening socket
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b907456a79f4..029c70dfb585 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -436,6 +436,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss
436 u16 flags; 436 u16 flags;
437 437
438 BUG_ON(len > skb->len); 438 BUG_ON(len > skb->len);
439
440 clear_all_retrans_hints(tp);
439 nsize = skb_headlen(skb) - len; 441 nsize = skb_headlen(skb) - len;
440 if (nsize < 0) 442 if (nsize < 0)
441 nsize = 0; 443 nsize = 0;
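
tcp_fragment() now calls clear_all_retrans_hints() before splitting an skb, and the same call appears below wherever the write queue is reshuffled (collapsing, simple retransmit). Its definition is not part of these hunks; the sketch here shows what dropping the hints amounts to, using only the hint fields visible later in this patch (retransmit_skb_hint, forward_skb_hint and their counters). The real helper may clear additional cached positions.

    #include <stddef.h>

    struct sk_buff;                              /* opaque in this sketch */

    struct hint_tp {
        struct sk_buff *retransmit_skb_hint;     /* resume point, first pass */
        int             retransmit_cnt_hint;
        struct sk_buff *forward_skb_hint;        /* resume point, second pass */
        int             forward_cnt_hint;
    };

    /* Any structural change to the write queue invalidates the cached scan
     * positions, so forget them and let the next retransmit pass rebuild
     * them from the head of the queue. */
    static void sketch_clear_all_retrans_hints(struct hint_tp *tp)
    {
        tp->retransmit_skb_hint = NULL;
        tp->retransmit_cnt_hint = 0;
        tp->forward_skb_hint    = NULL;
        tp->forward_cnt_hint    = 0;
    }
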
@@ -599,7 +601,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
599 for TCP options, but includes only bare TCP header. 601 for TCP options, but includes only bare TCP header.
600 602
601 tp->rx_opt.mss_clamp is mss negotiated at connection setup. 603 tp->rx_opt.mss_clamp is mss negotiated at connection setup.
602 It is minumum of user_mss and mss received with SYN. 604 It is minimum of user_mss and mss received with SYN.
603 It also does not include TCP options. 605 It also does not include TCP options.
604 606
605 tp->pmtu_cookie is last pmtu, seen by this function. 607 tp->pmtu_cookie is last pmtu, seen by this function.
@@ -1171,7 +1173,7 @@ u32 __tcp_select_window(struct sock *sk)
1171{ 1173{
1172 struct inet_connection_sock *icsk = inet_csk(sk); 1174 struct inet_connection_sock *icsk = inet_csk(sk);
1173 struct tcp_sock *tp = tcp_sk(sk); 1175 struct tcp_sock *tp = tcp_sk(sk);
1174 /* MSS for the peer's data. Previous verions used mss_clamp 1176 /* MSS for the peer's data. Previous versions used mss_clamp
1175 * here. I don't know if the value based on our guesses 1177 * here. I don't know if the value based on our guesses
1176 * of peer's MSS is better for the performance. It's more correct 1178 * of peer's MSS is better for the performance. It's more correct
1177 * but may be worse for the performance because of rcv_mss 1179 * but may be worse for the performance because of rcv_mss
@@ -1260,7 +1262,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
1260 BUG_ON(tcp_skb_pcount(skb) != 1 || 1262 BUG_ON(tcp_skb_pcount(skb) != 1 ||
1261 tcp_skb_pcount(next_skb) != 1); 1263 tcp_skb_pcount(next_skb) != 1);
1262 1264
1263 /* Ok. We will be able to collapse the packet. */ 1265 /* changing transmit queue under us so clear hints */
1266 clear_all_retrans_hints(tp);
1267
1268 /* Ok. We will be able to collapse the packet. */
1264 __skb_unlink(next_skb, &sk->sk_write_queue); 1269 __skb_unlink(next_skb, &sk->sk_write_queue);
1265 1270
1266 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); 1271 memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size);
@@ -1330,6 +1335,8 @@ void tcp_simple_retransmit(struct sock *sk)
1330 } 1335 }
1331 } 1336 }
1332 1337
1338 clear_all_retrans_hints(tp);
1339
1333 if (!lost) 1340 if (!lost)
1334 return; 1341 return;
1335 1342
@@ -1361,7 +1368,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1361 int err; 1368 int err;
1362 1369
1363 /* Do not sent more than we queued. 1/4 is reserved for possible 1370 /* Do not sent more than we queued. 1/4 is reserved for possible
1364 * copying overhead: frgagmentation, tunneling, mangling etc. 1371 * copying overhead: fragmentation, tunneling, mangling etc.
1365 */ 1372 */
1366 if (atomic_read(&sk->sk_wmem_alloc) > 1373 if (atomic_read(&sk->sk_wmem_alloc) >
1367 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf)) 1374 min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
@@ -1468,13 +1475,25 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1468 const struct inet_connection_sock *icsk = inet_csk(sk); 1475 const struct inet_connection_sock *icsk = inet_csk(sk);
1469 struct tcp_sock *tp = tcp_sk(sk); 1476 struct tcp_sock *tp = tcp_sk(sk);
1470 struct sk_buff *skb; 1477 struct sk_buff *skb;
1471 int packet_cnt = tp->lost_out; 1478 int packet_cnt;
1479
1480 if (tp->retransmit_skb_hint) {
1481 skb = tp->retransmit_skb_hint;
1482 packet_cnt = tp->retransmit_cnt_hint;
1483 }else{
1484 skb = sk->sk_write_queue.next;
1485 packet_cnt = 0;
1486 }
1472 1487
1473 /* First pass: retransmit lost packets. */ 1488 /* First pass: retransmit lost packets. */
1474 if (packet_cnt) { 1489 if (tp->lost_out) {
1475 sk_stream_for_retrans_queue(skb, sk) { 1490 sk_stream_for_retrans_queue_from(skb, sk) {
1476 __u8 sacked = TCP_SKB_CB(skb)->sacked; 1491 __u8 sacked = TCP_SKB_CB(skb)->sacked;
1477 1492
1493 /* we could do better than to assign each time */
1494 tp->retransmit_skb_hint = skb;
1495 tp->retransmit_cnt_hint = packet_cnt;
1496
1478 /* Assume this retransmit will generate 1497 /* Assume this retransmit will generate
1479 * only one packet for congestion window 1498 * only one packet for congestion window
1480 * calculation purposes. This works because 1499 * calculation purposes. This works because
@@ -1485,10 +1504,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1485 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) 1504 if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
1486 return; 1505 return;
1487 1506
1488 if (sacked&TCPCB_LOST) { 1507 if (sacked & TCPCB_LOST) {
1489 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { 1508 if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) {
1490 if (tcp_retransmit_skb(sk, skb)) 1509 if (tcp_retransmit_skb(sk, skb)) {
1510 tp->retransmit_skb_hint = NULL;
1491 return; 1511 return;
1512 }
1492 if (icsk->icsk_ca_state != TCP_CA_Loss) 1513 if (icsk->icsk_ca_state != TCP_CA_Loss)
1493 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); 1514 NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS);
1494 else 1515 else
@@ -1501,8 +1522,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1501 TCP_RTO_MAX); 1522 TCP_RTO_MAX);
1502 } 1523 }
1503 1524
1504 packet_cnt -= tcp_skb_pcount(skb); 1525 packet_cnt += tcp_skb_pcount(skb);
1505 if (packet_cnt <= 0) 1526 if (packet_cnt >= tp->lost_out)
1506 break; 1527 break;
1507 } 1528 }
1508 } 1529 }
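
The first retransmission pass above now resumes from tp->retransmit_skb_hint instead of rescanning the whole write queue, and packet_cnt counts lost segments upward until it reaches tp->lost_out rather than counting a private copy of lost_out down to zero; a failed tcp_retransmit_skb() drops the hint so the next attempt starts from the queue head again. Below is a compact standalone model of that resume-from-hint loop over a singly linked queue; the list layout and helper names are illustrative, and SACK bookkeeping is deliberately simplified to a single lost flag.

    #include <stddef.h>

    struct seg {
        struct seg  *next;
        int          lost;        /* marked lost by the scoreboard */
        unsigned int pcount;      /* segments this buffer accounts for */
    };

    struct rexmit_state {
        struct seg  *queue_head;
        struct seg  *retransmit_hint;   /* where the previous pass stopped */
        unsigned int cnt_hint;          /* lost-segment count at that point */
        unsigned int lost_out;          /* total segments currently marked lost */
    };

    static int send_one(struct seg *s) { (void)s; return 0; }   /* 0 = sent OK */

    static void retransmit_pass(struct rexmit_state *st)
    {
        struct seg *s = st->retransmit_hint ? st->retransmit_hint : st->queue_head;
        unsigned int cnt = st->retransmit_hint ? st->cnt_hint : 0;

        for (; s; s = s->next) {
            st->retransmit_hint = s;      /* remember progress before trying */
            st->cnt_hint = cnt;

            if (s->lost) {
                if (send_one(s) != 0) {
                    st->retransmit_hint = NULL;  /* failure: rescan from the head */
                    return;
                }
                cnt += s->pcount;
                if (cnt >= st->lost_out)         /* every lost segment visited */
                    break;
            }
        }
    }
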
@@ -1528,9 +1549,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1528 if (tcp_may_send_now(sk, tp)) 1549 if (tcp_may_send_now(sk, tp))
1529 return; 1550 return;
1530 1551
1531 packet_cnt = 0; 1552 if (tp->forward_skb_hint) {
1553 skb = tp->forward_skb_hint;
1554 packet_cnt = tp->forward_cnt_hint;
1555 } else{
1556 skb = sk->sk_write_queue.next;
1557 packet_cnt = 0;
1558 }
1559
1560 sk_stream_for_retrans_queue_from(skb, sk) {
1561 tp->forward_cnt_hint = packet_cnt;
1562 tp->forward_skb_hint = skb;
1532 1563
1533 sk_stream_for_retrans_queue(skb, sk) {
1534 /* Similar to the retransmit loop above we 1564 /* Similar to the retransmit loop above we
1535 * can pretend that the retransmitted SKB 1565 * can pretend that the retransmitted SKB
1536 * we send out here will be composed of one 1566 * we send out here will be composed of one
@@ -1547,8 +1577,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
1547 continue; 1577 continue;
1548 1578
1549 /* Ok, retransmit it. */ 1579 /* Ok, retransmit it. */
1550 if (tcp_retransmit_skb(sk, skb)) 1580 if (tcp_retransmit_skb(sk, skb)) {
1581 tp->forward_skb_hint = NULL;
1551 break; 1582 break;
1583 }
1552 1584
1553 if (skb == skb_peek(&sk->sk_write_queue)) 1585 if (skb == skb_peek(&sk->sk_write_queue))
1554 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 1586 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
@@ -2058,3 +2090,4 @@ EXPORT_SYMBOL(tcp_connect);
2058EXPORT_SYMBOL(tcp_make_synack); 2090EXPORT_SYMBOL(tcp_make_synack);
2059EXPORT_SYMBOL(tcp_simple_retransmit); 2091EXPORT_SYMBOL(tcp_simple_retransmit);
2060EXPORT_SYMBOL(tcp_sync_mss); 2092EXPORT_SYMBOL(tcp_sync_mss);
2093EXPORT_SYMBOL(sysctl_tcp_tso_win_divisor);
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 327770bf5522..26d7486ee501 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -20,20 +20,20 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
20 u32 in_flight, int flag) 20 u32 in_flight, int flag)
21{ 21{
22 struct tcp_sock *tp = tcp_sk(sk); 22 struct tcp_sock *tp = tcp_sk(sk);
23 if (in_flight < tp->snd_cwnd) 23
24 if (!tcp_is_cwnd_limited(sk, in_flight))
24 return; 25 return;
25 26
26 if (tp->snd_cwnd <= tp->snd_ssthresh) { 27 if (tp->snd_cwnd <= tp->snd_ssthresh)
27 tp->snd_cwnd++; 28 tcp_slow_start(tp);
28 } else { 29 else {
29 tp->snd_cwnd_cnt++; 30 tp->snd_cwnd_cnt++;
30 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ 31 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
31 tp->snd_cwnd++; 32 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
33 tp->snd_cwnd++;
32 tp->snd_cwnd_cnt = 0; 34 tp->snd_cwnd_cnt = 0;
33 } 35 }
34 } 36 }
35 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
36 tp->snd_cwnd_stamp = tcp_time_stamp;
37} 37}
38 38
39static u32 tcp_scalable_ssthresh(struct sock *sk) 39static u32 tcp_scalable_ssthresh(struct sock *sk)
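
Scalable TCP now leans on the shared helpers instead of open-coding them: tcp_is_cwnd_limited() replaces the bare in_flight < tp->snd_cwnd test, tcp_slow_start() replaces the manual snd_cwnd++ below ssthresh, and the clamp is applied inside the additive-increase branch. The helpers themselves live in net/ipv4/tcp_cong.c rather than in these hunks; the following is a hedged standalone sketch of an ABC-aware slow start of the kind this series introduces. The one-MSS threshold and the extra increase for the more aggressive mode are assumptions, not quoted from the patch.

    static int sysctl_tcp_abc = 1;     /* RFC 3465 "L" parameter; assumed knob */

    struct ss_tp {
        unsigned int snd_cwnd;
        unsigned int snd_cwnd_clamp;
        unsigned int mss_cache;
        unsigned int bytes_acked;
    };

    static void sketch_slow_start(struct ss_tp *tp)
    {
        if (sysctl_tcp_abc) {
            /* Byte counted: grow only once a full MSS worth of new data has
             * been acknowledged, and allow a second increase for a presumed
             * delayed ACK in the more aggressive mode. */
            if (tp->bytes_acked < tp->mss_cache)
                return;
            if (sysctl_tcp_abc > 1 && tp->bytes_acked > 2 * tp->mss_cache &&
                tp->snd_cwnd < tp->snd_cwnd_clamp)
                tp->snd_cwnd++;
        }
        tp->bytes_acked = 0;

        if (tp->snd_cwnd < tp->snd_cwnd_clamp)
            tp->snd_cwnd++;
    }
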
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 415ee47ac1c5..e1880959614a 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -58,7 +58,7 @@ static void tcp_write_err(struct sock *sk)
58 * to prevent DoS attacks. It is called when a retransmission timeout 58 * to prevent DoS attacks. It is called when a retransmission timeout
59 * or zero probe timeout occurs on orphaned socket. 59 * or zero probe timeout occurs on orphaned socket.
60 * 60 *
61 * Criterium is still not confirmed experimentally and may change. 61 * Criteria is still not confirmed experimentally and may change.
62 * We kill the socket, if: 62 * We kill the socket, if:
63 * 1. If number of orphaned sockets exceeds an administratively configured 63 * 1. If number of orphaned sockets exceeds an administratively configured
64 * limit. 64 * limit.
@@ -132,7 +132,7 @@ static int tcp_write_timeout(struct sock *sk)
132 hole detection. :-( 132 hole detection. :-(
133 133
134 It is place to make it. It is not made. I do not want 134 It is place to make it. It is not made. I do not want
135 to make it. It is disguisting. It does not work in any 135 to make it. It is disgusting. It does not work in any
136 case. Let me to cite the same draft, which requires for 136 case. Let me to cite the same draft, which requires for
137 us to implement this: 137 us to implement this:
138 138
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 93c5f92070f9..4376814d29fb 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -236,8 +236,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
236 /* We don't have enough RTT samples to do the Vegas 236 /* We don't have enough RTT samples to do the Vegas
237 * calculation, so we'll behave like Reno. 237 * calculation, so we'll behave like Reno.
238 */ 238 */
239 if (tp->snd_cwnd > tp->snd_ssthresh) 239 tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, cnt);
240 tp->snd_cwnd++;
241 } else { 240 } else {
242 u32 rtt, target_cwnd, diff; 241 u32 rtt, target_cwnd, diff;
243 242
@@ -275,7 +274,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
275 */ 274 */
276 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd; 275 diff = (old_wnd << V_PARAM_SHIFT) - target_cwnd;
277 276
278 if (tp->snd_cwnd < tp->snd_ssthresh) { 277 if (tp->snd_cwnd <= tp->snd_ssthresh) {
279 /* Slow start. */ 278 /* Slow start. */
280 if (diff > gamma) { 279 if (diff > gamma) {
281 /* Going too fast. Time to slow down 280 /* Going too fast. Time to slow down
@@ -295,6 +294,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
295 V_PARAM_SHIFT)+1); 294 V_PARAM_SHIFT)+1);
296 295
297 } 296 }
297 tcp_slow_start(tp);
298 } else { 298 } else {
299 /* Congestion avoidance. */ 299 /* Congestion avoidance. */
300 u32 next_snd_cwnd; 300 u32 next_snd_cwnd;
@@ -327,37 +327,17 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
327 else if (next_snd_cwnd < tp->snd_cwnd) 327 else if (next_snd_cwnd < tp->snd_cwnd)
328 tp->snd_cwnd--; 328 tp->snd_cwnd--;
329 } 329 }
330 }
331 330
332 /* Wipe the slate clean for the next RTT. */ 331 if (tp->snd_cwnd < 2)
333 vegas->cntRTT = 0; 332 tp->snd_cwnd = 2;
334 vegas->minRTT = 0x7fffffff; 333 else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
334 tp->snd_cwnd = tp->snd_cwnd_clamp;
335 }
335 } 336 }
336 337
337 /* The following code is executed for every ack we receive, 338 /* Wipe the slate clean for the next RTT. */
338 * except for conditions checked in should_advance_cwnd() 339 vegas->cntRTT = 0;
339 * before the call to tcp_cong_avoid(). Mainly this means that 340 vegas->minRTT = 0x7fffffff;
340 * we only execute this code if the ack actually acked some
341 * data.
342 */
343
344 /* If we are in slow start, increase our cwnd in response to this ACK.
345 * (If we are not in slow start then we are in congestion avoidance,
346 * and adjust our congestion window only once per RTT. See the code
347 * above.)
348 */
349 if (tp->snd_cwnd <= tp->snd_ssthresh)
350 tp->snd_cwnd++;
351
352 /* to keep cwnd from growing without bound */
353 tp->snd_cwnd = min_t(u32, tp->snd_cwnd, tp->snd_cwnd_clamp);
354
355 /* Make sure that we are never so timid as to reduce our cwnd below
356 * 2 MSS.
357 *
358 * Going below 2 MSS would risk huge delayed ACKs from our receiver.
359 */
360 tp->snd_cwnd = max(tp->snd_cwnd, 2U);
361} 341}
362 342
363/* Extract info for Tcp socket info provided via netlink. */ 343/* Extract info for Tcp socket info provided via netlink. */
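
With this change Vegas falls back to tcp_reno_cong_avoid() when it lacks RTT samples, uses tcp_slow_start() in its slow-start branch, and applies the floor and clamp inside the Vegas branch instead of unconditionally at the end of the function. The rationale for the two-segment floor is the one given in the removed comment: a smaller window can stall behind the receiver's delayed ACKs. A tiny standalone helper capturing those bounds:

    /* Sketch of the final bounds applied after a Vegas adjustment: never
     * below two segments, never above the administrative clamp. */
    static unsigned int bound_cwnd(unsigned int cwnd, unsigned int clamp)
    {
        if (cwnd < 2)
            cwnd = 2;
        else if (cwnd > clamp)
            cwnd = clamp;
        return cwnd;
    }
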