Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 288 |
1 files changed, 194 insertions, 94 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3e98b57578dc..40a26b7157b4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -42,7 +42,7 @@ | |||
42 | * Andi Kleen : Moved open_request checking here | 42 | * Andi Kleen : Moved open_request checking here |
43 | * and process RSTs for open_requests. | 43 | * and process RSTs for open_requests. |
44 | * Andi Kleen : Better prune_queue, and other fixes. | 44 | * Andi Kleen : Better prune_queue, and other fixes. |
45 | * Andrey Savochkin: Fix RTT measurements in the presnce of | 45 | * Andrey Savochkin: Fix RTT measurements in the presence of |
46 | * timestamps. | 46 | * timestamps. |
47 | * Andrey Savochkin: Check sequence numbers correctly when | 47 | * Andrey Savochkin: Check sequence numbers correctly when |
48 | * removing SACKs due to in sequence incoming | 48 | * removing SACKs due to in sequence incoming |
@@ -89,6 +89,7 @@ int sysctl_tcp_frto; | |||
89 | int sysctl_tcp_nometrics_save; | 89 | int sysctl_tcp_nometrics_save; |
90 | 90 | ||
91 | int sysctl_tcp_moderate_rcvbuf = 1; | 91 | int sysctl_tcp_moderate_rcvbuf = 1; |
92 | int sysctl_tcp_abc = 1; | ||
92 | 93 | ||
93 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 94 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
94 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 95 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
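The new `sysctl_tcp_abc` knob enables Appropriate Byte Counting (RFC 3465): the congestion window is opened in proportion to the number of bytes acknowledged rather than the number of ACKs received, which resists ACK-division attacks and stops delayed ACKs from halving the growth rate. Below is a minimal userspace sketch of the idea; it is not the kernel's implementation (the kernel counts cwnd in packets and folds this into its congestion-avoidance hooks), and the names and the `L = 2*SMSS` limit are illustrative:

```c
/* Sketch of RFC 3465 Appropriate Byte Counting.  Illustrative only --
 * all names here are made up for the example. */
#include <stdio.h>

#define MSS        1460u
#define ABC_LIMIT  2u          /* L = 2*SMSS, RFC 3465 section 2.3 */

struct cc_state {
    unsigned cwnd;             /* congestion window, in bytes */
    unsigned ssthresh;
    unsigned bytes_acked;      /* accumulated since last increase */
};

static void abc_on_ack(struct cc_state *cc, unsigned acked_bytes)
{
    cc->bytes_acked += acked_bytes;

    if (cc->cwnd < cc->ssthresh) {
        /* Slow start: grow by min(bytes_acked, L*SMSS) per ACK. */
        unsigned inc = cc->bytes_acked;
        if (inc > ABC_LIMIT * MSS)
            inc = ABC_LIMIT * MSS;
        cc->cwnd += inc;
        cc->bytes_acked = 0;
    } else if (cc->bytes_acked >= cc->cwnd) {
        /* Congestion avoidance: one SMSS per cwnd of ACKed data. */
        cc->bytes_acked -= cc->cwnd;
        cc->cwnd += MSS;
    }
}

int main(void)
{
    struct cc_state cc = { .cwnd = 2 * MSS, .ssthresh = 8 * MSS };

    /* A stretch ACK covering 3 segments counts 3 segments of
     * progress, but the increase is still capped at L*SMSS. */
    abc_on_ack(&cc, 3 * MSS);
    printf("cwnd after stretch ACK: %u bytes\n", cc.cwnd);
    return 0;
}
```

Consistent with this, later hunks in the diff zero `tp->bytes_acked` on entering loss (`@@ -1251`) and recovery (`@@ -1908`), and accumulate it in `tcp_ack()` only while `icsk_ca_state < TCP_CA_CWR` (`@@ -2311`), so ABC credit never carries across a congestion event.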
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk) | |||
223 | * of receiver window. Check #2. | 224 | * of receiver window. Check #2. |
224 | * | 225 | * |
225 | * The scheme does not work when sender sends good segments opening | 226 | * The scheme does not work when sender sends good segments opening |
226 | * window and then starts to feed us spagetti. But it should work | 227 | * window and then starts to feed us spaghetti. But it should work |
227 | * in common situations. Otherwise, we have to rely on queue collapsing. | 228 | * in common situations. Otherwise, we have to rely on queue collapsing. |
228 | */ | 229 | */ |
229 | 230 | ||
@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, | |||
233 | { | 234 | { |
234 | /* Optimize this! */ | 235 | /* Optimize this! */ |
235 | int truesize = tcp_win_from_space(skb->truesize)/2; | 236 | int truesize = tcp_win_from_space(skb->truesize)/2; |
236 | int window = tcp_full_space(sk)/2; | 237 | int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2; |
237 | 238 | ||
238 | while (tp->rcv_ssthresh <= window) { | 239 | while (tp->rcv_ssthresh <= window) { |
239 | if (truesize <= skb->len) | 240 | if (truesize <= skb->len) |
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
277 | int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); | 278 | int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); |
278 | 279 | ||
279 | /* Try to select rcvbuf so that 4 mss-sized segments | 280 | /* Try to select rcvbuf so that 4 mss-sized segments |
280 | * will fit to window and correspoding skbs will fit to our rcvbuf. | 281 | * will fit to window and corresponding skbs will fit to our rcvbuf. |
281 | * (was 3; 4 is minimum to allow fast retransmit to work.) | 282 | * (was 3; 4 is minimum to allow fast retransmit to work.) |
282 | */ | 283 | */ |
283 | while (tcp_win_from_space(rcvmem) < tp->advmss) | 284 | while (tcp_win_from_space(rcvmem) < tp->advmss) |
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
286 | sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); | 287 | sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]); |
287 | } | 288 | } |
288 | 289 | ||
289 | /* 4. Try to fixup all. It is made iimediately after connection enters | 290 | /* 4. Try to fixup all. It is made immediately after connection enters |
290 | * established state. | 291 | * established state. |
291 | */ | 292 | */ |
292 | static void tcp_init_buffer_space(struct sock *sk) | 293 | static void tcp_init_buffer_space(struct sock *sk) |
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk) | |||
326 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) | 327 | static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) |
327 | { | 328 | { |
328 | struct inet_connection_sock *icsk = inet_csk(sk); | 329 | struct inet_connection_sock *icsk = inet_csk(sk); |
329 | struct sk_buff *skb; | ||
330 | unsigned int app_win = tp->rcv_nxt - tp->copied_seq; | ||
331 | int ofo_win = 0; | ||
332 | 330 | ||
333 | icsk->icsk_ack.quick = 0; | 331 | icsk->icsk_ack.quick = 0; |
334 | 332 | ||
335 | skb_queue_walk(&tp->out_of_order_queue, skb) { | 333 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && |
336 | ofo_win += skb->len; | 334 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && |
337 | } | 335 | !tcp_memory_pressure && |
338 | 336 | atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { | |
339 | /* If overcommit is due to out of order segments, | 337 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), |
340 | * do not clamp window. Try to expand rcvbuf instead. | 338 | sysctl_tcp_rmem[2]); |
341 | */ | ||
342 | if (ofo_win) { | ||
343 | if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && | ||
344 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && | ||
345 | !tcp_memory_pressure && | ||
346 | atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) | ||
347 | sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), | ||
348 | sysctl_tcp_rmem[2]); | ||
349 | } | 339 | } |
350 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { | 340 | if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) |
351 | app_win += ofo_win; | ||
352 | if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) | ||
353 | app_win >>= 1; | ||
354 | if (app_win > icsk->icsk_ack.rcv_mss) | ||
355 | app_win -= icsk->icsk_ack.rcv_mss; | ||
356 | app_win = max(app_win, 2U*tp->advmss); | ||
357 | |||
358 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); | 341 | tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss); |
359 | } | ||
360 | } | 342 | } |
361 | 343 | ||
362 | /* Receiver "autotuning" code. | 344 | /* Receiver "autotuning" code. |
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
385 | * are stalled on filesystem I/O. | 367 | * are stalled on filesystem I/O. |
386 | * | 368 | * |
387 | * Also, since we are only going for a minimum in the | 369 | * Also, since we are only going for a minimum in the |
388 | * non-timestamp case, we do not smoothe things out | 370 | * non-timestamp case, we do not smoother things out |
389 | * else with timestamps disabled convergance takes too | 371 | * else with timestamps disabled convergence takes too |
390 | * long. | 372 | * long. |
391 | */ | 373 | */ |
392 | if (!win_dep) { | 374 | if (!win_dep) { |
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep) | |||
395 | } else if (m < new_sample) | 377 | } else if (m < new_sample) |
396 | new_sample = m << 3; | 378 | new_sample = m << 3; |
397 | } else { | 379 | } else { |
398 | /* No previous mesaure. */ | 380 | /* No previous measure. */ |
399 | new_sample = m << 3; | 381 | new_sample = m << 3; |
400 | } | 382 | } |
401 | 383 | ||
@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ | |||
524 | if (icsk->icsk_ack.ato > icsk->icsk_rto) | 506 | if (icsk->icsk_ack.ato > icsk->icsk_rto) |
525 | icsk->icsk_ack.ato = icsk->icsk_rto; | 507 | icsk->icsk_ack.ato = icsk->icsk_rto; |
526 | } else if (m > icsk->icsk_rto) { | 508 | } else if (m > icsk->icsk_rto) { |
527 | /* Too long gap. Apparently sender falled to | 509 | /* Too long gap. Apparently sender failed to |
528 | * restart window, so that we send ACKs quickly. | 510 | * restart window, so that we send ACKs quickly. |
529 | */ | 511 | */ |
530 | tcp_incr_quickack(sk); | 512 | tcp_incr_quickack(sk); |
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ | |||
548 | * To save cycles in the RFC 1323 implementation it was better to break | 530 | * To save cycles in the RFC 1323 implementation it was better to break |
549 | * it up into three procedures. -- erics | 531 | * it up into three procedures. -- erics |
550 | */ | 532 | */ |
551 | static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) | 533 | static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) |
552 | { | 534 | { |
553 | struct tcp_sock *tp = tcp_sk(sk); | 535 | struct tcp_sock *tp = tcp_sk(sk); |
554 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
555 | long m = mrtt; /* RTT */ | 536 | long m = mrtt; /* RTT */ |
556 | 537 | ||
557 | /* The following amusing code comes from Jacobson's | 538 | /* The following amusing code comes from Jacobson's |
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) | |||
565 | * | 546 | * |
566 | * Funny. This algorithm seems to be very broken. | 547 | * Funny. This algorithm seems to be very broken. |
567 | * These formulae increase RTO, when it should be decreased, increase | 548 | * These formulae increase RTO, when it should be decreased, increase |
568 | * too slowly, when it should be incresed fastly, decrease too fastly | 549 | * too slowly, when it should be increased fastly, decrease too fastly |
569 | * etc. I guess in BSD RTO takes ONE value, so that it is absolutely | 550 | * etc. I guess in BSD RTO takes ONE value, so that it is absolutely |
570 | * does not matter how to _calculate_ it. Seems, it was trap | 551 | * does not matter how to _calculate_ it. Seems, it was trap |
571 | * that VJ failed to avoid. 8) | 552 | * that VJ failed to avoid. 8) |
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) | |||
610 | tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); | 591 | tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); |
611 | tp->rtt_seq = tp->snd_nxt; | 592 | tp->rtt_seq = tp->snd_nxt; |
612 | } | 593 | } |
613 | |||
614 | if (icsk->icsk_ca_ops->rtt_sample) | ||
615 | icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); | ||
616 | } | 594 | } |
617 | 595 | ||
618 | /* Calculate rto without backoff. This is the second half of Van Jacobson's | 596 | /* Calculate rto without backoff. This is the second half of Van Jacobson's |
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk) | |||
629 | * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ | 607 | * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ |
630 | * to do with delayed acks, because at cwnd>2 true delack timeout | 608 | * to do with delayed acks, because at cwnd>2 true delack timeout |
631 | * is invisible. Actually, Linux-2.4 also generates erratic | 609 | * is invisible. Actually, Linux-2.4 also generates erratic |
632 | * ACKs in some curcumstances. | 610 | * ACKs in some circumstances. |
633 | */ | 611 | */ |
634 | inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; | 612 | inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; |
635 | 613 | ||
636 | /* 2. Fixups made earlier cannot be right. | 614 | /* 2. Fixups made earlier cannot be right. |
637 | * If we do not estimate RTO correctly without them, | 615 | * If we do not estimate RTO correctly without them, |
638 | * all the algo is pure shit and should be replaced | 616 | * all the algo is pure shit and should be replaced |
639 | * with correct one. It is exaclty, which we pretend to do. | 617 | * with correct one. It is exactly, which we pretend to do. |
640 | */ | 618 | */ |
641 | } | 619 | } |
642 | 620 | ||
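The hunks above only touch comments and drop the `usrtt` plumbing, but the estimator they live in is easy to misread because everything is fixed-point: `srtt` is stored as 8x the smoothed RTT and `mdev` as 4x the mean deviation, so the `>> 3` and `>> 2` shifts implement gains of 1/8 and 1/4 without floating point, and `tcp_set_rto()`'s `(tp->srtt >> 3) + tp->rttvar` is the familiar `RTO = SRTT + 4*RTTVAR`. A runnable sketch of the same arithmetic (the kernel's `mdev_max` windowing and negative-error damping are omitted, and the `TCP_RTO_MIN` value here is illustrative):

```c
/* Jacobson/Karels RTT estimation in the kernel's fixed-point style:
 * srtt is kept <<3 and mdev <<2.  Simplified standalone sketch. */
#include <stdio.h>

#define TCP_RTO_MIN 200   /* ms, for the sketch */

struct rtt_state {
    long srtt;    /* smoothed RTT << 3   */
    long mdev;    /* mean deviation << 2 */
    long rttvar;
};

static void rtt_update(struct rtt_state *t, long m /* measured RTT, ms */)
{
    if (t->srtt != 0) {
        m -= (t->srtt >> 3);          /* m is now the error        */
        t->srtt += m;                 /* srtt += 1/8 * error       */
        if (m < 0)
            m = -m;
        m -= (t->mdev >> 2);
        t->mdev += m;                 /* mdev += 1/4 * |err| delta */
    } else {
        /* First measurement. */
        t->srtt = m << 3;
        t->mdev = m << 1;             /* makes the first RTO ~3*rtt */
    }
    t->rttvar = t->mdev > TCP_RTO_MIN ? t->mdev : TCP_RTO_MIN;
}

static long rto(const struct rtt_state *t)
{
    /* Same shape as tcp_set_rto(): rto = srtt/8 + rttvar. */
    return (t->srtt >> 3) + t->rttvar;
}

int main(void)
{
    struct rtt_state t = { 0, 0, 0 };
    long samples[] = { 100, 120, 90, 300 };
    for (int i = 0; i < 4; i++) {
        rtt_update(&t, samples[i]);
        printf("sample=%ldms srtt/8=%ldms rto=%ldms\n",
               samples[i], t.srtt >> 3, rto(&t));
    }
    return 0;
}
```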
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
794 | * to make it more realistic. | 772 | * to make it more realistic. |
795 | * | 773 | * |
796 | * A bit of theory. RTT is time passed after "normal" sized packet | 774 | * A bit of theory. RTT is time passed after "normal" sized packet |
797 | * is sent until it is ACKed. In normal curcumstances sending small | 775 | * is sent until it is ACKed. In normal circumstances sending small |
798 | * packets force peer to delay ACKs and calculation is correct too. | 776 | * packets force peer to delay ACKs and calculation is correct too. |
799 | * The algorithm is adaptive and, provided we follow specs, it | 777 | * The algorithm is adaptive and, provided we follow specs, it |
800 | * NEVER underestimate RTT. BUT! If peer tries to make some clever | 778 | * NEVER underestimate RTT. BUT! If peer tries to make some clever |
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
919 | int prior_fackets; | 897 | int prior_fackets; |
920 | u32 lost_retrans = 0; | 898 | u32 lost_retrans = 0; |
921 | int flag = 0; | 899 | int flag = 0; |
900 | int dup_sack = 0; | ||
922 | int i; | 901 | int i; |
923 | 902 | ||
924 | if (!tp->sacked_out) | 903 | if (!tp->sacked_out) |
925 | tp->fackets_out = 0; | 904 | tp->fackets_out = 0; |
926 | prior_fackets = tp->fackets_out; | 905 | prior_fackets = tp->fackets_out; |
927 | 906 | ||
928 | for (i=0; i<num_sacks; i++, sp++) { | 907 | /* SACK fastpath: |
929 | struct sk_buff *skb; | 908 | * if the only SACK change is the increase of the end_seq of |
930 | __u32 start_seq = ntohl(sp->start_seq); | 909 | * the first block then only apply that SACK block |
931 | __u32 end_seq = ntohl(sp->end_seq); | 910 | * and use retrans queue hinting otherwise slowpath */ |
932 | int fack_count = 0; | 911 | flag = 1; |
933 | int dup_sack = 0; | 912 | for (i = 0; i< num_sacks; i++) { |
913 | __u32 start_seq = ntohl(sp[i].start_seq); | ||
914 | __u32 end_seq = ntohl(sp[i].end_seq); | ||
915 | |||
916 | if (i == 0){ | ||
917 | if (tp->recv_sack_cache[i].start_seq != start_seq) | ||
918 | flag = 0; | ||
919 | } else { | ||
920 | if ((tp->recv_sack_cache[i].start_seq != start_seq) || | ||
921 | (tp->recv_sack_cache[i].end_seq != end_seq)) | ||
922 | flag = 0; | ||
923 | } | ||
924 | tp->recv_sack_cache[i].start_seq = start_seq; | ||
925 | tp->recv_sack_cache[i].end_seq = end_seq; | ||
934 | 926 | ||
935 | /* Check for D-SACK. */ | 927 | /* Check for D-SACK. */ |
936 | if (i == 0) { | 928 | if (i == 0) { |
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
962 | if (before(ack, prior_snd_una - tp->max_window)) | 954 | if (before(ack, prior_snd_una - tp->max_window)) |
963 | return 0; | 955 | return 0; |
964 | } | 956 | } |
957 | } | ||
958 | |||
959 | if (flag) | ||
960 | num_sacks = 1; | ||
961 | else { | ||
962 | int j; | ||
963 | tp->fastpath_skb_hint = NULL; | ||
964 | |||
965 | /* order SACK blocks to allow in order walk of the retrans queue */ | ||
966 | for (i = num_sacks-1; i > 0; i--) { | ||
967 | for (j = 0; j < i; j++){ | ||
968 | if (after(ntohl(sp[j].start_seq), | ||
969 | ntohl(sp[j+1].start_seq))){ | ||
970 | sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); | ||
971 | sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); | ||
972 | sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); | ||
973 | sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); | ||
974 | } | ||
975 | |||
976 | } | ||
977 | } | ||
978 | } | ||
979 | |||
980 | /* clear flag as used for different purpose in following code */ | ||
981 | flag = 0; | ||
982 | |||
983 | for (i=0; i<num_sacks; i++, sp++) { | ||
984 | struct sk_buff *skb; | ||
985 | __u32 start_seq = ntohl(sp->start_seq); | ||
986 | __u32 end_seq = ntohl(sp->end_seq); | ||
987 | int fack_count; | ||
988 | |||
989 | /* Use SACK fastpath hint if valid */ | ||
990 | if (tp->fastpath_skb_hint) { | ||
991 | skb = tp->fastpath_skb_hint; | ||
992 | fack_count = tp->fastpath_cnt_hint; | ||
993 | } else { | ||
994 | skb = sk->sk_write_queue.next; | ||
995 | fack_count = 0; | ||
996 | } | ||
965 | 997 | ||
966 | /* Event "B" in the comment above. */ | 998 | /* Event "B" in the comment above. */ |
967 | if (after(end_seq, tp->high_seq)) | 999 | if (after(end_seq, tp->high_seq)) |
968 | flag |= FLAG_DATA_LOST; | 1000 | flag |= FLAG_DATA_LOST; |
969 | 1001 | ||
970 | sk_stream_for_retrans_queue(skb, sk) { | 1002 | sk_stream_for_retrans_queue_from(skb, sk) { |
971 | int in_sack, pcount; | 1003 | int in_sack, pcount; |
972 | u8 sacked; | 1004 | u8 sacked; |
973 | 1005 | ||
1006 | tp->fastpath_skb_hint = skb; | ||
1007 | tp->fastpath_cnt_hint = fack_count; | ||
1008 | |||
974 | /* The retransmission queue is always in order, so | 1009 | /* The retransmission queue is always in order, so |
975 | * we can short-circuit the walk early. | 1010 | * we can short-circuit the walk early. |
976 | */ | 1011 | */ |
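The fastpath added here caches the previous ACK's SACK blocks in `tp->recv_sack_cache`. If the only change is the first block's `end_seq` advancing (the common case of one hole being filled in order), the tagging loop resumes from `fastpath_skb_hint`/`fastpath_cnt_hint` instead of rescanning the retransmission queue from the head; on a miss, the blocks are bubble-sorted by `start_seq` so a single front-to-back walk can service all of them. A userspace sketch of the cache comparison and the sort (host byte order, and sequence wraparound via `before()`/`after()` is ignored for brevity):

```c
/* Userspace sketch of the SACK fastpath check and block ordering. */
#include <stdio.h>
#include <stdint.h>

#define MAX_SACKS 4

struct sack_block { uint32_t start_seq, end_seq; };

static struct sack_block cache[MAX_SACKS];   /* tp->recv_sack_cache */

/* Returns 1 when only the first block's end_seq may have grown,
 * i.e. the re-sort and full queue walk can be skipped. */
static int sack_fastpath(const struct sack_block *sp, int num_sacks)
{
    int ok = 1;

    for (int i = 0; i < num_sacks; i++) {
        if (i == 0) {
            if (cache[i].start_seq != sp[i].start_seq)
                ok = 0;
        } else if (cache[i].start_seq != sp[i].start_seq ||
                   cache[i].end_seq != sp[i].end_seq) {
            ok = 0;
        }
        cache[i] = sp[i];                    /* refresh the cache */
    }
    return ok;
}

/* Slowpath: order blocks by start_seq so the retransmission queue
 * can be walked once, front to back. */
static void sack_sort(struct sack_block *sp, int num_sacks)
{
    for (int i = num_sacks - 1; i > 0; i--)
        for (int j = 0; j < i; j++)
            if (sp[j].start_seq > sp[j + 1].start_seq) {
                struct sack_block tmp = sp[j];
                sp[j] = sp[j + 1];
                sp[j + 1] = tmp;
            }
}

int main(void)
{
    struct sack_block sp[2] = { { 3000, 4000 }, { 1000, 2000 } };

    printf("fastpath: %d\n", sack_fastpath(sp, 2)); /* cold cache: 0 */
    sack_sort(sp, 2);
    printf("first block after sort: %u-%u\n",
           sp[0].start_seq, sp[0].end_seq);          /* 1000-2000 */
    return 0;
}
```

The in-kernel sort swaps through the just-refreshed `recv_sack_cache` entries rather than a temporary, but the intended ordering is the one shown above.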
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1045 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); | 1080 | TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); |
1046 | tp->lost_out -= tcp_skb_pcount(skb); | 1081 | tp->lost_out -= tcp_skb_pcount(skb); |
1047 | tp->retrans_out -= tcp_skb_pcount(skb); | 1082 | tp->retrans_out -= tcp_skb_pcount(skb); |
1083 | |||
1084 | /* clear lost hint */ | ||
1085 | tp->retransmit_skb_hint = NULL; | ||
1048 | } | 1086 | } |
1049 | } else { | 1087 | } else { |
1050 | /* New sack for not retransmitted frame, | 1088 | /* New sack for not retransmitted frame, |
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1057 | if (sacked & TCPCB_LOST) { | 1095 | if (sacked & TCPCB_LOST) { |
1058 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1096 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1059 | tp->lost_out -= tcp_skb_pcount(skb); | 1097 | tp->lost_out -= tcp_skb_pcount(skb); |
1098 | |||
1099 | /* clear lost hint */ | ||
1100 | tp->retransmit_skb_hint = NULL; | ||
1060 | } | 1101 | } |
1061 | } | 1102 | } |
1062 | 1103 | ||
@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1080 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { | 1121 | (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { |
1081 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1122 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1082 | tp->retrans_out -= tcp_skb_pcount(skb); | 1123 | tp->retrans_out -= tcp_skb_pcount(skb); |
1124 | tp->retransmit_skb_hint = NULL; | ||
1083 | } | 1125 | } |
1084 | } | 1126 | } |
1085 | } | 1127 | } |
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ | |||
1107 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1149 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1108 | tp->retrans_out -= tcp_skb_pcount(skb); | 1150 | tp->retrans_out -= tcp_skb_pcount(skb); |
1109 | 1151 | ||
1152 | /* clear lost hint */ | ||
1153 | tp->retransmit_skb_hint = NULL; | ||
1154 | |||
1110 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { | 1155 | if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { |
1111 | tp->lost_out += tcp_skb_pcount(skb); | 1156 | tp->lost_out += tcp_skb_pcount(skb); |
1112 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1157 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk) | |||
1214 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1259 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1215 | tp->high_seq = tp->frto_highmark; | 1260 | tp->high_seq = tp->frto_highmark; |
1216 | TCP_ECN_queue_cwr(tp); | 1261 | TCP_ECN_queue_cwr(tp); |
1262 | |||
1263 | clear_all_retrans_hints(tp); | ||
1217 | } | 1264 | } |
1218 | 1265 | ||
1219 | void tcp_clear_retrans(struct tcp_sock *tp) | 1266 | void tcp_clear_retrans(struct tcp_sock *tp) |
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1251 | tp->snd_cwnd_cnt = 0; | 1298 | tp->snd_cwnd_cnt = 0; |
1252 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1299 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1253 | 1300 | ||
1301 | tp->bytes_acked = 0; | ||
1254 | tcp_clear_retrans(tp); | 1302 | tcp_clear_retrans(tp); |
1255 | 1303 | ||
1256 | /* Push undo marker, if it was plain RTO and nothing | 1304 | /* Push undo marker, if it was plain RTO and nothing |
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how) | |||
1279 | tcp_set_ca_state(sk, TCP_CA_Loss); | 1327 | tcp_set_ca_state(sk, TCP_CA_Loss); |
1280 | tp->high_seq = tp->snd_nxt; | 1328 | tp->high_seq = tp->snd_nxt; |
1281 | TCP_ECN_queue_cwr(tp); | 1329 | TCP_ECN_queue_cwr(tp); |
1330 | |||
1331 | clear_all_retrans_hints(tp); | ||
1282 | } | 1332 | } |
1283 | 1333 | ||
1284 | static int tcp_check_sack_reneging(struct sock *sk) | 1334 | static int tcp_check_sack_reneging(struct sock *sk) |
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, | |||
1503 | int packets, u32 high_seq) | 1553 | int packets, u32 high_seq) |
1504 | { | 1554 | { |
1505 | struct sk_buff *skb; | 1555 | struct sk_buff *skb; |
1506 | int cnt = packets; | 1556 | int cnt; |
1507 | 1557 | ||
1508 | BUG_TRAP(cnt <= tp->packets_out); | 1558 | BUG_TRAP(packets <= tp->packets_out); |
1559 | if (tp->lost_skb_hint) { | ||
1560 | skb = tp->lost_skb_hint; | ||
1561 | cnt = tp->lost_cnt_hint; | ||
1562 | } else { | ||
1563 | skb = sk->sk_write_queue.next; | ||
1564 | cnt = 0; | ||
1565 | } | ||
1509 | 1566 | ||
1510 | sk_stream_for_retrans_queue(skb, sk) { | 1567 | sk_stream_for_retrans_queue_from(skb, sk) { |
1511 | cnt -= tcp_skb_pcount(skb); | 1568 | /* TODO: do this better */ |
1512 | if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | 1569 | /* this is not the most efficient way to do this... */ |
1570 | tp->lost_skb_hint = skb; | ||
1571 | tp->lost_cnt_hint = cnt; | ||
1572 | cnt += tcp_skb_pcount(skb); | ||
1573 | if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) | ||
1513 | break; | 1574 | break; |
1514 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1575 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { |
1515 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1576 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1516 | tp->lost_out += tcp_skb_pcount(skb); | 1577 | tp->lost_out += tcp_skb_pcount(skb); |
1578 | |||
1579 | /* clear xmit_retransmit_queue hints | ||
1580 | * if this is beyond hint */ | ||
1581 | if(tp->retransmit_skb_hint != NULL && | ||
1582 | before(TCP_SKB_CB(skb)->seq, | ||
1583 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) { | ||
1584 | |||
1585 | tp->retransmit_skb_hint = NULL; | ||
1586 | } | ||
1517 | } | 1587 | } |
1518 | } | 1588 | } |
1519 | tcp_sync_left_out(tp); | 1589 | tcp_sync_left_out(tp); |
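`tcp_mark_head_lost()` now resumes from `lost_skb_hint`/`lost_cnt_hint` instead of recounting from the queue head on every call, and the same pattern recurs throughout this diff (`fastpath_skb_hint`, `scoreboard_skb_hint`, `retransmit_skb_hint`): cache the last visited skb plus a running count, resume there next time, and invalidate the hint (`clear_all_retrans_hints()`) whenever the queue or the tag bits change behind it. The general shape, as a standalone sketch:

```c
/* Generic form of the queue-walk hint used by the patch: cache the
 * last visited node plus its running count, resume from there, and
 * drop the hint whenever the list mutates underneath it. */
#include <stdio.h>

struct node { struct node *next; int pcount; };

struct walker {
    struct node *head;
    struct node *hint;      /* last node visited ...      */
    int hint_cnt;           /* ... and the count up to it */
};

/* Count packets until 'limit' is exceeded, resuming from the hint. */
static struct node *mark_until(struct walker *w, int limit)
{
    struct node *n = w->hint ? w->hint : w->head;
    int cnt = w->hint ? w->hint_cnt : 0;

    for (; n; n = n->next) {
        w->hint = n;                 /* remember progress */
        w->hint_cnt = cnt;
        cnt += n->pcount;
        if (cnt > limit)
            break;
        /* ... tag n as lost here ... */
    }
    return n;
}

/* Any structural change invalidates the cached position. */
static void queue_changed(struct walker *w)
{
    w->hint = NULL;
    w->hint_cnt = 0;
}

int main(void)
{
    struct node c = { NULL, 1 }, b = { &c, 1 }, a = { &b, 1 };
    struct walker w = { &a, NULL, 0 };

    mark_until(&w, 1);      /* first call scans from the head  */
    mark_until(&w, 2);      /* second call resumes at the hint */
    queue_changed(&w);      /* e.g. after an skb is freed      */
    printf("hint reset: %p\n", (void *)w.hint);
    return 0;
}
```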
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) | |||
1540 | if (tcp_head_timedout(sk, tp)) { | 1610 | if (tcp_head_timedout(sk, tp)) { |
1541 | struct sk_buff *skb; | 1611 | struct sk_buff *skb; |
1542 | 1612 | ||
1543 | sk_stream_for_retrans_queue(skb, sk) { | 1613 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint |
1544 | if (tcp_skb_timedout(sk, skb) && | 1614 | : sk->sk_write_queue.next; |
1545 | !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | 1615 | |
1616 | sk_stream_for_retrans_queue_from(skb, sk) { | ||
1617 | if (!tcp_skb_timedout(sk, skb)) | ||
1618 | break; | ||
1619 | |||
1620 | if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { | ||
1546 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; | 1621 | TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; |
1547 | tp->lost_out += tcp_skb_pcount(skb); | 1622 | tp->lost_out += tcp_skb_pcount(skb); |
1623 | |||
1624 | /* clear xmit_retrans hint */ | ||
1625 | if (tp->retransmit_skb_hint && | ||
1626 | before(TCP_SKB_CB(skb)->seq, | ||
1627 | TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) | ||
1628 | |||
1629 | tp->retransmit_skb_hint = NULL; | ||
1548 | } | 1630 | } |
1549 | } | 1631 | } |
1632 | |||
1633 | tp->scoreboard_skb_hint = skb; | ||
1634 | |||
1550 | tcp_sync_left_out(tp); | 1635 | tcp_sync_left_out(tp); |
1551 | } | 1636 | } |
1552 | } | 1637 | } |
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) | |||
1626 | } | 1711 | } |
1627 | tcp_moderate_cwnd(tp); | 1712 | tcp_moderate_cwnd(tp); |
1628 | tp->snd_cwnd_stamp = tcp_time_stamp; | 1713 | tp->snd_cwnd_stamp = tcp_time_stamp; |
1714 | |||
1715 | /* There is something screwy going on with the retrans hints after | ||
1716 | an undo */ | ||
1717 | clear_all_retrans_hints(tp); | ||
1629 | } | 1718 | } |
1630 | 1719 | ||
1631 | static inline int tcp_may_undo(struct tcp_sock *tp) | 1720 | static inline int tcp_may_undo(struct tcp_sock *tp) |
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) | |||
1709 | sk_stream_for_retrans_queue(skb, sk) { | 1798 | sk_stream_for_retrans_queue(skb, sk) { |
1710 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; | 1799 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; |
1711 | } | 1800 | } |
1801 | |||
1802 | clear_all_retrans_hints(tp); | ||
1803 | |||
1712 | DBGUNDO(sk, tp, "partial loss"); | 1804 | DBGUNDO(sk, tp, "partial loss"); |
1713 | tp->lost_out = 0; | 1805 | tp->lost_out = 0; |
1714 | tp->left_out = tp->sacked_out; | 1806 | tp->left_out = tp->sacked_out; |
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1908 | TCP_ECN_queue_cwr(tp); | 2000 | TCP_ECN_queue_cwr(tp); |
1909 | } | 2001 | } |
1910 | 2002 | ||
2003 | tp->bytes_acked = 0; | ||
1911 | tp->snd_cwnd_cnt = 0; | 2004 | tp->snd_cwnd_cnt = 0; |
1912 | tcp_set_ca_state(sk, TCP_CA_Recovery); | 2005 | tcp_set_ca_state(sk, TCP_CA_Recovery); |
1913 | } | 2006 | } |
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, | |||
1919 | } | 2012 | } |
1920 | 2013 | ||
1921 | /* Read draft-ietf-tcplw-high-performance before mucking | 2014 | /* Read draft-ietf-tcplw-high-performance before mucking |
1922 | * with this code. (Superceeds RFC1323) | 2015 | * with this code. (Supersedes RFC1323) |
1923 | */ | 2016 | */ |
1924 | static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | 2017 | static void tcp_ack_saw_tstamp(struct sock *sk, int flag) |
1925 | { | 2018 | { |
1926 | /* RTTM Rule: A TSecr value received in a segment is used to | 2019 | /* RTTM Rule: A TSecr value received in a segment is used to |
1927 | * update the averaged RTT measurement only if the segment | 2020 | * update the averaged RTT measurement only if the segment |
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | |||
1932 | * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> | 2025 | * 1998/04/10 Andrey V. Savochkin <saw@msu.ru> |
1933 | * | 2026 | * |
1934 | * Changed: reset backoff as soon as we see the first valid sample. | 2027 | * Changed: reset backoff as soon as we see the first valid sample. |
1935 | * If we do not, we get strongly overstimated rto. With timestamps | 2028 | * If we do not, we get strongly overestimated rto. With timestamps |
1936 | * samples are accepted even from very old segments: f.e., when rtt=1 | 2029 | * samples are accepted even from very old segments: f.e., when rtt=1 |
1937 | * increases to 8, we retransmit 5 times and after 8 seconds delayed | 2030 | * increases to 8, we retransmit 5 times and after 8 seconds delayed |
1938 | * answer arrives rto becomes 120 seconds! If at least one of segments | 2031 | * answer arrives rto becomes 120 seconds! If at least one of segments |
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) | |||
1940 | */ | 2033 | */ |
1941 | struct tcp_sock *tp = tcp_sk(sk); | 2034 | struct tcp_sock *tp = tcp_sk(sk); |
1942 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; | 2035 | const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; |
1943 | tcp_rtt_estimator(sk, seq_rtt, usrtt); | 2036 | tcp_rtt_estimator(sk, seq_rtt); |
1944 | tcp_set_rto(sk); | 2037 | tcp_set_rto(sk); |
1945 | inet_csk(sk)->icsk_backoff = 0; | 2038 | inet_csk(sk)->icsk_backoff = 0; |
1946 | tcp_bound_rto(sk); | 2039 | tcp_bound_rto(sk); |
1947 | } | 2040 | } |
1948 | 2041 | ||
1949 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) | 2042 | static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag) |
1950 | { | 2043 | { |
1951 | /* We don't have a timestamp. Can only use | 2044 | /* We don't have a timestamp. Can only use |
1952 | * packets that are not retransmitted to determine | 2045 | * packets that are not retransmitted to determine |
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag | |||
1960 | if (flag & FLAG_RETRANS_DATA_ACKED) | 2053 | if (flag & FLAG_RETRANS_DATA_ACKED) |
1961 | return; | 2054 | return; |
1962 | 2055 | ||
1963 | tcp_rtt_estimator(sk, seq_rtt, usrtt); | 2056 | tcp_rtt_estimator(sk, seq_rtt); |
1964 | tcp_set_rto(sk); | 2057 | tcp_set_rto(sk); |
1965 | inet_csk(sk)->icsk_backoff = 0; | 2058 | inet_csk(sk)->icsk_backoff = 0; |
1966 | tcp_bound_rto(sk); | 2059 | tcp_bound_rto(sk); |
1967 | } | 2060 | } |
1968 | 2061 | ||
1969 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, | 2062 | static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, |
1970 | const s32 seq_rtt, u32 *usrtt) | 2063 | const s32 seq_rtt) |
1971 | { | 2064 | { |
1972 | const struct tcp_sock *tp = tcp_sk(sk); | 2065 | const struct tcp_sock *tp = tcp_sk(sk); |
1973 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ | 2066 | /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */ |
1974 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) | 2067 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) |
1975 | tcp_ack_saw_tstamp(sk, usrtt, flag); | 2068 | tcp_ack_saw_tstamp(sk, flag); |
1976 | else if (seq_rtt >= 0) | 2069 | else if (seq_rtt >= 0) |
1977 | tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); | 2070 | tcp_ack_no_tstamp(sk, seq_rtt, flag); |
1978 | } | 2071 | } |
1979 | 2072 | ||
1980 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, | 2073 | static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, |
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, | |||
2054 | return acked; | 2147 | return acked; |
2055 | } | 2148 | } |
2056 | 2149 | ||
2150 | static inline u32 tcp_usrtt(const struct sk_buff *skb) | ||
2151 | { | ||
2152 | struct timeval tv, now; | ||
2153 | |||
2154 | do_gettimeofday(&now); | ||
2155 | skb_get_timestamp(skb, &tv); | ||
2156 | return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec); | ||
2157 | } | ||
2057 | 2158 | ||
2058 | /* Remove acknowledged frames from the retransmission queue. */ | 2159 | /* Remove acknowledged frames from the retransmission queue. */ |
2059 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt) | 2160 | static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) |
2060 | { | 2161 | { |
2061 | struct tcp_sock *tp = tcp_sk(sk); | 2162 | struct tcp_sock *tp = tcp_sk(sk); |
2163 | const struct inet_connection_sock *icsk = inet_csk(sk); | ||
2062 | struct sk_buff *skb; | 2164 | struct sk_buff *skb; |
2063 | __u32 now = tcp_time_stamp; | 2165 | __u32 now = tcp_time_stamp; |
2064 | int acked = 0; | 2166 | int acked = 0; |
2065 | __s32 seq_rtt = -1; | 2167 | __s32 seq_rtt = -1; |
2066 | struct timeval usnow; | ||
2067 | u32 pkts_acked = 0; | 2168 | u32 pkts_acked = 0; |
2068 | 2169 | void (*rtt_sample)(struct sock *sk, u32 usrtt) | |
2069 | if (seq_usrtt) | 2170 | = icsk->icsk_ca_ops->rtt_sample; |
2070 | do_gettimeofday(&usnow); | ||
2071 | 2171 | ||
2072 | while ((skb = skb_peek(&sk->sk_write_queue)) && | 2172 | while ((skb = skb_peek(&sk->sk_write_queue)) && |
2073 | skb != sk->sk_send_head) { | 2173 | skb != sk->sk_send_head) { |
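`tcp_usrtt()` replaces the old `seq_usrtt` out-parameter threaded through `tcp_rtt_estimator()`: the microsecond RTT is now computed on demand from the skb's transmit timestamp and handed to the congestion module's optional `rtt_sample` hook through a function pointer hoisted out of the loop, so the cost is paid only when a module actually installed the hook. The arithmetic is ordinary timeval subtraction; as a standalone function:

```c
/* The elapsed-microseconds computation tcp_usrtt() performs, as a
 * standalone function over plain timevals. */
#include <stdio.h>
#include <sys/time.h>

static long elapsed_usec(const struct timeval *then,
                         const struct timeval *now)
{
    /* Seconds scaled to microseconds plus the microsecond delta;
     * the delta may be negative, the sum still comes out right. */
    return (now->tv_sec - then->tv_sec) * 1000000L +
           (now->tv_usec - then->tv_usec);
}

int main(void)
{
    struct timeval sent  = { 100, 900000 };  /* t = 100.9s */
    struct timeval acked = { 101, 200000 };  /* t = 101.2s */

    printf("%ld usec\n", elapsed_usec(&sent, &acked));   /* 300000 */
    return 0;
}
```

Note the trade-off visible in the diff: the old code read the clock once per ACK (`do_gettimeofday(&usnow)`), the new code once per sampled skb, but only on the paths that actually take a sample.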
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2107 | tp->retrans_out -= tcp_skb_pcount(skb); | 2207 | tp->retrans_out -= tcp_skb_pcount(skb); |
2108 | acked |= FLAG_RETRANS_DATA_ACKED; | 2208 | acked |= FLAG_RETRANS_DATA_ACKED; |
2109 | seq_rtt = -1; | 2209 | seq_rtt = -1; |
2110 | } else if (seq_rtt < 0) | 2210 | } else if (seq_rtt < 0) { |
2111 | seq_rtt = now - scb->when; | 2211 | seq_rtt = now - scb->when; |
2112 | if (seq_usrtt) { | 2212 | if (rtt_sample) |
2113 | struct timeval tv; | 2213 | (*rtt_sample)(sk, tcp_usrtt(skb)); |
2114 | |||
2115 | skb_get_timestamp(skb, &tv); | ||
2116 | *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 | ||
2117 | + (usnow.tv_usec - tv.tv_usec); | ||
2118 | } | 2214 | } |
2119 | |||
2120 | if (sacked & TCPCB_SACKED_ACKED) | 2215 | if (sacked & TCPCB_SACKED_ACKED) |
2121 | tp->sacked_out -= tcp_skb_pcount(skb); | 2216 | tp->sacked_out -= tcp_skb_pcount(skb); |
2122 | if (sacked & TCPCB_LOST) | 2217 | if (sacked & TCPCB_LOST) |
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt | |||
2126 | !before(scb->end_seq, tp->snd_up)) | 2221 | !before(scb->end_seq, tp->snd_up)) |
2127 | tp->urg_mode = 0; | 2222 | tp->urg_mode = 0; |
2128 | } | 2223 | } |
2129 | } else if (seq_rtt < 0) | 2224 | } else if (seq_rtt < 0) { |
2130 | seq_rtt = now - scb->when; | 2225 | seq_rtt = now - scb->when; |
2226 | if (rtt_sample) | ||
2227 | (*rtt_sample)(sk, tcp_usrtt(skb)); | ||
2228 | } | ||
2131 | tcp_dec_pcount_approx(&tp->fackets_out, skb); | 2229 | tcp_dec_pcount_approx(&tp->fackets_out, skb); |
2132 | tcp_packets_out_dec(tp, skb); | 2230 | tcp_packets_out_dec(tp, skb); |
2133 | __skb_unlink(skb, &sk->sk_write_queue); | 2231 | __skb_unlink(skb, &sk->sk_write_queue); |
2134 | sk_stream_free_skb(sk, skb); | 2232 | sk_stream_free_skb(sk, skb); |
2233 | clear_all_retrans_hints(tp); | ||
2135 | } | 2234 | } |
2136 | 2235 | ||
2137 | if (acked&FLAG_ACKED) { | 2236 | if (acked&FLAG_ACKED) { |
2138 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2237 | tcp_ack_update_rtt(sk, acked, seq_rtt); |
2139 | tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); | ||
2140 | tcp_ack_packets_out(sk, tp); | 2238 | tcp_ack_packets_out(sk, tp); |
2141 | 2239 | ||
2142 | if (icsk->icsk_ca_ops->pkts_acked) | 2240 | if (icsk->icsk_ca_ops->pkts_acked) |
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) | |||
2284 | } | 2382 | } |
2285 | 2383 | ||
2286 | /* F-RTO affects on two new ACKs following RTO. | 2384 | /* F-RTO affects on two new ACKs following RTO. |
2287 | * At latest on third ACK the TCP behavor is back to normal. | 2385 | * At latest on third ACK the TCP behavior is back to normal. |
2288 | */ | 2386 | */ |
2289 | tp->frto_counter = (tp->frto_counter + 1) % 3; | 2387 | tp->frto_counter = (tp->frto_counter + 1) % 3; |
2290 | } | 2388 | } |
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2299 | u32 ack = TCP_SKB_CB(skb)->ack_seq; | 2397 | u32 ack = TCP_SKB_CB(skb)->ack_seq; |
2300 | u32 prior_in_flight; | 2398 | u32 prior_in_flight; |
2301 | s32 seq_rtt; | 2399 | s32 seq_rtt; |
2302 | s32 seq_usrtt = 0; | ||
2303 | int prior_packets; | 2400 | int prior_packets; |
2304 | 2401 | ||
2305 | /* If the ack is newer than sent or older than previous acks | 2402 | /* If the ack is newer than sent or older than previous acks |
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2311 | if (before(ack, prior_snd_una)) | 2408 | if (before(ack, prior_snd_una)) |
2312 | goto old_ack; | 2409 | goto old_ack; |
2313 | 2410 | ||
2411 | if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR) | ||
2412 | tp->bytes_acked += ack - prior_snd_una; | ||
2413 | |||
2314 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { | 2414 | if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) { |
2315 | /* Window is constant, pure forward advance. | 2415 | /* Window is constant, pure forward advance. |
2316 | * No more checks are required. | 2416 | * No more checks are required. |
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
2352 | prior_in_flight = tcp_packets_in_flight(tp); | 2452 | prior_in_flight = tcp_packets_in_flight(tp); |
2353 | 2453 | ||
2354 | /* See if we can take anything off of the retransmit queue. */ | 2454 | /* See if we can take anything off of the retransmit queue. */ |
2355 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt, | 2455 | flag |= tcp_clean_rtx_queue(sk, &seq_rtt); |
2356 | icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); | ||
2357 | 2456 | ||
2358 | if (tp->frto_counter) | 2457 | if (tp->frto_counter) |
2359 | tcp_process_frto(sk, prior_snd_una); | 2458 | tcp_process_frto(sk, prior_snd_una); |
2360 | 2459 | ||
2361 | if (tcp_ack_is_dubious(sk, flag)) { | 2460 | if (tcp_ack_is_dubious(sk, flag)) { |
2362 | /* Advanve CWND, if state allows this. */ | 2461 | /* Advance CWND, if state allows this. */ |
2363 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) | 2462 | if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) |
2364 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); | 2463 | tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); |
2365 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); | 2464 | tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); |
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
3148 | { | 3247 | { |
3149 | struct sk_buff *skb; | 3248 | struct sk_buff *skb; |
3150 | 3249 | ||
3151 | /* First, check that queue is collapsable and find | 3250 | /* First, check that queue is collapsible and find |
3152 | * the point where collapsing can be useful. */ | 3251 | * the point where collapsing can be useful. */ |
3153 | for (skb = head; skb != tail; ) { | 3252 | for (skb = head; skb != tail; ) { |
3154 | /* No new bits? It is possible on ofo queue. */ | 3253 | /* No new bits? It is possible on ofo queue. */ |
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk) | |||
3456 | 3555 | ||
3457 | /* | 3556 | /* |
3458 | * This routine is only called when we have urgent data | 3557 | * This routine is only called when we have urgent data |
3459 | * signalled. Its the 'slow' part of tcp_urg. It could be | 3558 | * signaled. Its the 'slow' part of tcp_urg. It could be |
3460 | * moved inline now as tcp_urg is only called from one | 3559 | * moved inline now as tcp_urg is only called from one |
3461 | * place. We handle URGent data wrong. We have to - as | 3560 | * place. We handle URGent data wrong. We have to - as |
3462 | * BSD still doesn't use the correction from RFC961. | 3561 | * BSD still doesn't use the correction from RFC961. |
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) | |||
3501 | * urgent. To do this requires some care. We cannot just ignore | 3600 | * urgent. To do this requires some care. We cannot just ignore |
3502 | * tp->copied_seq since we would read the last urgent byte again | 3601 | * tp->copied_seq since we would read the last urgent byte again |
3503 | * as data, nor can we alter copied_seq until this data arrives | 3602 | * as data, nor can we alter copied_seq until this data arrives |
3504 | * or we break the sematics of SIOCATMARK (and thus sockatmark()) | 3603 | * or we break the semantics of SIOCATMARK (and thus sockatmark()) |
3505 | * | 3604 | * |
3506 | * NOTE. Double Dutch. Rendering to plain English: author of comment | 3605 | * NOTE. Double Dutch. Rendering to plain English: author of comment |
3507 | * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); | 3606 | * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB); |
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
3646 | tp->rx_opt.saw_tstamp = 0; | 3745 | tp->rx_opt.saw_tstamp = 0; |
3647 | 3746 | ||
3648 | /* pred_flags is 0xS?10 << 16 + snd_wnd | 3747 | /* pred_flags is 0xS?10 << 16 + snd_wnd |
3649 | * if header_predition is to be made | 3748 | * if header_prediction is to be made |
3650 | * 'S' will always be tp->tcp_header_len >> 2 | 3749 | * 'S' will always be tp->tcp_header_len >> 2 |
3651 | * '?' will be 0 for the fast path, otherwise pred_flags is 0 to | 3750 | * '?' will be 0 for the fast path, otherwise pred_flags is 0 to |
3652 | * turn it off (when there are holes in the receive | 3751 | * turn it off (when there are holes in the receive |
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
4242 | */ | 4341 | */ |
4243 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && | 4342 | if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && |
4244 | !tp->srtt) | 4343 | !tp->srtt) |
4245 | tcp_ack_saw_tstamp(sk, NULL, 0); | 4344 | tcp_ack_saw_tstamp(sk, 0); |
4246 | 4345 | ||
4247 | if (tp->rx_opt.tstamp_ok) | 4346 | if (tp->rx_opt.tstamp_ok) |
4248 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; | 4347 | tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; |
@@ -4372,6 +4471,7 @@ discard: | |||
4372 | 4471 | ||
4373 | EXPORT_SYMBOL(sysctl_tcp_ecn); | 4472 | EXPORT_SYMBOL(sysctl_tcp_ecn); |
4374 | EXPORT_SYMBOL(sysctl_tcp_reordering); | 4473 | EXPORT_SYMBOL(sysctl_tcp_reordering); |
4474 | EXPORT_SYMBOL(sysctl_tcp_abc); | ||
4375 | EXPORT_SYMBOL(tcp_parse_options); | 4475 | EXPORT_SYMBOL(tcp_parse_options); |
4376 | EXPORT_SYMBOL(tcp_rcv_established); | 4476 | EXPORT_SYMBOL(tcp_rcv_established); |
4377 | EXPORT_SYMBOL(tcp_rcv_state_process); | 4477 | EXPORT_SYMBOL(tcp_rcv_state_process); |