author     Linus Torvalds <torvalds@g5.osdl.org>  2005-11-11 00:24:21 -0500
committer  Linus Torvalds <torvalds@g5.osdl.org>  2005-11-11 00:24:21 -0500
commit     79ffeeb9e66da8c60de8c8ab676658bcbc47c1f7 (patch)
tree       f3c3841e1d4b5c7cd3695fe34ff23e0be08d7dac /net/ipv4/tcp_input.c
parent     a5aac37f1cdbbd1e587fc618e778ddae124e5ac3 (diff)
parent     6a438bbe68c7013a42d9c5aee5a40d7dafdbe6ec (diff)

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

Diffstat (limited to 'net/ipv4/tcp_input.c')
 -rw-r--r--  net/ipv4/tcp_input.c | 288
 1 file changed, 194 insertions, 94 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3e98b57578dc..40a26b7157b4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -42,7 +42,7 @@
  * Andi Kleen : Moved open_request checking here
  * and process RSTs for open_requests.
  * Andi Kleen : Better prune_queue, and other fixes.
- * Andrey Savochkin: Fix RTT measurements in the presnce of
+ * Andrey Savochkin: Fix RTT measurements in the presence of
  * timestamps.
  * Andrey Savochkin: Check sequence numbers correctly when
  * removing SACKs due to in sequence incoming
@@ -89,6 +89,7 @@ int sysctl_tcp_frto;
 int sysctl_tcp_nometrics_save;

 int sysctl_tcp_moderate_rcvbuf = 1;
+int sysctl_tcp_abc = 1;

 #define FLAG_DATA 0x01 /* Incoming frame contained data. */
 #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
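The new sysctl above gates Appropriate Byte Counting (RFC 3465): cwnd grows in
proportion to the number of bytes acknowledged rather than the number of ACK
frames, which resists ACK division and compensates for delayed ACKs. A minimal
user-space sketch of the idea (illustrative only; the kernel folds this into
its congestion-control modules, and the struct and field names here are
invented):

	#include <stdint.h>

	struct abc_state {
		uint32_t cwnd;        /* congestion window, in segments */
		uint32_t bytes_acked; /* acked bytes not yet credited */
	};

	/* Slow-start step under RFC 3465: open the window by one segment
	 * per MSS worth of acked bytes, not one segment per ACK frame. */
	static void abc_on_ack(struct abc_state *s, uint32_t acked_bytes,
			       uint32_t mss)
	{
		s->bytes_acked += acked_bytes;
		while (s->bytes_acked >= mss) {
			s->bytes_acked -= mss;
			s->cwnd++;
		}
	}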
@@ -223,7 +224,7 @@ static void tcp_fixup_sndbuf(struct sock *sk)
  * of receiver window. Check #2.
  *
  * The scheme does not work when sender sends good segments opening
- * window and then starts to feed us spagetti. But it should work
+ * window and then starts to feed us spaghetti. But it should work
  * in common situations. Otherwise, we have to rely on queue collapsing.
  */

@@ -233,7 +234,7 @@ static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp,
 {
         /* Optimize this! */
         int truesize = tcp_win_from_space(skb->truesize)/2;
-        int window = tcp_full_space(sk)/2;
+        int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;

         while (tp->rcv_ssthresh <= window) {
                 if (truesize <= skb->len)
@@ -277,7 +278,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
         int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff);

         /* Try to select rcvbuf so that 4 mss-sized segments
-         * will fit to window and correspoding skbs will fit to our rcvbuf.
+         * will fit to window and corresponding skbs will fit to our rcvbuf.
          * (was 3; 4 is minimum to allow fast retransmit to work.)
          */
         while (tcp_win_from_space(rcvmem) < tp->advmss)
@@ -286,7 +287,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
         sk->sk_rcvbuf = min(4 * rcvmem, sysctl_tcp_rmem[2]);
 }

-/* 4. Try to fixup all. It is made iimediately after connection enters
+/* 4. Try to fixup all. It is made immediately after connection enters
  * established state.
  */
 static void tcp_init_buffer_space(struct sock *sk)
@@ -326,37 +327,18 @@ static void tcp_init_buffer_space(struct sock *sk)
 static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp)
 {
         struct inet_connection_sock *icsk = inet_csk(sk);
-        struct sk_buff *skb;
-        unsigned int app_win = tp->rcv_nxt - tp->copied_seq;
-        int ofo_win = 0;

         icsk->icsk_ack.quick = 0;

-        skb_queue_walk(&tp->out_of_order_queue, skb) {
-                ofo_win += skb->len;
-        }
-
-        /* If overcommit is due to out of order segments,
-         * do not clamp window. Try to expand rcvbuf instead.
-         */
-        if (ofo_win) {
-                if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
-                    !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
-                    !tcp_memory_pressure &&
-                    atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0])
-                        sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
-                                            sysctl_tcp_rmem[2]);
+        if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] &&
+            !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
+            !tcp_memory_pressure &&
+            atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
+                sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc),
+                                    sysctl_tcp_rmem[2]);
         }
-        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
-                app_win += ofo_win;
-                if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf)
-                        app_win >>= 1;
-                if (app_win > icsk->icsk_ack.rcv_mss)
-                        app_win -= icsk->icsk_ack.rcv_mss;
-                app_win = max(app_win, 2U*tp->advmss);
-
+        if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
                 tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
-        }
 }

 /* Receiver "autotuning" code.
@@ -385,8 +367,8 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
                  * are stalled on filesystem I/O.
                  *
                  * Also, since we are only going for a minimum in the
-                 * non-timestamp case, we do not smoothe things out
-                 * else with timestamps disabled convergance takes too
+                 * non-timestamp case, we do not smoother things out
+                 * else with timestamps disabled convergence takes too
                  * long.
                  */
                 if (!win_dep) {
@@ -395,7 +377,7 @@ static void tcp_rcv_rtt_update(struct tcp_sock *tp, u32 sample, int win_dep)
                 } else if (m < new_sample)
                         new_sample = m << 3;
         } else {
-                /* No previous mesaure. */
+                /* No previous measure. */
                 new_sample = m << 3;
         }

@@ -524,7 +506,7 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
                                 if (icsk->icsk_ack.ato > icsk->icsk_rto)
                                         icsk->icsk_ack.ato = icsk->icsk_rto;
                         } else if (m > icsk->icsk_rto) {
-                                /* Too long gap. Apparently sender falled to
+                                /* Too long gap. Apparently sender failed to
                                  * restart window, so that we send ACKs quickly.
                                  */
                                 tcp_incr_quickack(sk);
@@ -548,10 +530,9 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_
  * To save cycles in the RFC 1323 implementation it was better to break
  * it up into three procedures. -- erics
  */
-static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
+static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
 {
         struct tcp_sock *tp = tcp_sk(sk);
-        const struct inet_connection_sock *icsk = inet_csk(sk);
         long m = mrtt; /* RTT */

         /* The following amusing code comes from Jacobson's
@@ -565,7 +546,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
          *
          * Funny. This algorithm seems to be very broken.
          * These formulae increase RTO, when it should be decreased, increase
-         * too slowly, when it should be incresed fastly, decrease too fastly
+         * too slowly, when it should be increased fastly, decrease too fastly
          * etc. I guess in BSD RTO takes ONE value, so that it is absolutely
          * does not matter how to _calculate_ it. Seems, it was trap
          * that VJ failed to avoid. 8)
@@ -610,9 +591,6 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt)
                 tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
                 tp->rtt_seq = tp->snd_nxt;
         }
-
-        if (icsk->icsk_ca_ops->rtt_sample)
-                icsk->icsk_ca_ops->rtt_sample(sk, *usrtt);
 }

 /* Calculate rto without backoff. This is the second half of Van Jacobson's
@@ -629,14 +607,14 @@ static inline void tcp_set_rto(struct sock *sk)
          * at least by solaris and freebsd. "Erratic ACKs" has _nothing_
          * to do with delayed acks, because at cwnd>2 true delack timeout
          * is invisible. Actually, Linux-2.4 also generates erratic
-         * ACKs in some curcumstances.
+         * ACKs in some circumstances.
          */
         inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar;

         /* 2. Fixups made earlier cannot be right.
          * If we do not estimate RTO correctly without them,
          * all the algo is pure shit and should be replaced
-         * with correct one. It is exaclty, which we pretend to do.
+         * with correct one. It is exactly, which we pretend to do.
          */
 }

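For reference, the estimator these hunks touch is Jacobson's: srtt is kept
scaled by 8 and the mean deviation by 4 so the exponential averages stay in
integer arithmetic, and tcp_set_rto() above computes rto = srtt/8 + rttvar.
A standalone sketch of the core update (the kernel additionally tracks rttvar
through mdev_max per round trip and clamps the result; this is not the kernel
code):

	#include <stdint.h>

	struct rtt_est {
		int32_t srtt; /* smoothed RTT, scaled by 8 */
		int32_t mdev; /* mean deviation, scaled by 4 */
	};

	static uint32_t rtt_update(struct rtt_est *e, int32_t mrtt)
	{
		int32_t m = mrtt;

		if (e->srtt == 0) {          /* first measurement */
			e->srtt = m << 3;
			e->mdev = m << 1;    /* so the initial rto ~ 3*rtt */
		} else {
			m -= (e->srtt >> 3); /* error term */
			e->srtt += m;        /* srtt = 7/8 srtt + 1/8 mrtt */
			if (m < 0)
				m = -m;
			m -= (e->mdev >> 2);
			e->mdev += m;        /* mdev = 3/4 mdev + 1/4 |err| */
		}
		return (e->srtt >> 3) + e->mdev; /* rto, before clamping */
	}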
@@ -794,7 +772,7 @@ static void tcp_init_metrics(struct sock *sk)
          * to make it more realistic.
          *
          * A bit of theory. RTT is time passed after "normal" sized packet
-         * is sent until it is ACKed. In normal curcumstances sending small
+         * is sent until it is ACKed. In normal circumstances sending small
          * packets force peer to delay ACKs and calculation is correct too.
          * The algorithm is adaptive and, provided we follow specs, it
          * NEVER underestimate RTT. BUT! If peer tries to make some clever
@@ -919,18 +897,32 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
         int prior_fackets;
         u32 lost_retrans = 0;
         int flag = 0;
+        int dup_sack = 0;
         int i;

         if (!tp->sacked_out)
                 tp->fackets_out = 0;
         prior_fackets = tp->fackets_out;

-        for (i=0; i<num_sacks; i++, sp++) {
-                struct sk_buff *skb;
-                __u32 start_seq = ntohl(sp->start_seq);
-                __u32 end_seq = ntohl(sp->end_seq);
-                int fack_count = 0;
-                int dup_sack = 0;
+        /* SACK fastpath:
+         * if the only SACK change is the increase of the end_seq of
+         * the first block then only apply that SACK block
+         * and use retrans queue hinting otherwise slowpath */
+        flag = 1;
+        for (i = 0; i< num_sacks; i++) {
+                __u32 start_seq = ntohl(sp[i].start_seq);
+                __u32 end_seq = ntohl(sp[i].end_seq);
+
+                if (i == 0){
+                        if (tp->recv_sack_cache[i].start_seq != start_seq)
+                                flag = 0;
+                } else {
+                        if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
+                            (tp->recv_sack_cache[i].end_seq != end_seq))
+                                flag = 0;
+                }
+                tp->recv_sack_cache[i].start_seq = start_seq;
+                tp->recv_sack_cache[i].end_seq = end_seq;

                 /* Check for D-SACK. */
                 if (i == 0) {
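The cache comparison being added here is simple: tp->recv_sack_cache[] holds
the SACK blocks seen on the previous ACK, and the fastpath holds when block 0
keeps its start_seq (its end_seq may grow) and every later block is unchanged.
A host-byte-order sketch of just that test (types simplified from the kernel's
struct tcp_sack_block; this is not the kernel code itself):

	#include <stdbool.h>
	#include <stdint.h>

	struct sack_block { uint32_t start_seq, end_seq; };

	static bool sack_fastpath(struct sack_block *cache,
				  const struct sack_block *sp, int num_sacks)
	{
		bool fast = true;

		for (int i = 0; i < num_sacks; i++) {
			if (i == 0) {
				if (cache[i].start_seq != sp[i].start_seq)
					fast = false; /* new first block */
			} else if (cache[i].start_seq != sp[i].start_seq ||
				   cache[i].end_seq != sp[i].end_seq) {
				fast = false;         /* any other change */
			}
			cache[i] = sp[i]; /* remember for the next ACK */
		}
		return fast;
	}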
@@ -962,15 +954,58 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                         if (before(ack, prior_snd_una - tp->max_window))
                                 return 0;
                 }
+        }
+
+        if (flag)
+                num_sacks = 1;
+        else {
+                int j;
+                tp->fastpath_skb_hint = NULL;
+
+                /* order SACK blocks to allow in order walk of the retrans queue */
+                for (i = num_sacks-1; i > 0; i--) {
+                        for (j = 0; j < i; j++){
+                                if (after(ntohl(sp[j].start_seq),
+                                          ntohl(sp[j+1].start_seq))){
+                                        sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq);
+                                        sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq);
+                                        sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq);
+                                        sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq);
+                                }
+
+                        }
+                }
+        }
+
+        /* clear flag as used for different purpose in following code */
+        flag = 0;
+
+        for (i=0; i<num_sacks; i++, sp++) {
+                struct sk_buff *skb;
+                __u32 start_seq = ntohl(sp->start_seq);
+                __u32 end_seq = ntohl(sp->end_seq);
+                int fack_count;
+
+                /* Use SACK fastpath hint if valid */
+                if (tp->fastpath_skb_hint) {
+                        skb = tp->fastpath_skb_hint;
+                        fack_count = tp->fastpath_cnt_hint;
+                } else {
+                        skb = sk->sk_write_queue.next;
+                        fack_count = 0;
+                }

                 /* Event "B" in the comment above. */
                 if (after(end_seq, tp->high_seq))
                         flag |= FLAG_DATA_LOST;

-                sk_stream_for_retrans_queue(skb, sk) {
+                sk_stream_for_retrans_queue_from(skb, sk) {
                         int in_sack, pcount;
                         u8 sacked;

+                        tp->fastpath_skb_hint = skb;
+                        tp->fastpath_cnt_hint = fack_count;
+
                         /* The retransmission queue is always in order, so
                          * we can short-circuit the walk early.
                          */
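On the slowpath the blocks are bubble-sorted by start_seq so the retransmit
queue can be walked once, in order; with at most four SACK blocks per ACK the
O(n^2) sort is harmless. The kernel swaps via the just-refreshed
recv_sack_cache because sp[] holds network-byte-order values; here is a plain
host-byte-order sketch (reusing struct sack_block from the sketch above, with
an ordinary `>` standing in for the kernel's wrap-safe after()):

	static void sort_sack_blocks(struct sack_block *sp, int num_sacks)
	{
		for (int i = num_sacks - 1; i > 0; i--) {
			for (int j = 0; j < i; j++) {
				if (sp[j].start_seq > sp[j + 1].start_seq) {
					struct sack_block tmp = sp[j];

					sp[j] = sp[j + 1];
					sp[j + 1] = tmp;
				}
			}
		}
	}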
@@ -1045,6 +1080,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                                 TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
                                                 tp->lost_out -= tcp_skb_pcount(skb);
                                                 tp->retrans_out -= tcp_skb_pcount(skb);
+
+                                                /* clear lost hint */
+                                                tp->retransmit_skb_hint = NULL;
                                         }
                                 } else {
                                         /* New sack for not retransmitted frame,
@@ -1057,6 +1095,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                 if (sacked & TCPCB_LOST) {
                                         TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                                         tp->lost_out -= tcp_skb_pcount(skb);
+
+                                        /* clear lost hint */
+                                        tp->retransmit_skb_hint = NULL;
                                 }
                         }

@@ -1080,6 +1121,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                             (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
                                 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                                 tp->retrans_out -= tcp_skb_pcount(skb);
+                                tp->retransmit_skb_hint = NULL;
                         }
                 }
         }
@@ -1107,6 +1149,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
                                 tp->retrans_out -= tcp_skb_pcount(skb);

+                                /* clear lost hint */
+                                tp->retransmit_skb_hint = NULL;
+
                                 if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
                                         tp->lost_out += tcp_skb_pcount(skb);
                                         TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@ -1214,6 +1259,8 @@ static void tcp_enter_frto_loss(struct sock *sk)
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->frto_highmark;
         TCP_ECN_queue_cwr(tp);
+
+        clear_all_retrans_hints(tp);
 }

 void tcp_clear_retrans(struct tcp_sock *tp)
@@ -1251,6 +1298,7 @@ void tcp_enter_loss(struct sock *sk, int how)
         tp->snd_cwnd_cnt = 0;
         tp->snd_cwnd_stamp = tcp_time_stamp;

+        tp->bytes_acked = 0;
         tcp_clear_retrans(tp);

         /* Push undo marker, if it was plain RTO and nothing
@@ -1279,6 +1327,8 @@ void tcp_enter_loss(struct sock *sk, int how)
         tcp_set_ca_state(sk, TCP_CA_Loss);
         tp->high_seq = tp->snd_nxt;
         TCP_ECN_queue_cwr(tp);
+
+        clear_all_retrans_hints(tp);
 }

 static int tcp_check_sack_reneging(struct sock *sk)
@@ -1503,17 +1553,37 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
                                int packets, u32 high_seq)
 {
         struct sk_buff *skb;
-        int cnt = packets;
+        int cnt;

-        BUG_TRAP(cnt <= tp->packets_out);
+        BUG_TRAP(packets <= tp->packets_out);
+        if (tp->lost_skb_hint) {
+                skb = tp->lost_skb_hint;
+                cnt = tp->lost_cnt_hint;
+        } else {
+                skb = sk->sk_write_queue.next;
+                cnt = 0;
+        }

-        sk_stream_for_retrans_queue(skb, sk) {
-                cnt -= tcp_skb_pcount(skb);
-                if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+        sk_stream_for_retrans_queue_from(skb, sk) {
+                /* TODO: do this better */
+                /* this is not the most efficient way to do this... */
+                tp->lost_skb_hint = skb;
+                tp->lost_cnt_hint = cnt;
+                cnt += tcp_skb_pcount(skb);
+                if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq))
                         break;
                 if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
                         TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                         tp->lost_out += tcp_skb_pcount(skb);
+
+                        /* clear xmit_retransmit_queue hints
+                         * if this is beyond hint */
+                        if(tp->retransmit_skb_hint != NULL &&
+                           before(TCP_SKB_CB(skb)->seq,
+                                  TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) {
+
+                                tp->retransmit_skb_hint = NULL;
+                        }
                 }
         }
         tcp_sync_left_out(tp);
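The pattern introduced here (and in the scoreboard hunk below) is the same in
each case: remember where the last walk stopped, as an skb pointer plus a
running packet count, and resume there on the next ACK instead of rescanning
the whole retransmit queue from the head. A detached sketch of that resume
logic over a plain singly linked list (the node and field names are invented;
the kernel walks sk->sk_write_queue and counts in tcp_skb_pcount() units):

	struct pkt { struct pkt *next; int pcount; };
	struct walk_hint { struct pkt *skb; int cnt; };

	static void mark_head_lost(struct walk_hint *hint, struct pkt *head,
				   int packets)
	{
		struct pkt *skb = hint->skb ? hint->skb : head;
		int cnt = hint->skb ? hint->cnt : 0;

		for (; skb != NULL; skb = skb->next) {
			hint->skb = skb; /* save the resume point */
			hint->cnt = cnt;
			cnt += skb->pcount;
			if (cnt > packets) /* marked enough packets */
				break;
			/* ... tag skb as lost here ... */
		}
	}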
@@ -1540,13 +1610,28 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
         if (tcp_head_timedout(sk, tp)) {
                 struct sk_buff *skb;

-                sk_stream_for_retrans_queue(skb, sk) {
-                        if (tcp_skb_timedout(sk, skb) &&
-                            !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
+                skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
+                        : sk->sk_write_queue.next;
+
+                sk_stream_for_retrans_queue_from(skb, sk) {
+                        if (!tcp_skb_timedout(sk, skb))
+                                break;
+
+                        if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
                                 TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
                                 tp->lost_out += tcp_skb_pcount(skb);
+
+                                /* clear xmit_retrans hint */
+                                if (tp->retransmit_skb_hint &&
+                                    before(TCP_SKB_CB(skb)->seq,
+                                           TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+
+                                        tp->retransmit_skb_hint = NULL;
                         }
                 }
+
+                tp->scoreboard_skb_hint = skb;
+
                 tcp_sync_left_out(tp);
         }
 }
@@ -1626,6 +1711,10 @@ static void tcp_undo_cwr(struct sock *sk, const int undo)
         }
         tcp_moderate_cwnd(tp);
         tp->snd_cwnd_stamp = tcp_time_stamp;
+
+        /* There is something screwy going on with the retrans hints after
+           an undo */
+        clear_all_retrans_hints(tp);
 }

 static inline int tcp_may_undo(struct tcp_sock *tp)
@@ -1709,6 +1798,9 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
                 sk_stream_for_retrans_queue(skb, sk) {
                         TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
                 }
+
+                clear_all_retrans_hints(tp);
+
                 DBGUNDO(sk, tp, "partial loss");
                 tp->lost_out = 0;
                 tp->left_out = tp->sacked_out;
@@ -1908,6 +2000,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
                         TCP_ECN_queue_cwr(tp);
                 }

+                tp->bytes_acked = 0;
                 tp->snd_cwnd_cnt = 0;
                 tcp_set_ca_state(sk, TCP_CA_Recovery);
         }
@@ -1919,9 +2012,9 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
 }

 /* Read draft-ietf-tcplw-high-performance before mucking
- * with this code. (Superceeds RFC1323)
+ * with this code. (Supersedes RFC1323)
  */
-static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
+static void tcp_ack_saw_tstamp(struct sock *sk, int flag)
 {
         /* RTTM Rule: A TSecr value received in a segment is used to
          * update the averaged RTT measurement only if the segment
@@ -1932,7 +2025,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag)
          * 1998/04/10 Andrey V. Savochkin <saw@msu.ru>
          *
          * Changed: reset backoff as soon as we see the first valid sample.
-         * If we do not, we get strongly overstimated rto. With timestamps
+         * If we do not, we get strongly overestimated rto. With timestamps
          * samples are accepted even from very old segments: f.e., when rtt=1
          * increases to 8, we retransmit 5 times and after 8 seconds delayed
          * answer arrives rto becomes 120 seconds! If at least one of segments
@@ -1940,13 +2033,13 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag
          */
         struct tcp_sock *tp = tcp_sk(sk);
         const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr;
-        tcp_rtt_estimator(sk, seq_rtt, usrtt);
+        tcp_rtt_estimator(sk, seq_rtt);
         tcp_set_rto(sk);
         inet_csk(sk)->icsk_backoff = 0;
         tcp_bound_rto(sk);
 }

-static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag)
+static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, int flag)
 {
         /* We don't have a timestamp. Can only use
          * packets that are not retransmitted to determine
@@ -1960,21 +2053,21 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag
         if (flag & FLAG_RETRANS_DATA_ACKED)
                 return;

-        tcp_rtt_estimator(sk, seq_rtt, usrtt);
+        tcp_rtt_estimator(sk, seq_rtt);
         tcp_set_rto(sk);
         inet_csk(sk)->icsk_backoff = 0;
         tcp_bound_rto(sk);
 }

 static inline void tcp_ack_update_rtt(struct sock *sk, const int flag,
-                                      const s32 seq_rtt, u32 *usrtt)
+                                      const s32 seq_rtt)
 {
         const struct tcp_sock *tp = tcp_sk(sk);
         /* Note that peer MAY send zero echo. In this case it is ignored. (rfc1323) */
         if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
-                tcp_ack_saw_tstamp(sk, usrtt, flag);
+                tcp_ack_saw_tstamp(sk, flag);
         else if (seq_rtt >= 0)
-                tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag);
+                tcp_ack_no_tstamp(sk, seq_rtt, flag);
 }

 static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
@@ -2054,20 +2147,27 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
         return acked;
 }

+static inline u32 tcp_usrtt(const struct sk_buff *skb)
+{
+        struct timeval tv, now;
+
+        do_gettimeofday(&now);
+        skb_get_timestamp(skb, &tv);
+        return (now.tv_sec - tv.tv_sec) * 1000000 + (now.tv_usec - tv.tv_usec);
+}

 /* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
 {
         struct tcp_sock *tp = tcp_sk(sk);
+        const struct inet_connection_sock *icsk = inet_csk(sk);
         struct sk_buff *skb;
         __u32 now = tcp_time_stamp;
         int acked = 0;
         __s32 seq_rtt = -1;
-        struct timeval usnow;
         u32 pkts_acked = 0;
-
-        if (seq_usrtt)
-                do_gettimeofday(&usnow);
+        void (*rtt_sample)(struct sock *sk, u32 usrtt)
+                = icsk->icsk_ca_ops->rtt_sample;

         while ((skb = skb_peek(&sk->sk_write_queue)) &&
                skb != sk->sk_send_head) {
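The new tcp_usrtt() helper above is just the microsecond difference between
the skb's transmit timestamp and the current time, which lets the rtt_sample
callback run lazily, only when a congestion module actually installed one.
A user-space equivalent using the ordinary gettimeofday(2) in place of the
kernel's do_gettimeofday()/skb_get_timestamp() pair:

	#include <stdint.h>
	#include <sys/time.h>

	/* Microseconds elapsed since 'sent' was stamped. */
	static uint32_t usrtt(const struct timeval *sent)
	{
		struct timeval now;

		gettimeofday(&now, NULL);
		return (now.tv_sec - sent->tv_sec) * 1000000 +
		       (now.tv_usec - sent->tv_usec);
	}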
@@ -2107,16 +2207,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
                                         tp->retrans_out -= tcp_skb_pcount(skb);
                                 acked |= FLAG_RETRANS_DATA_ACKED;
                                 seq_rtt = -1;
-                        } else if (seq_rtt < 0)
+                        } else if (seq_rtt < 0) {
                                 seq_rtt = now - scb->when;
-                        if (seq_usrtt) {
-                                struct timeval tv;
-
-                                skb_get_timestamp(skb, &tv);
-                                *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000
-                                        + (usnow.tv_usec - tv.tv_usec);
+                                if (rtt_sample)
+                                        (*rtt_sample)(sk, tcp_usrtt(skb));
                         }
-
                         if (sacked & TCPCB_SACKED_ACKED)
                                 tp->sacked_out -= tcp_skb_pcount(skb);
                         if (sacked & TCPCB_LOST)
@@ -2126,17 +2221,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
                                     !before(scb->end_seq, tp->snd_up))
                                         tp->urg_mode = 0;
                         }
-                } else if (seq_rtt < 0)
+                } else if (seq_rtt < 0) {
                         seq_rtt = now - scb->when;
+                        if (rtt_sample)
+                                (*rtt_sample)(sk, tcp_usrtt(skb));
+                }
                 tcp_dec_pcount_approx(&tp->fackets_out, skb);
                 tcp_packets_out_dec(tp, skb);
                 __skb_unlink(skb, &sk->sk_write_queue);
                 sk_stream_free_skb(sk, skb);
+                clear_all_retrans_hints(tp);
         }

         if (acked&FLAG_ACKED) {
-                const struct inet_connection_sock *icsk = inet_csk(sk);
-                tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt);
+                tcp_ack_update_rtt(sk, acked, seq_rtt);
                 tcp_ack_packets_out(sk, tp);

                 if (icsk->icsk_ca_ops->pkts_acked)
@@ -2284,7 +2382,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una)
         }

         /* F-RTO affects on two new ACKs following RTO.
-         * At latest on third ACK the TCP behavor is back to normal.
+         * At latest on third ACK the TCP behavior is back to normal.
          */
         tp->frto_counter = (tp->frto_counter + 1) % 3;
 }
@@ -2299,7 +2397,6 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         u32 ack = TCP_SKB_CB(skb)->ack_seq;
         u32 prior_in_flight;
         s32 seq_rtt;
-        s32 seq_usrtt = 0;
         int prior_packets;

         /* If the ack is newer than sent or older than previous acks
@@ -2311,6 +2408,9 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         if (before(ack, prior_snd_una))
                 goto old_ack;

+        if (sysctl_tcp_abc && icsk->icsk_ca_state < TCP_CA_CWR)
+                tp->bytes_acked += ack - prior_snd_una;
+
         if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
                 /* Window is constant, pure forward advance.
                  * No more checks are required.
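Taken together with the tp->bytes_acked = 0 hunks in tcp_enter_loss() and
tcp_fastretrans_alert(), this gives the byte counter a clear lifecycle:
charged on each ACK while the connection is below the CWR state, consumed by
the congestion-avoidance code (see the earlier sketch), and zeroed on entry
to loss or recovery so a burst after recovery cannot inflate cwnd. A compact
sketch of that lifecycle (state and field names are illustrative, not the
kernel's):

	#include <stdint.h>

	enum ca_state { CA_OPEN, CA_DISORDER, CA_CWR, CA_RECOVERY, CA_LOSS };

	struct abc_sock {
		enum ca_state state;
		uint32_t snd_una;     /* oldest unacknowledged sequence */
		uint32_t bytes_acked; /* RFC 3465 byte counter */
	};

	static void on_ack(struct abc_sock *sk, uint32_t ack)
	{
		if (sk->state < CA_CWR) /* mirrors icsk_ca_state < TCP_CA_CWR */
			sk->bytes_acked += ack - sk->snd_una;
		sk->snd_una = ack;
	}

	static void enter_loss_or_recovery(struct abc_sock *sk, enum ca_state s)
	{
		sk->state = s;
		sk->bytes_acked = 0; /* mirrors the bytes_acked = 0 hunks above */
	}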
@@ -2352,14 +2452,13 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
         prior_in_flight = tcp_packets_in_flight(tp);

         /* See if we can take anything off of the retransmit queue. */
-        flag |= tcp_clean_rtx_queue(sk, &seq_rtt,
-                icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL);
+        flag |= tcp_clean_rtx_queue(sk, &seq_rtt);

         if (tp->frto_counter)
                 tcp_process_frto(sk, prior_snd_una);

         if (tcp_ack_is_dubious(sk, flag)) {
-                /* Advanve CWND, if state allows this. */
+                /* Advance CWND, if state allows this. */
                 if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
                         tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
                 tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
@@ -3148,7 +3247,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list,
 {
         struct sk_buff *skb;

-        /* First, check that queue is collapsable and find
+        /* First, check that queue is collapsible and find
          * the point where collapsing can be useful. */
         for (skb = head; skb != tail; ) {
                 /* No new bits? It is possible on ofo queue. */
@@ -3456,7 +3555,7 @@ static __inline__ void tcp_ack_snd_check(struct sock *sk)

 /*
  * This routine is only called when we have urgent data
- * signalled. Its the 'slow' part of tcp_urg. It could be
+ * signaled. Its the 'slow' part of tcp_urg. It could be
  * moved inline now as tcp_urg is only called from one
  * place. We handle URGent data wrong. We have to - as
  * BSD still doesn't use the correction from RFC961.
@@ -3501,7 +3600,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
          * urgent. To do this requires some care. We cannot just ignore
          * tp->copied_seq since we would read the last urgent byte again
          * as data, nor can we alter copied_seq until this data arrives
-         * or we break the sematics of SIOCATMARK (and thus sockatmark())
+         * or we break the semantics of SIOCATMARK (and thus sockatmark())
          *
          * NOTE. Double Dutch. Rendering to plain English: author of comment
          * above did something sort of send("A", MSG_OOB); send("B", MSG_OOB);
@@ -3646,7 +3745,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
         tp->rx_opt.saw_tstamp = 0;

         /* pred_flags is 0xS?10 << 16 + snd_wnd
-         * if header_predition is to be made
+         * if header_prediction is to be made
          * 'S' will always be tp->tcp_header_len >> 2
          * '?' will be 0 for the fast path, otherwise pred_flags is 0 to
          * turn it off (when there are holes in the receive
@@ -4242,7 +4341,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
                  */
                 if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
                     !tp->srtt)
-                        tcp_ack_saw_tstamp(sk, NULL, 0);
+                        tcp_ack_saw_tstamp(sk, 0);

                 if (tp->rx_opt.tstamp_ok)
                         tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -4372,6 +4471,7 @@ discard:

 EXPORT_SYMBOL(sysctl_tcp_ecn);
 EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_abc);
 EXPORT_SYMBOL(tcp_parse_options);
 EXPORT_SYMBOL(tcp_rcv_established);
 EXPORT_SYMBOL(tcp_rcv_state_process);
