diff options
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 84 |
1 files changed, 53 insertions, 31 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 25a89eaa669d..5d083855c111 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -355,6 +355,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk) | |||
355 | rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * | 355 | rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * |
356 | tcp_default_init_rwnd(mss); | 356 | tcp_default_init_rwnd(mss); |
357 | 357 | ||
358 | /* Dynamic Right Sizing (DRS) has a latency of 2 to 3 RTTs. | ||
359 | * Allow enough cushion so that sender is not limited by our window | ||
360 | */ | ||
361 | if (sysctl_tcp_moderate_rcvbuf) | ||
362 | rcvmem <<= 2; | ||
363 | |||
358 | if (sk->sk_rcvbuf < rcvmem) | 364 | if (sk->sk_rcvbuf < rcvmem) |
359 | sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); | 365 | sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); |
360 | } | 366 | } |
@@ -373,6 +379,8 @@ void tcp_init_buffer_space(struct sock *sk) | |||
373 | tcp_fixup_sndbuf(sk); | 379 | tcp_fixup_sndbuf(sk); |
374 | 380 | ||
375 | tp->rcvq_space.space = tp->rcv_wnd; | 381 | tp->rcvq_space.space = tp->rcv_wnd; |
382 | tp->rcvq_space.time = tcp_time_stamp; | ||
383 | tp->rcvq_space.seq = tp->copied_seq; | ||
376 | 384 | ||
377 | maxwin = tcp_full_space(sk); | 385 | maxwin = tcp_full_space(sk); |
378 | 386 | ||
@@ -512,48 +520,62 @@ void tcp_rcv_space_adjust(struct sock *sk) | |||
512 | { | 520 | { |
513 | struct tcp_sock *tp = tcp_sk(sk); | 521 | struct tcp_sock *tp = tcp_sk(sk); |
514 | int time; | 522 | int time; |
515 | int space; | 523 | int copied; |
516 | |||
517 | if (tp->rcvq_space.time == 0) | ||
518 | goto new_measure; | ||
519 | 524 | ||
520 | time = tcp_time_stamp - tp->rcvq_space.time; | 525 | time = tcp_time_stamp - tp->rcvq_space.time; |
521 | if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) | 526 | if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) |
522 | return; | 527 | return; |
523 | 528 | ||
524 | space = 2 * (tp->copied_seq - tp->rcvq_space.seq); | 529 | /* Number of bytes copied to user in last RTT */ |
530 | copied = tp->copied_seq - tp->rcvq_space.seq; | ||
531 | if (copied <= tp->rcvq_space.space) | ||
532 | goto new_measure; | ||
533 | |||
534 | /* A bit of theory : | ||
535 | * copied = bytes received in previous RTT, our base window | ||
536 | * To cope with packet losses, we need a 2x factor | ||
537 | * To cope with slow start, and sender growing its cwnd by 100% | ||
538 | * every RTT, we need a 4x factor, because the ACK we are sending | ||
539 | * now is for the next RTT, not the current one : | ||
540 | * <prev RTT . ><current RTT .. ><next RTT .... > | ||
541 | */ | ||
542 | |||
543 | if (sysctl_tcp_moderate_rcvbuf && | ||
544 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { | ||
545 | int rcvwin, rcvmem, rcvbuf; | ||
525 | 546 | ||
526 | space = max(tp->rcvq_space.space, space); | 547 | /* minimal window to cope with packet losses, assuming |
548 | * steady state. Add some cushion because of small variations. | ||
549 | */ | ||
550 | rcvwin = (copied << 1) + 16 * tp->advmss; | ||
527 | 551 | ||
528 | if (tp->rcvq_space.space != space) { | 552 | /* If rate increased by 25%, |
529 | int rcvmem; | 553 | * assume slow start, rcvwin = 3 * copied |
554 | * If rate increased by 50%, | ||
555 | * assume sender can use 2x growth, rcvwin = 4 * copied | ||
556 | */ | ||
557 | if (copied >= | ||
558 | tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) { | ||
559 | if (copied >= | ||
560 | tp->rcvq_space.space + (tp->rcvq_space.space >> 1)) | ||
561 | rcvwin <<= 1; | ||
562 | else | ||
563 | rcvwin += (rcvwin >> 1); | ||
564 | } | ||
530 | 565 | ||
531 | tp->rcvq_space.space = space; | 566 | rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); |
567 | while (tcp_win_from_space(rcvmem) < tp->advmss) | ||
568 | rcvmem += 128; | ||
532 | 569 | ||
533 | if (sysctl_tcp_moderate_rcvbuf && | 570 | rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); |
534 | !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { | 571 | if (rcvbuf > sk->sk_rcvbuf) { |
535 | int new_clamp = space; | 572 | sk->sk_rcvbuf = rcvbuf; |
536 | 573 | ||
537 | /* Receive space grows, normalize in order to | 574 | /* Make the window clamp follow along. */ |
538 | * take into account packet headers and sk_buff | 575 | tp->window_clamp = rcvwin; |
539 | * structure overhead. | ||
540 | */ | ||
541 | space /= tp->advmss; | ||
542 | if (!space) | ||
543 | space = 1; | ||
544 | rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); | ||
545 | while (tcp_win_from_space(rcvmem) < tp->advmss) | ||
546 | rcvmem += 128; | ||
547 | space *= rcvmem; | ||
548 | space = min(space, sysctl_tcp_rmem[2]); | ||
549 | if (space > sk->sk_rcvbuf) { | ||
550 | sk->sk_rcvbuf = space; | ||
551 | |||
552 | /* Make the window clamp follow along. */ | ||
553 | tp->window_clamp = new_clamp; | ||
554 | } | ||
555 | } | 576 | } |
556 | } | 577 | } |
578 | tp->rcvq_space.space = copied; | ||
557 | 579 | ||
558 | new_measure: | 580 | new_measure: |
559 | tp->rcvq_space.seq = tp->copied_seq; | 581 | tp->rcvq_space.seq = tp->copied_seq; |
@@ -5674,8 +5696,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5674 | tcp_init_congestion_control(sk); | 5696 | tcp_init_congestion_control(sk); |
5675 | 5697 | ||
5676 | tcp_mtup_init(sk); | 5698 | tcp_mtup_init(sk); |
5677 | tcp_init_buffer_space(sk); | ||
5678 | tp->copied_seq = tp->rcv_nxt; | 5699 | tp->copied_seq = tp->rcv_nxt; |
5700 | tcp_init_buffer_space(sk); | ||
5679 | } | 5701 | } |
5680 | smp_mb(); | 5702 | smp_mb(); |
5681 | tcp_set_state(sk, TCP_ESTABLISHED); | 5703 | tcp_set_state(sk, TCP_ESTABLISHED); |