about summary refs log tree commit diff stats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 84
1 files changed, 53 insertions, 31 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 25a89eaa669d..5d083855c111 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -355,6 +355,12 @@ static void tcp_fixup_rcvbuf(struct sock *sk)
355 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) * 355 rcvmem = 2 * SKB_TRUESIZE(mss + MAX_TCP_HEADER) *
356 tcp_default_init_rwnd(mss); 356 tcp_default_init_rwnd(mss);
357 357
358 /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency
359 * Allow enough cushion so that sender is not limited by our window
360 */
361 if (sysctl_tcp_moderate_rcvbuf)
362 rcvmem <<= 2;
363
358 if (sk->sk_rcvbuf < rcvmem) 364 if (sk->sk_rcvbuf < rcvmem)
359 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); 365 sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]);
360} 366}
@@ -373,6 +379,8 @@ void tcp_init_buffer_space(struct sock *sk)
373 tcp_fixup_sndbuf(sk); 379 tcp_fixup_sndbuf(sk);
374 380
375 tp->rcvq_space.space = tp->rcv_wnd; 381 tp->rcvq_space.space = tp->rcv_wnd;
382 tp->rcvq_space.time = tcp_time_stamp;
383 tp->rcvq_space.seq = tp->copied_seq;
376 384
377 maxwin = tcp_full_space(sk); 385 maxwin = tcp_full_space(sk);
378 386
@@ -512,48 +520,62 @@ void tcp_rcv_space_adjust(struct sock *sk)
512{ 520{
513 struct tcp_sock *tp = tcp_sk(sk); 521 struct tcp_sock *tp = tcp_sk(sk);
514 int time; 522 int time;
515 int space; 523 int copied;
516
517 if (tp->rcvq_space.time == 0)
518 goto new_measure;
519 524
520 time = tcp_time_stamp - tp->rcvq_space.time; 525 time = tcp_time_stamp - tp->rcvq_space.time;
521 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0) 526 if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
522 return; 527 return;
523 528
524 space = 2 * (tp->copied_seq - tp->rcvq_space.seq); 529 /* Number of bytes copied to user in last RTT */
530 copied = tp->copied_seq - tp->rcvq_space.seq;
531 if (copied <= tp->rcvq_space.space)
532 goto new_measure;
533
534 /* A bit of theory :
535 * copied = bytes received in previous RTT, our base window
536 * To cope with packet losses, we need a 2x factor
537 * To cope with slow start, and sender growing its cwin by 100 %
538 * every RTT, we need a 4x factor, because the ACK we are sending
539 * now is for the next RTT, not the current one :
540 * <prev RTT . ><current RTT .. ><next RTT .... >
541 */
542
543 if (sysctl_tcp_moderate_rcvbuf &&
544 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
545 int rcvwin, rcvmem, rcvbuf;
525 546
526 space = max(tp->rcvq_space.space, space); 547 /* minimal window to cope with packet losses, assuming
548 * steady state. Add some cushion because of small variations.
549 */
550 rcvwin = (copied << 1) + 16 * tp->advmss;
527 551
528 if (tp->rcvq_space.space != space) { 552 /* If rate increased by 25%,
529 int rcvmem; 553 * assume slow start, rcvwin = 3 * copied
554 * If rate increased by 50%,
555 * assume sender can use 2x growth, rcvwin = 4 * copied
556 */
557 if (copied >=
558 tp->rcvq_space.space + (tp->rcvq_space.space >> 2)) {
559 if (copied >=
560 tp->rcvq_space.space + (tp->rcvq_space.space >> 1))
561 rcvwin <<= 1;
562 else
563 rcvwin += (rcvwin >> 1);
564 }
530 565
531 tp->rcvq_space.space = space; 566 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
567 while (tcp_win_from_space(rcvmem) < tp->advmss)
568 rcvmem += 128;
532 569
533 if (sysctl_tcp_moderate_rcvbuf && 570 rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]);
534 !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { 571 if (rcvbuf > sk->sk_rcvbuf) {
535 int new_clamp = space; 572 sk->sk_rcvbuf = rcvbuf;
536 573
537 /* Receive space grows, normalize in order to 574 /* Make the window clamp follow along. */
538 * take into account packet headers and sk_buff 575 tp->window_clamp = rcvwin;
539 * structure overhead.
540 */
541 space /= tp->advmss;
542 if (!space)
543 space = 1;
544 rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER);
545 while (tcp_win_from_space(rcvmem) < tp->advmss)
546 rcvmem += 128;
547 space *= rcvmem;
548 space = min(space, sysctl_tcp_rmem[2]);
549 if (space > sk->sk_rcvbuf) {
550 sk->sk_rcvbuf = space;
551
552 /* Make the window clamp follow along. */
553 tp->window_clamp = new_clamp;
554 }
555 } 576 }
556 } 577 }
578 tp->rcvq_space.space = copied;
557 579
558new_measure: 580new_measure:
559 tp->rcvq_space.seq = tp->copied_seq; 581 tp->rcvq_space.seq = tp->copied_seq;
@@ -5674,8 +5696,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5674 tcp_init_congestion_control(sk); 5696 tcp_init_congestion_control(sk);
5675 5697
5676 tcp_mtup_init(sk); 5698 tcp_mtup_init(sk);
5677 tcp_init_buffer_space(sk);
5678 tp->copied_seq = tp->rcv_nxt; 5699 tp->copied_seq = tp->rcv_nxt;
5700 tcp_init_buffer_space(sk);
5679 } 5701 }
5680 smp_mb(); 5702 smp_mb();
5681 tcp_set_state(sk, TCP_ESTABLISHED); 5703 tcp_set_state(sk, TCP_ESTABLISHED);