Diffstat (limited to 'net/ipv4/tcp_output.c')
 -rw-r--r--  net/ipv4/tcp_output.c  106
1 files changed, 65 insertions, 41 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..dfa5beb0c1c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk)
        struct dst_entry *dst = __sk_dst_get(sk);
        int mss = tp->advmss;
 
-       if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
-               mss = dst_metric(dst, RTAX_ADVMSS);
-               tp->advmss = mss;
+       if (dst) {
+               unsigned int metric = dst_metric_advmss(dst);
+
+               if (metric < mss) {
+                       mss = metric;
+                       tp->advmss = mss;
+               }
        }
 
        return (__u16)mss;
@@ -224,24 +228,22 @@ void tcp_select_initial_window(int __space, __u32 mss,
                }
        }
 
-       /* Set initial window to value enough for senders,
-        * following RFC2414. Senders, not following this RFC,
-        * will be satisfied with 2.
+       /* Set initial window to a value enough for senders starting with
+        * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+        * a limit on the initial window when mss is larger than 1460.
         */
        if (mss > (1 << *rcv_wscale)) {
-               int init_cwnd = 4;
-               if (mss > 1460 * 3)
-                       init_cwnd = 2;
-               else if (mss > 1460)
-                       init_cwnd = 3;
+               int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+               if (mss > 1460)
+                       init_cwnd =
+                       max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
                /* when initializing use the value from init_rcv_wnd
                 * rather than the default from above
                 */
-               if (init_rcv_wnd &&
-                   (*rcv_wnd > init_rcv_wnd * mss))
-                       *rcv_wnd = init_rcv_wnd * mss;
-               else if (*rcv_wnd > init_cwnd * mss)
-                       *rcv_wnd = init_cwnd * mss;
+               if (init_rcv_wnd)
+                       *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+               else
+                       *rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
        }
 
        /* Set the clamp no higher than max representable value */
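
The new logic above scales the advertised initial window with TCP_DEFAULT_INIT_RCVWND segments instead of the old 2/3/4-segment ladder, while capping it for large MSS values. Below is a standalone sketch of the arithmetic, not kernel code; it assumes TCP_DEFAULT_INIT_RCVWND is 10 (as introduced by the companion header change) and that the pre-existing rcv_wnd was 65535 bytes.

    /* Worked example of the initial receive window clamp (assumptions above). */
    #include <stdio.h>

    #define TCP_DEFAULT_INIT_RCVWND 10

    static unsigned int min_u32(unsigned int a, unsigned int b)
    {
            return a < b ? a : b;
    }

    static unsigned int max_u32(unsigned int a, unsigned int b)
    {
            return a > b ? a : b;
    }

    int main(void)
    {
            unsigned int mss[] = { 536, 1460, 9000 };
            unsigned int i;

            for (i = 0; i < 3; i++) {
                    unsigned int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
                    unsigned int rcv_wnd = 65535;   /* value chosen earlier */

                    if (mss[i] > 1460)
                            init_cwnd = max_u32((1460 * TCP_DEFAULT_INIT_RCVWND) / mss[i], 2);
                    rcv_wnd = min_u32(rcv_wnd, init_cwnd * mss[i]);
                    printf("mss %5u -> init_cwnd %2u, initial rcv_wnd %u\n",
                           mss[i], init_cwnd, rcv_wnd);
            }
            return 0;
    }

For a standard 1460-byte MSS this yields 10 segments (14600 bytes); for a 9000-byte MSS the cap keeps the window at 2 segments.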
@@ -392,27 +394,30 @@ struct tcp_out_options {
  */
 static u8 tcp_cookie_size_check(u8 desired)
 {
-       if (desired > 0) {
+       int cookie_size;
+
+       if (desired > 0)
                /* previously specified */
                return desired;
-       }
-       if (sysctl_tcp_cookie_size <= 0) {
+
+       cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+       if (cookie_size <= 0)
                /* no default specified */
                return 0;
-       }
-       if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+       if (cookie_size <= TCP_COOKIE_MIN)
                /* value too small, specify minimum */
                return TCP_COOKIE_MIN;
-       }
-       if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+       if (cookie_size >= TCP_COOKIE_MAX)
                /* value too large, specify maximum */
                return TCP_COOKIE_MAX;
-       }
-       if (0x1 & sysctl_tcp_cookie_size) {
+
+       if (cookie_size & 1)
                /* 8-bit multiple, illegal, fix it */
-               return (u8)(sysctl_tcp_cookie_size + 0x1);
-       }
-       return (u8)sysctl_tcp_cookie_size;
+               cookie_size++;
+
+       return (u8)cookie_size;
 }
 
 /* Write previously computed TCP options to the packet.
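
sysctl_tcp_cookie_size can be rewritten through /proc at any time, so the function now takes one snapshot with ACCESS_ONCE() and performs every range check on that local copy; otherwise the value finally returned could disagree with the value that passed the checks. The same treatment is applied to sysctl_tcp_tso_win_divisor further down. A minimal user-space sketch of the snapshot pattern follows; READ_ONCE_INT is a hypothetical stand-in, not the kernel macro.

    #include <stdio.h>

    /* Hypothetical stand-in for ACCESS_ONCE(): force exactly one read of a
     * variable another thread may rewrite, then validate the local copy.
     */
    #define READ_ONCE_INT(x) (*(volatile int *)&(x))

    static int shared_cookie_size = 7;      /* plays the role of the sysctl */

    static unsigned char cookie_size_check(void)
    {
            int cookie_size = READ_ONCE_INT(shared_cookie_size);

            if (cookie_size <= 0)
                    return 0;               /* no default configured */
            if (cookie_size & 1)
                    cookie_size++;          /* round odd values up, as above */
            return (unsigned char)cookie_size;
    }

    int main(void)
    {
            printf("checked cookie size: %u\n", cookie_size_check());
            return 0;
    }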
@@ -828,8 +833,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
                                                   &md5);
        tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-       if (tcp_packets_in_flight(tp) == 0)
+       if (tcp_packets_in_flight(tp) == 0) {
                tcp_ca_event(sk, CA_EVENT_TX_START);
+               skb->ooo_okay = 1;
+       } else
+               skb->ooo_okay = 0;
 
        skb_push(skb, tcp_header_size);
        skb_reset_transport_header(skb);
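
Setting skb->ooo_okay only when nothing is in flight records that this packet may safely be steered to a different transmit queue, since no earlier packets from the flow could be reordered behind it. A hypothetical sketch of how a queue selector could honour such a flag; this is purely illustrative and not the kernel's queue-selection code.

    #include <stdio.h>

    /* Illustration: migrate a flow to a new TX queue only when the packet
     * says out-of-order delivery cannot happen, i.e. nothing earlier from
     * the flow is still in flight.
     */
    struct flow_state {
            int cached_queue;       /* -1 when no queue recorded yet */
    };

    static int pick_tx_queue(struct flow_state *fs, int preferred_queue,
                             int ooo_okay)
    {
            if (ooo_okay || fs->cached_queue < 0)
                    fs->cached_queue = preferred_queue;     /* safe to move */
            return fs->cached_queue;                        /* otherwise stay */
    }

    int main(void)
    {
            struct flow_state fs = { .cached_queue = -1 };

            printf("first pick: %d\n", pick_tx_queue(&fs, 3, 0)); /* takes 3 */
            printf("busy flow:  %d\n", pick_tx_queue(&fs, 5, 0)); /* stays on 3 */
            printf("idle flow:  %d\n", pick_tx_queue(&fs, 5, 1)); /* may move to 5 */
            return 0;
    }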
@@ -1342,7 +1350,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
        return 0;
 }
 
-/* Intialize TSO state of a skb.
+/* Initialize TSO state of a skb.
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
@@ -1376,9 +1384,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
                                  const struct sk_buff *skb,
                                  unsigned mss_now, int nonagle)
 {
-       return (skb->len < mss_now &&
+       return skb->len < mss_now &&
                ((nonagle & TCP_NAGLE_CORK) ||
-                (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
+                (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1457,10 @@ int tcp_may_send_now(struct sock *sk)
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *skb = tcp_send_head(sk);
 
-       return (skb &&
+       return skb &&
                tcp_snd_test(sk, skb, tcp_current_mss(sk),
                             (tcp_skb_is_last(sk, skb) ?
-                             tp->nonagle : TCP_NAGLE_PUSH)));
+                             tp->nonagle : TCP_NAGLE_PUSH));
 }
 
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -1519,6 +1527,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
        struct tcp_sock *tp = tcp_sk(sk);
        const struct inet_connection_sock *icsk = inet_csk(sk);
        u32 send_win, cong_win, limit, in_flight;
+       int win_divisor;
 
        if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
                goto send_now;
@@ -1550,13 +1559,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
        if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
                goto send_now;
 
-       if (sysctl_tcp_tso_win_divisor) {
+       win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+       if (win_divisor) {
                u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
                /* If at least some fraction of a window is available,
                 * just use it.
                 */
-               chunk /= sysctl_tcp_tso_win_divisor;
+               chunk /= win_divisor;
                if (limit >= chunk)
                        goto send_now;
        } else {
@@ -2152,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
        if (!tp->retrans_stamp)
                tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-       tp->undo_retrans++;
+       tp->undo_retrans += tcp_skb_pcount(skb);
 
        /* snd_nxt is stored to detect loss of retransmitted segment,
         * see tcp_input.c tcp_sacktag_write_queue().
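
The undo_retrans change matters for TSO/GSO: one retransmitted skb can carry several MSS-sized segments, and the undo machinery in tcp_input.c effectively accounts segment by segment as DSACKs arrive, so the counter must grow by tcp_skb_pcount() rather than by one. A toy illustration of the mismatch the old per-skb count produced (values are made up):

    #include <stdio.h>

    int main(void)
    {
            int pcount = 3;         /* tcp_skb_pcount(): 3 segments in one TSO skb */
            int undo_old = 1;       /* old code: one increment per skb */
            int undo_new = pcount;  /* patched code: one per segment */

            /* Each later DSACK for a spuriously retransmitted segment
             * subtracts one; only the per-segment count returns to zero. */
            printf("after 3 DSACKs: old=%d new=%d\n",
                   undo_old - 3, undo_new - 3);
            return 0;
    }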
@@ -2421,7 +2431,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
        skb_dst_set(skb, dst_clone(dst));
 
-       mss = dst_metric(dst, RTAX_ADVMSS);
+       mss = dst_metric_advmss(dst);
        if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
                mss = tp->rx_opt.user_mss;
 
@@ -2429,6 +2439,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
                __u8 rcv_wscale;
                /* Set this up on the first call only */
                req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+               /* limit the window selection if the user enforce a smaller rx buffer */
+               if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+                   (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+                       req->window_clamp = tcp_full_space(sk);
+
                /* tcp_full_space because it is guaranteed to be the first packet */
                tcp_select_initial_window(tcp_full_space(sk),
                        mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2549,12 +2565,17 @@ static void tcp_connect_init(struct sock *sk)
 
        if (!tp->window_clamp)
                tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-       tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+       tp->advmss = dst_metric_advmss(dst);
        if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
                tp->advmss = tp->rx_opt.user_mss;
 
        tcp_initialize_rcv_mss(sk);
 
+       /* limit the window selection if the user enforce a smaller rx buffer */
+       if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+           (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
+               tp->window_clamp = tcp_full_space(sk);
+
        tcp_select_initial_window(tcp_full_space(sk),
                tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
                &tp->rcv_wnd,
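
Both window_clamp hunks handle an application that has pinned its receive buffer with SO_RCVBUF before the connection exists: in that case sk_userlocks carries SOCK_RCVBUF_LOCK, and the clamp must not promise more than tcp_full_space() of that buffer, whether on the SYN-ACK path (tcp_make_synack) or the active-open path (tcp_connect_init). A user-space sketch of the triggering scenario; the address, port and buffer size are illustrative only.

    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
            struct sockaddr_in addr;
            int rcvbuf = 8192;      /* deliberately small receive buffer */
            int fd = socket(AF_INET, SOCK_STREAM, 0);

            if (fd < 0)
                    return 1;

            /* Locking the buffer size before connect() sets SOCK_RCVBUF_LOCK,
             * so the hunks above clamp window_clamp to tcp_full_space(sk). */
            setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf));

            memset(&addr, 0, sizeof(addr));
            addr.sin_family = AF_INET;
            addr.sin_port = htons(80);
            addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
            connect(fd, (struct sockaddr *)&addr, sizeof(addr));
            close(fd);
            return 0;
    }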
@@ -2587,6 +2608,7 @@ int tcp_connect(struct sock *sk)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        struct sk_buff *buff;
+       int err;
 
        tcp_connect_init(sk);
 
@@ -2609,7 +2631,9 @@ int tcp_connect(struct sock *sk)
        sk->sk_wmem_queued += buff->truesize;
        sk_mem_charge(sk, buff->truesize);
        tp->packets_out += tcp_skb_pcount(buff);
-       tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+       err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+       if (err == -ECONNREFUSED)
+               return err;
 
        /* We change tp->snd_nxt after the tcp_transmit_skb() call
         * in order to make this packet get counted in tcpOutSegs.
