Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c  106
1 file changed, 65 insertions, 41 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..dfa5beb0c1c8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
-	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
-		mss = dst_metric(dst, RTAX_ADVMSS);
-		tp->advmss = mss;
+	if (dst) {
+		unsigned int metric = dst_metric_advmss(dst);
+
+		if (metric < mss) {
+			mss = metric;
+			tp->advmss = mss;
+		}
 	}
 
 	return (__u16)mss;
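The advertised-MSS hunk above swaps the raw dst_metric(dst, RTAX_ADVMSS) read for dst_metric_advmss(), which can fall back to a per-protocol default when the route carries no explicit advmss metric. The following is only a rough user-space sketch of that accessor shape, with made-up field and callback names, not the net/core implementation:

#include <stdio.h>

/* Hypothetical stand-in for a route entry; names are illustrative only. */
struct fake_route {
	unsigned int advmss_metric;			/* 0 means "metric not set" */
	unsigned int (*default_advmss)(const struct fake_route *rt);
};

static unsigned int ipv4_default_advmss_stub(const struct fake_route *rt)
{
	(void)rt;
	return 1460;	/* e.g. path MTU minus IP and TCP headers */
}

/* Same idea as dst_metric_advmss(): use the metric if set, else a default. */
static unsigned int route_advmss(const struct fake_route *rt)
{
	unsigned int advmss = rt->advmss_metric;

	if (!advmss)
		advmss = rt->default_advmss(rt);
	return advmss;
}

int main(void)
{
	struct fake_route rt = { .advmss_metric = 0,
				 .default_advmss = ipv4_default_advmss_stub };

	printf("%u\n", route_advmss(&rt));	/* prints 1460 */
	return 0;
}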
@@ -224,24 +228,22 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}
 
-	/* Set initial window to value enough for senders,
-	 * following RFC2414. Senders, not following this RFC,
-	 * will be satisfied with 2.
+	/* Set initial window to a value enough for senders starting with
+	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+	 * a limit on the initial window when mss is larger than 1460.
 	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460 * 3)
-			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+		if (mss > 1460)
+			init_cwnd =
+			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
-		if (init_rcv_wnd &&
-		    (*rcv_wnd > init_rcv_wnd * mss))
-			*rcv_wnd = init_rcv_wnd * mss;
-		else if (*rcv_wnd > init_cwnd * mss)
-			*rcv_wnd = init_cwnd * mss;
+		if (init_rcv_wnd)
+			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+		else
+			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
 	}
 
 	/* Set the clamp no higher than max representable value */
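This hunk raises the default initial receive window from the RFC 2414-style 2/3/4 segments to TCP_DEFAULT_INIT_RCVWND segments, scaled down for large MSS values so the byte count stays near 10 * 1460. A minimal user-space sketch of the arithmetic, assuming TCP_DEFAULT_INIT_RCVWND is 10 (the value this series introduces) and ignoring the later min() clamp against the previously computed rcv_wnd:

#include <stdio.h>

#define TCP_DEFAULT_INIT_RCVWND	10	/* assumed value, see include/net/tcp.h */

static unsigned int initial_rcv_wnd(unsigned int mss)
{
	unsigned int init_cwnd = TCP_DEFAULT_INIT_RCVWND;

	if (mss > 1460) {
		/* Scale the segment count down so the window stays near 10 * 1460 bytes. */
		init_cwnd = (1460 * TCP_DEFAULT_INIT_RCVWND) / mss;
		if (init_cwnd < 2)
			init_cwnd = 2;
	}
	return init_cwnd * mss;
}

int main(void)
{
	/* mss 1460 -> 14600 bytes (10 segments); mss 9000 -> 18000 bytes (2 segments). */
	printf("%u %u\n", initial_rcv_wnd(1460), initial_rcv_wnd(9000));
	return 0;
}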
@@ -392,27 +394,30 @@ struct tcp_out_options {
  */
 static u8 tcp_cookie_size_check(u8 desired)
 {
-	if (desired > 0) {
+	int cookie_size;
+
+	if (desired > 0)
 		/* previously specified */
 		return desired;
-	}
-	if (sysctl_tcp_cookie_size <= 0) {
+
+	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+	if (cookie_size <= 0)
 		/* no default specified */
 		return 0;
-	}
-	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+	if (cookie_size <= TCP_COOKIE_MIN)
 		/* value too small, specify minimum */
 		return TCP_COOKIE_MIN;
-	}
-	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+	if (cookie_size >= TCP_COOKIE_MAX)
 		/* value too large, specify maximum */
 		return TCP_COOKIE_MAX;
-	}
-	if (0x1 & sysctl_tcp_cookie_size) {
+
+	if (cookie_size & 1)
 		/* 8-bit multiple, illegal, fix it */
-		return (u8)(sysctl_tcp_cookie_size + 0x1);
-	}
-	return (u8)sysctl_tcp_cookie_size;
+		cookie_size++;
+
+	return (u8)cookie_size;
 }
 
 /* Write previously computed TCP options to the packet.
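The rewrite above reads sysctl_tcp_cookie_size once through ACCESS_ONCE() into a local and validates only that copy, so a concurrent sysctl write cannot make the range checks and the returned value disagree; the same pattern is applied to sysctl_tcp_tso_win_divisor in tcp_tso_should_defer() further down. A small user-space sketch of the idea, with a volatile-cast macro standing in for the kernel's ACCESS_ONCE():

#include <stdio.h>

/* User-space stand-in for ACCESS_ONCE(): force a single, non-reloaded read. */
#define READ_ONCE_INT(x)	(*(volatile int *)&(x))

int tunable;	/* plays the role of a sysctl another thread may change */

/* Clamp the tunable into [lo, hi]; every check uses the same snapshot 'v'. */
static int clamp_tunable(int lo, int hi)
{
	int v = READ_ONCE_INT(tunable);

	if (v < lo)
		return lo;
	if (v > hi)
		return hi;
	return v;
}

int main(void)
{
	tunable = 300;
	printf("%d\n", clamp_tunable(0, 255));	/* prints 255 */
	return 0;
}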
@@ -828,8 +833,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
+	if (tcp_packets_in_flight(tp) == 0) {
 		tcp_ca_event(sk, CA_EVENT_TX_START);
+		skb->ooo_okay = 1;
+	} else
+		skb->ooo_okay = 0;
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
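skb->ooo_okay is set only when nothing is in flight, i.e. when this skb starts a new burst; a transmit-queue selector may then move the flow to a different queue without risking out-of-order delivery. A purely illustrative sketch of how such a flag can be consumed, not the actual dev_queue_xmit()/XPS code:

#include <stdio.h>

/* Hypothetical, minimal stand-ins; names do not match the kernel structures. */
struct fake_skb {
	int ooo_okay;	/* 1: no data in flight, so reordering is impossible */
};

static int pick_tx_queue(const struct fake_skb *skb,
			 int cached_queue, int preferred_queue)
{
	/* Only migrate the flow to a new queue when that cannot reorder it. */
	if (skb->ooo_okay)
		return preferred_queue;
	return cached_queue;
}

int main(void)
{
	struct fake_skb start_of_burst = { .ooo_okay = 1 };
	struct fake_skb mid_burst = { .ooo_okay = 0 };

	printf("%d %d\n",
	       pick_tx_queue(&start_of_burst, 0, 3),	/* 3: free to move */
	       pick_tx_queue(&mid_burst, 0, 3));	/* 0: stay put */
	return 0;
}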
@@ -1342,7 +1350,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
 	return 0;
 }
 
-/* Intialize TSO state of a skb.
+/* Initialize TSO state of a skb.
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
@@ -1376,9 +1384,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
 				  const struct sk_buff *skb,
 				  unsigned mss_now, int nonagle)
 {
-	return (skb->len < mss_now &&
+	return skb->len < mss_now &&
 		((nonagle & TCP_NAGLE_CORK) ||
-		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1457,10 @@ int tcp_may_send_now(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
-	return (skb &&
+	return skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH)));
+			      tp->nonagle : TCP_NAGLE_PUSH));
 }
 
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -1519,6 +1527,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
+	int win_divisor;
 
 	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
@@ -1550,13 +1559,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
 		goto send_now;
 
-	if (sysctl_tcp_tso_win_divisor) {
+	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+	if (win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
 		/* If at least some fraction of a window is available,
 		 * just use it.
 		 */
-		chunk /= sysctl_tcp_tso_win_divisor;
+		chunk /= win_divisor;
 		if (limit >= chunk)
 			goto send_now;
 	} else {
@@ -2152,7 +2162,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (!tp->retrans_stamp)
 		tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-	tp->undo_retrans++;
+	tp->undo_retrans += tcp_skb_pcount(skb);
 
 	/* snd_nxt is stored to detect loss of retransmitted segment,
 	 * see tcp_input.c tcp_sacktag_write_queue().
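With TSO/GSO a single skb on the write queue can stand for several MSS-sized segments, so counting retransmissions per skb would understate them; tcp_skb_pcount() gives the number of segments the skb represents. Roughly, for an skb of length len at the current MSS (illustrative arithmetic only):

#include <stdio.h>

/* Roughly what tcp_skb_pcount() reflects for a TSO skb: len/mss, rounded up. */
static unsigned int segs_in_skb(unsigned int len, unsigned int mss)
{
	return (len + mss - 1) / mss;
}

int main(void)
{
	/* A 4380-byte skb at mss 1460 counts as 3 retransmitted segments. */
	printf("%u\n", segs_in_skb(4380, 1460));
	return 0;
}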
@@ -2421,7 +2431,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb_dst_set(skb, dst_clone(dst));
 
-	mss = dst_metric(dst, RTAX_ADVMSS);
+	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
@@ -2429,6 +2439,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+		/* limit the window selection if the user enforce a smaller rx buffer */
+		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+			req->window_clamp = tcp_full_space(sk);
+
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
 			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2549,12 +2565,17 @@ static void tcp_connect_init(struct sock *sk)
 
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tp->advmss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
 		tp->advmss = tp->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(sk);
 
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
+		tp->window_clamp = tcp_full_space(sk);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 				  tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 				  &tp->rcv_wnd,
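The two hunks above (tcp_make_synack() and tcp_connect_init()) apply the same rule: if the application locked the receive buffer with SO_RCVBUF, the window clamp must not exceed what that buffer can actually hold, as reported by tcp_full_space(). A user-space sketch of the clamp, with hypothetical stand-ins for the socket fields:

#include <stdio.h>

/* Hypothetical stand-in for the few socket fields the clamp looks at. */
struct fake_sock {
	int rcvbuf_locked;		/* user called SO_RCVBUF (SOCK_RCVBUF_LOCK) */
	unsigned int full_space;	/* usable receive buffer, as tcp_full_space() */
};

static unsigned int clamp_window(const struct fake_sock *sk,
				 unsigned int window_clamp)
{
	if (sk->rcvbuf_locked &&
	    (window_clamp > sk->full_space || window_clamp == 0))
		return sk->full_space;
	return window_clamp;
}

int main(void)
{
	struct fake_sock sk = { .rcvbuf_locked = 1, .full_space = 65536 };

	/* A route-supplied clamp larger than the locked buffer is cut down. */
	printf("%u\n", clamp_window(&sk, 262144));	/* prints 65536 */
	return 0;
}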
@@ -2587,6 +2608,7 @@ int tcp_connect(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
+	int err;
 
 	tcp_connect_init(sk);
 
@@ -2609,7 +2631,9 @@ int tcp_connect(struct sock *sk)
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	if (err == -ECONNREFUSED)
+		return err;
 
 	/* We change tp->snd_nxt after the tcp_transmit_skb() call
 	 * in order to make this packet get counted in tcpOutSegs.