path: root/net/ipv4/tcp_output.c
author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/ipv4/tcp_output.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp

Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--  net/ipv4/tcp_output.c | 113
1 file changed, 69 insertions(+), 44 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..882e0b0964d0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -55,7 +55,7 @@ int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
 int sysctl_tcp_tso_win_divisor __read_mostly = 3;
 
 int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = 512;
+int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -73,7 +73,7 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
 	tcp_advance_send_head(sk, skb);
 	tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
 
-	/* Don't override Nagle indefinately with F-RTO */
+	/* Don't override Nagle indefinitely with F-RTO */
 	if (tp->frto_counter == 2)
 		tp->frto_counter = 3;
 
@@ -119,9 +119,13 @@ static __u16 tcp_advertise_mss(struct sock *sk)
 	struct dst_entry *dst = __sk_dst_get(sk);
 	int mss = tp->advmss;
 
-	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
-		mss = dst_metric(dst, RTAX_ADVMSS);
-		tp->advmss = mss;
+	if (dst) {
+		unsigned int metric = dst_metric_advmss(dst);
+
+		if (metric < mss) {
+			mss = metric;
+			tp->advmss = mss;
+		}
 	}
 
 	return (__u16)mss;
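[Editor's note] The rewritten tcp_advertise_mss() reads the route's advertised-MSS metric once through the new dst_metric_advmss() helper and only ever lowers tp->advmss, never raises it. A standalone sketch of that clamp (illustrative names, not kernel code):

/* Minimal userspace sketch of the clamp performed above: the advertised
 * MSS is only ever lowered to the route's metric, never raised. */
#include <stdio.h>

static unsigned short advertise_mss(unsigned int advmss,
                                    unsigned int route_metric, int have_route)
{
        if (have_route && route_metric < advmss)
                advmss = route_metric;          /* clamp down only */
        return (unsigned short)advmss;
}

int main(void)
{
        printf("%u\n", advertise_mss(1460, 536, 1));    /* -> 536  */
        printf("%u\n", advertise_mss(1460, 8960, 1));   /* -> 1460 */
        printf("%u\n", advertise_mss(1460, 536, 0));    /* -> 1460, no route */
        return 0;
}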
@@ -224,24 +228,22 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}
 
-	/* Set initial window to value enough for senders,
-	 * following RFC2414. Senders, not following this RFC,
-	 * will be satisfied with 2.
+	/* Set initial window to a value enough for senders starting with
+	 * initial congestion window of TCP_DEFAULT_INIT_RCVWND. Place
+	 * a limit on the initial window when mss is larger than 1460.
 	 */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460 * 3)
-			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		int init_cwnd = TCP_DEFAULT_INIT_RCVWND;
+		if (mss > 1460)
+			init_cwnd =
+			max_t(u32, (1460 * TCP_DEFAULT_INIT_RCVWND) / mss, 2);
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
-		if (init_rcv_wnd &&
-		    (*rcv_wnd > init_rcv_wnd * mss))
-			*rcv_wnd = init_rcv_wnd * mss;
-		else if (*rcv_wnd > init_cwnd * mss)
-			*rcv_wnd = init_cwnd * mss;
+		if (init_rcv_wnd)
+			*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
+		else
+			*rcv_wnd = min(*rcv_wnd, init_cwnd * mss);
 	}
 
 	/* Set the clamp no higher than max representable value */
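[Editor's note] The new selection replaces the fixed 2/3/4-segment ladder with a byte budget of 1460 * TCP_DEFAULT_INIT_RCVWND, scaled down for larger MSS values and floored at two segments. A minimal sketch of the arithmetic, assuming TCP_DEFAULT_INIT_RCVWND is 10 (check include/net/tcp.h in your tree):

#include <stdio.h>

#define TCP_DEFAULT_INIT_RCVWND 10      /* assumed value for this era */

static unsigned int init_cwnd_for_mss(unsigned int mss)
{
        unsigned int init_cwnd = TCP_DEFAULT_INIT_RCVWND;

        if (mss > 1460) {
                init_cwnd = (1460 * TCP_DEFAULT_INIT_RCVWND) / mss;
                if (init_cwnd < 2)
                        init_cwnd = 2;  /* the max_t(u32, ..., 2) floor */
        }
        return init_cwnd;
}

int main(void)
{
        unsigned int mss[] = { 536, 1460, 4096, 9000 };
        for (int i = 0; i < 4; i++)
                printf("mss=%u -> init_cwnd=%u (%u bytes)\n",
                       mss[i], init_cwnd_for_mss(mss[i]),
                       mss[i] * init_cwnd_for_mss(mss[i]));
        return 0;
}

For a 9000-byte jumbo-frame MSS this yields the floor of 2 segments, keeping the initial window near the same byte budget as ten 1460-byte segments.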
@@ -392,27 +394,30 @@ struct tcp_out_options {
  */
 static u8 tcp_cookie_size_check(u8 desired)
 {
-	if (desired > 0) {
+	int cookie_size;
+
+	if (desired > 0)
 		/* previously specified */
 		return desired;
-	}
-	if (sysctl_tcp_cookie_size <= 0) {
+
+	cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size);
+	if (cookie_size <= 0)
 		/* no default specified */
 		return 0;
-	}
-	if (sysctl_tcp_cookie_size <= TCP_COOKIE_MIN) {
+
+	if (cookie_size <= TCP_COOKIE_MIN)
 		/* value too small, specify minimum */
 		return TCP_COOKIE_MIN;
-	}
-	if (sysctl_tcp_cookie_size >= TCP_COOKIE_MAX) {
+
+	if (cookie_size >= TCP_COOKIE_MAX)
 		/* value too large, specify maximum */
 		return TCP_COOKIE_MAX;
-	}
-	if (0x1 & sysctl_tcp_cookie_size) {
+
+	if (cookie_size & 1)
 		/* 8-bit multiple, illegal, fix it */
-		return (u8)(sysctl_tcp_cookie_size + 0x1);
-	}
-	return (u8)sysctl_tcp_cookie_size;
+		cookie_size++;
+
+	return (u8)cookie_size;
 }
 
 /* Write previously computed TCP options to the packet.
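[Editor's note] Snapshotting the sysctl with ACCESS_ONCE() guarantees that every range check and the final return see the same value, even if another CPU rewrites sysctl_tcp_cookie_size concurrently. A userspace sketch of the clamping; the TCP_COOKIE_MIN/MAX values (8 and 16) are assumptions for illustration:

#include <stdio.h>

#define TCP_COOKIE_MIN  8       /* assumed bounds, for illustration only */
#define TCP_COOKIE_MAX  16

static unsigned char cookie_size_check(int cookie_size)
{
        /* In the kernel, cookie_size = ACCESS_ONCE(sysctl_tcp_cookie_size),
         * so every test below works on the same snapshot of the sysctl. */
        if (cookie_size <= 0)
                return 0;                       /* no default specified */
        if (cookie_size <= TCP_COOKIE_MIN)
                return TCP_COOKIE_MIN;          /* too small: use minimum */
        if (cookie_size >= TCP_COOKIE_MAX)
                return TCP_COOKIE_MAX;          /* too large: use maximum */
        if (cookie_size & 1)
                cookie_size++;                  /* round odd sizes up */
        return (unsigned char)cookie_size;
}

int main(void)
{
        for (int v = -1; v <= 18; v += 3)
                printf("%d -> %u\n", v, cookie_size_check(v));
        return 0;
}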
@@ -828,8 +833,11 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 							   &md5);
 	tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
 
-	if (tcp_packets_in_flight(tp) == 0)
+	if (tcp_packets_in_flight(tp) == 0) {
 		tcp_ca_event(sk, CA_EVENT_TX_START);
+		skb->ooo_okay = 1;
+	} else
+		skb->ooo_okay = 0;
 
 	skb_push(skb, tcp_header_size);
 	skb_reset_transport_header(skb);
@@ -891,7 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 		      tcp_skb_pcount(skb));
 
-	err = icsk->icsk_af_ops->queue_xmit(skb);
+	err = icsk->icsk_af_ops->queue_xmit(skb, &inet->cork.fl);
 	if (likely(err <= 0))
 		return err;
 
@@ -995,7 +1003,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	int nlen;
 	u8 flags;
 
-	BUG_ON(len > skb->len);
+	if (WARN_ON(len > skb->len))
+		return -EINVAL;
 
 	nsize = skb_headlen(skb) - len;
 	if (nsize < 0)
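[Editor's note] Replacing BUG_ON() with "if (WARN_ON(...)) return -EINVAL;" turns an impossible split length from a kernel panic into a logged warning plus a recoverable error for the caller. The idiom works because WARN_ON() evaluates to its condition. A userspace analog, with a hypothetical WARN_ON macro standing in for the kernel's:

#include <stdio.h>
#include <errno.h>

/* Userspace stand-in for the kernel macro: report the unexpected state
 * and evaluate to the condition, so it can gate an early return. */
#define WARN_ON(cond) \
        ((cond) ? (fprintf(stderr, "WARNING: %s:%d: %s\n", \
                           __FILE__, __LINE__, #cond), 1) : 0)

static int fragment(unsigned int len, unsigned int skb_len)
{
        if (WARN_ON(len > skb_len))
                return -EINVAL;         /* caller can recover */
        /* ... perform the split ... */
        return 0;
}

int main(void)
{
        printf("ok:  %d\n", fragment(100, 1500));       /*  0      */
        printf("bad: %d\n", fragment(2000, 1500));      /* -EINVAL */
        return 0;
}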
@@ -1342,7 +1351,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp,
 	return 0;
 }
 
-/* Intialize TSO state of a skb.
+/* Initialize TSO state of a skb.
  * This must be invoked the first time we consider transmitting
  * SKB onto the wire.
  */
@@ -1376,9 +1385,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
 				  const struct sk_buff *skb,
 				  unsigned mss_now, int nonagle)
 {
-	return (skb->len < mss_now &&
+	return skb->len < mss_now &&
 		((nonagle & TCP_NAGLE_CORK) ||
-		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
 
 /* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1458,10 @@ int tcp_may_send_now(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 
-	return (skb &&
+	return skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH)));
+			      tp->nonagle : TCP_NAGLE_PUSH));
 }
 
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -1519,6 +1528,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 send_win, cong_win, limit, in_flight;
+	int win_divisor;
 
 	if (TCP_SKB_CB(skb)->flags & TCPHDR_FIN)
 		goto send_now;
@@ -1550,13 +1560,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
 		goto send_now;
 
-	if (sysctl_tcp_tso_win_divisor) {
+	win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor);
+	if (win_divisor) {
 		u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
 
 		/* If at least some fraction of a window is available,
 		 * just use it.
 		 */
-		chunk /= sysctl_tcp_tso_win_divisor;
+		chunk /= win_divisor;
 		if (limit >= chunk)
 			goto send_now;
 	} else {
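[Editor's note] Reading the sysctl twice is racy: another CPU could clear it between the nonzero test and the later division, dividing by zero. ACCESS_ONCE() forces a single read into a local so test and use agree. A userspace sketch of the pattern (a volatile access plays the role of ACCESS_ONCE(), which the kernel defines as a volatile cast):

#include <stdio.h>

static volatile int tso_win_divisor = 3;        /* stand-in for the sysctl */

static unsigned int chunk_limit(unsigned int chunk)
{
        int win_divisor = tso_win_divisor;      /* single read, as with ACCESS_ONCE() */

        if (win_divisor)
                return chunk / win_divisor;     /* same value as tested: no div-by-zero */
        return chunk;
}

int main(void)
{
        printf("%u\n", chunk_limit(65535));     /* divisor 3 */
        tso_win_divisor = 0;                    /* e.g. an admin write to the sysctl */
        printf("%u\n", chunk_limit(65535));     /* divisor path safely skipped */
        return 0;
}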
@@ -2152,7 +2163,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	if (!tp->retrans_stamp)
 		tp->retrans_stamp = TCP_SKB_CB(skb)->when;
 
-	tp->undo_retrans++;
+	tp->undo_retrans += tcp_skb_pcount(skb);
 
 	/* snd_nxt is stored to detect loss of retransmitted segment,
 	 * see tcp_input.c tcp_sacktag_write_queue().
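[Editor's note] With TSO/GSO a single skb can stand for many MSS-sized segments, so bumping undo_retrans by one per skb undercounts retransmissions; tcp_skb_pcount() reports the segment count. A small sketch of the accounting difference (the pcount() helper here is an illustrative stand-in):

#include <stdio.h>

/* A GSO skb covering `len` bytes at `mss` counts as ceil(len / mss)
 * segments, which is what tcp_skb_pcount() reports for it. */
static unsigned int pcount(unsigned int len, unsigned int mss)
{
        return (len + mss - 1) / mss;
}

int main(void)
{
        unsigned int undo_retrans = 0;

        undo_retrans += pcount(64240, 1460);    /* one 44-segment TSO skb */
        undo_retrans += pcount(1460, 1460);     /* one single-segment skb */
        printf("undo_retrans = %u\n", undo_retrans);    /* 45, not 2 */
        return 0;
}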
@@ -2421,7 +2432,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 
 	skb_dst_set(skb, dst_clone(dst));
 
-	mss = dst_metric(dst, RTAX_ADVMSS);
+	mss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
@@ -2429,6 +2440,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+		/* limit the window selection if the user enforce a smaller rx buffer */
+		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+			req->window_clamp = tcp_full_space(sk);
+
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
 			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2549,12 +2566,17 @@ static void tcp_connect_init(struct sock *sk)
 
 	if (!tp->window_clamp)
 		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
-	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+	tp->advmss = dst_metric_advmss(dst);
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss)
 		tp->advmss = tp->rx_opt.user_mss;
 
 	tcp_initialize_rcv_mss(sk);
 
+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
+		tp->window_clamp = tcp_full_space(sk);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 		tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 		&tp->rcv_wnd,
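[Editor's note] This hunk and the tcp_make_synack() one above apply the same rule: once the application has pinned the receive buffer with setsockopt(SO_RCVBUF), which sets SOCK_RCVBUF_LOCK in sk_userlocks, the window clamp must not advertise more than tcp_full_space(sk) can actually back. A userspace view of how a buffer gets locked (sizes illustrative):

#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        int rcvbuf = 16384;     /* illustrative size */
        socklen_t len = sizeof(rcvbuf);

        if (fd < 0)
                return 1;

        /* SO_RCVBUF sets SOCK_RCVBUF_LOCK in sk_userlocks, which is the
         * condition both hunks above test before clamping the advertised
         * window to tcp_full_space(sk). */
        if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) < 0)
                perror("setsockopt");

        getsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, &len);
        printf("effective rcvbuf: %d\n", rcvbuf);       /* kernel doubles it */
        return 0;
}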
@@ -2587,6 +2609,7 @@ int tcp_connect(struct sock *sk)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *buff;
+	int err;
 
 	tcp_connect_init(sk);
 
@@ -2609,7 +2632,9 @@ int tcp_connect(struct sock *sk)
 	sk->sk_wmem_queued += buff->truesize;
 	sk_mem_charge(sk, buff->truesize);
 	tp->packets_out += tcp_skb_pcount(buff);
-	tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	err = tcp_transmit_skb(sk, buff, 1, sk->sk_allocation);
+	if (err == -ECONNREFUSED)
+		return err;
 
 	/* We change tp->snd_nxt after the tcp_transmit_skb() call
 	 * in order to make this packet get counted in tcpOutSegs.
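[Editor's note] Checking the SYN's transmit status lets tcp_connect() fail fast on -ECONNREFUSED instead of silently discarding the error; the mechanism by which that error surfaces synchronously is not shown in this hunk, so treat the motivation as inferred. From userspace the visible behavior is an ordinary refused connect():

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr = { .sin_family = AF_INET };

        if (fd < 0)
                return 1;

        addr.sin_port = htons(1);       /* port unlikely to be listening */
        inet_pton(AF_INET, "127.0.0.1", &addr.sin_addr);

        /* On loopback a refusal is reported immediately. */
        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 &&
            errno == ECONNREFUSED)
                fprintf(stderr, "refused: %s\n", strerror(errno));

        close(fd);
        return 0;
}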