Diffstat (limited to 'net/ipv4/tcp_output.c')
 -rw-r--r--  net/ipv4/tcp_output.c | 69
1 file changed, 33 insertions, 36 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 170737a9d56d..672854664ff5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
+EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);
 
@@ -634,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
	unsigned int size = 0;
	unsigned int eff_sacks;
 
+	opts->options = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
	*md5 = tp->af_specific->md5_lookup(sk, sk);
	if (unlikely(*md5)) {
@@ -845,15 +850,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
-	/* If congestion control is doing timestamping, we must
-	 * take such a timestamp before we potentially clone/copy.
-	 */
-	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-		__net_timestamp(skb);
-
-	if (likely(clone_it)) {
+	if (clone_it) {
		const struct sk_buff *fclone = skb + 1;
 
+		/* If congestion control is doing timestamping, we must
+		 * take such a timestamp before we potentially clone/copy.
+		 */
+		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+			__net_timestamp(skb);
+
		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
			     fclone->fclone == SKB_FCLONE_CLONE))
			NET_INC_STATS_BH(sock_net(sk),
@@ -892,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
	skb_orphan(skb);
	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
	/* Build TCP header and checksum it. */
@@ -982,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
				 unsigned int mss_now)
 {
-	if (skb->len <= mss_now || !sk_can_gso(sk) ||
-	    skb->ip_summed == CHECKSUM_NONE) {
+	/* Make sure we own this skb before messing gso_size/gso_segs */
+	WARN_ON_ONCE(skb_cloned(skb));
+
+	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
		/* Avoid the costly divide in the normal
		 * non-TSO case.
		 */
@@ -1063,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
	if (nsize < 0)
		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
		return -ENOMEM;
 
	/* Get a new skb... force flag on. */
@@ -1628,7 +1632,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
	/* If a full-sized TSO skb can be sent, do it. */
	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   sk->sk_gso_max_segs * tp->mss_cache))
+			   tp->xmit_size_goal_segs * tp->mss_cache))
		goto send_now;
 
	/* Middle in queue won't get any more data, full sendable already? */
@@ -1837,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
	while ((skb = tcp_send_head(sk))) {
		unsigned int limit;
 
-
		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
		BUG_ON(!tso_segs);
 
@@ -1866,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			break;
		}
 
-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
			break;
		}
+
		limit = mss_now;
		if (tso_segs > 1 && !tcp_urg_mode(tp))
			limit = tcp_mss_split_point(sk, skb, mss_now,
@@ -2334,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
		int oldpcount = tcp_skb_pcount(skb);
 
		if (unlikely(oldpcount > 1)) {
+			if (skb_unclone(skb, GFP_ATOMIC))
+				return -ENOMEM;
			tcp_init_tso_segs(sk, skb, cur_mss);
			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
		}
@@ -2341,21 +2353,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	/* Some Solaris stacks overoptimize and ignore the FIN on a
-	 * retransmit when old data is attached. So strip it off
-	 * since it is cheap to do so and saves bytes on the network.
-	 */
-	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
-	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
-		if (!pskb_trim(skb, 0)) {
-			/* Reuse, even though it does some unnecessary work */
-			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
-					     TCP_SKB_CB(skb)->tcp_flags);
-			skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
-
	/* Make a copy, if the first transmission SKB clone we made
	 * is still in somebody's hands, else make a clone.
	 */
@@ -2724,8 +2721,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
	th->syn = 1;
	th->ack = 1;
	TCP_ECN_make_synack(req, th);
-	th->source = ireq->loc_port;
-	th->dest = ireq->rmt_port;
+	th->source = htons(ireq->ir_num);
+	th->dest = ireq->ir_rmt_port;
	/* Setting of flags are superfluous here for callers (and ECE is
	 * not even correctly set)
	 */
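
Note on the TCP Small Queues change in the tcp_write_xmit() hunk above: sk_pacing_rate is kept in bytes per second, so "sk->sk_pacing_rate >> 10" divides it by 1024, roughly the number of bytes the flow transmits in one millisecond, and the max() against skb->truesize keeps at least one full packet allowed at very low rates. A minimal userspace sketch of that arithmetic follows; the 1 Gbit/s rate and 2048-byte truesize are illustrative values, not taken from the patch.

#include <stdio.h>

int main(void)
{
	/* Illustrative values, not from the patch. */
	unsigned long pacing_rate = 125000000UL; /* ~1 Gbit/s, in bytes per second */
	unsigned long skb_truesize = 2048;       /* assumed truesize of one queued skb */

	/* Mirrors: limit = max(skb->truesize, sk->sk_pacing_rate >> 10); */
	unsigned long limit = pacing_rate >> 10; /* ~1 ms worth of bytes */
	if (limit < skb_truesize)
		limit = skb_truesize;

	/* Prints 122070: about 122 KB may sit in qdisc/device queues before
	 * TSQ_THROTTLED is set; at low rates the truesize floor wins instead.
	 */
	printf("TSQ limit: %lu bytes\n", limit);
	return 0;
}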
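
tcp_fragment() and __tcp_retransmit_skb() above now call skb_unclone(skb, GFP_ATOMIC) before modifying gso_size/gso_segs. A rough sketch of what that helper is expected to do in this context (simplified, kernel context assumed; the _sketch name is made up and this is not the actual include/linux/skbuff.h implementation): if the skb header is shared with a clone, reallocate it so the caller owns a private copy, otherwise do nothing.

#include <linux/skbuff.h>

/* Simplified stand-in for skb_unclone(). */
static inline int skb_unclone_sketch(struct sk_buff *skb, gfp_t gfp)
{
	if (skb_cloned(skb))
		/* Re-allocate the header so we own it; 0 on success, -ENOMEM on failure. */
		return pskb_expand_head(skb, 0, 0, gfp);
	return 0;	/* already private, nothing to do */
}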
