Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 64 | +++++++++++++++++++++++++++++-----------------------------------
1 file changed, 29 insertions(+), 35 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7c83cb8bf137..672854664ff5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -637,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 	unsigned int size = 0;
 	unsigned int eff_sacks;
 
+	opts->options = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
 	if (unlikely(*md5)) {
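Note on the hunk above: tcp_established_options() only ever ORs flag bits (MD5, timestamps, SACK) into opts->options, so the mask is now zeroed explicitly instead of trusting every caller to pre-clear the struct on its stack. A minimal userspace sketch of the pattern, with illustrative names rather than the kernel's:

	/* Sketch: OR-accumulating into an uninitialized bitmask reads
	 * stack garbage; the explicit zeroing makes the result defined. */
	#include <stdio.h>

	#define OPT_TS	(1U << 0)
	#define OPT_MD5	(1U << 1)

	struct out_opts {
		unsigned int options;	/* flag bits are only ever OR-ed in */
	};

	static void build_opts(struct out_opts *opts, int want_ts)
	{
		opts->options = 0;	/* the fix: start from a clean mask */
		if (want_ts)
			opts->options |= OPT_TS;
	}

	int main(void)
	{
		struct out_opts opts;	/* automatic storage, not zeroed */

		build_opts(&opts, 1);
		printf("options = %#x\n", opts.options);
		return 0;
	}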
@@ -848,15 +850,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
-	/* If congestion control is doing timestamping, we must
-	 * take such a timestamp before we potentially clone/copy.
-	 */
-	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-		__net_timestamp(skb);
-
-	if (likely(clone_it)) {
+	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
+		/* If congestion control is doing timestamping, we must
+		 * take such a timestamp before we potentially clone/copy.
+		 */
+		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+			__net_timestamp(skb);
+
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
 			NET_INC_STATS_BH(sock_net(sk),
@@ -895,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	/* Build TCP header and checksum it. */
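Note on the hunk above: with the TSQ limit now computed per flow (see the tcp_write_xmit hunk below), the destructor no longer has to fall back to plain sock_wfree() when the sysctl is zero, so tcp_wfree is installed unconditionally. A simplified sketch of the destructor's role; the real tcp_wfree() in this file defers the restart to a per-cpu tasklet:

	/* Simplified sketch, not the kernel's exact code: release the wmem
	 * charge and, if TSQ throttled this flow, let it transmit again. */
	static void tcp_wfree_sketch(struct sk_buff *skb)
	{
		struct sock *sk = skb->sk;
		struct tcp_sock *tp = tcp_sk(sk);

		if (test_and_clear_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
			/* real code: queue tp on the tsq tasklet so
			 * tcp_write_xmit() is re-run from softirq */
		}
		sock_wfree(skb);	/* drops skb->truesize from sk_wmem_alloc */
	}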
@@ -985,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
-	if (skb->len <= mss_now || !sk_can_gso(sk) ||
-	    skb->ip_summed == CHECKSUM_NONE) {
+	/* Make sure we own this skb before messing gso_size/gso_segs */
+	WARN_ON_ONCE(skb_cloned(skb));
+
+	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
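Note on the hunk above: gso_size/gso_segs live in the skb_shared_info block at the tail of the data buffer, and that block is shared with any clone, so mangling it on a cloned skb would corrupt the clone's view as well. The ownership test being asserted has roughly this shape (per include/linux/skbuff.h):

	/* Rough shape of skb_cloned(): the skb was cloned and other
	 * users still reference the shared data area. */
	static inline int skb_cloned_sketch(const struct sk_buff *skb)
	{
		return skb->cloned &&
		       (atomic_read(&skb_shinfo(skb)->dataref) &
			SKB_DATAREF_MASK) != 1;
	}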
@@ -1066,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
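Note on the hunk above: skb_unclone() folds the clone test and the private-copy step into one helper; its core logic is roughly (per include/linux/skbuff.h):

	/* Rough core of skb_unclone(): take sole ownership of the skb
	 * header and shared info before callers modify them. */
	static inline int skb_unclone_sketch(struct sk_buff *skb, gfp_t pri)
	{
		if (skb_cloned(skb))
			return pskb_expand_head(skb, 0, 0, pri);
		return 0;
	}

Note also that the open-coded check additionally required skb_is_nonlinear(skb); the helper drops that condition, so cloned linear skbs are now copied as well, which is the safer behaviour before the gso fields are rewritten.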
@@ -1840,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
-
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
@@ -1869,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			break;
 		}
+
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
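Note on the hunk above: this replaces the fixed sysctl_tcp_limit_output_bytes cap with a rate-scaled one. sk_pacing_rate is in bytes per second, so sk_pacing_rate >> 10 is about one millisecond's worth of data (a divide by 1024 ≈ 1000), and the max() with skb->truesize keeps at least one full skb outstanding even at very low pacing rates. A userspace demo of the resulting limits at a few rates:

	/* Demo of the dynamic TSQ limit: rate >> 10 ~= 1 ms of data. */
	#include <stdio.h>

	int main(void)
	{
		unsigned long rates[] = { 1250000UL,	/* 10 Mbit/s */
					  12500000UL,	/* 100 Mbit/s */
					  1250000000UL };	/* 10 Gbit/s */
		unsigned long truesize = 2048;	/* a typical one-MSS skb */

		for (int i = 0; i < 3; i++) {
			unsigned long limit = rates[i] >> 10;

			if (limit < truesize)	/* max(skb->truesize, ...) */
				limit = truesize;
			printf("pacing %10lu B/s -> limit %8lu bytes\n",
			       rates[i], limit);
		}
		return 0;
	}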
@@ -2337,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		int oldpcount = tcp_skb_pcount(skb);
 
 		if (unlikely(oldpcount > 1)) {
+			if (skb_unclone(skb, GFP_ATOMIC))
+				return -ENOMEM;
 			tcp_init_tso_segs(sk, skb, cur_mss);
 			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
 		}
@@ -2344,21 +2353,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	/* Some Solaris stacks overoptimize and ignore the FIN on a
-	 * retransmit when old data is attached.  So strip it off
-	 * since it is cheap to do so and saves bytes on the network.
-	 */
-	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
-	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
-		if (!pskb_trim(skb, 0)) {
-			/* Reuse, even though it does some unnecessary work */
-			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
-					     TCP_SKB_CB(skb)->tcp_flags);
-			skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
-
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebody's hands, else make a clone.
 	 */
@@ -2727,8 +2721,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = ireq->loc_port;
-	th->dest = ireq->rmt_port;
+	th->source = htons(ireq->ir_num);
+	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
 	 */
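Note on the hunk above: the inet_request_sock port fields were renamed, and the local port (ir_num) is now stored in host byte order, which is why it needs htons() here, while ir_rmt_port is kept in network byte order and is assigned to th->dest directly. A tiny userspace illustration of the distinction:

	/* ir_num is host order and needs conversion; ir_rmt_port is
	 * already network order and is used as-is. */
	#include <arpa/inet.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned short ir_num = 80;		   /* host order */
		unsigned short ir_rmt_port = htons(40000); /* network order */

		printf("th->source = %#06x\n", htons(ir_num));
		printf("th->dest   = %#06x\n", ir_rmt_port);
		return 0;
	}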