Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	69
1 file changed, 33 insertions(+), 36 deletions(-)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 170737a9d56d..672854664ff5 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -65,6 +65,9 @@ int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
 /* By default, RFC2861 behavior. */
 int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
 
+unsigned int sysctl_tcp_notsent_lowat __read_mostly = UINT_MAX;
+EXPORT_SYMBOL(sysctl_tcp_notsent_lowat);
+
 static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			   int push_one, gfp_t gfp);
 
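The new sysctl defaults to UINT_MAX, so the not-sent low-watermark is effectively off until an administrator lowers net.ipv4.tcp_notsent_lowat. The same series adds a per-socket TCP_NOTSENT_LOWAT option; a minimal userspace sketch of that form, where the 128 KB threshold is an arbitrary example value:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

/* Cap unsent queued data so poll()/select() report the socket writable
 * only while the stack can actually take more, instead of letting the
 * application buffer without bound.
 */
static int set_notsent_lowat(int fd)
{
	unsigned int lowat = 128 * 1024;	/* example threshold */

	return setsockopt(fd, IPPROTO_TCP, TCP_NOTSENT_LOWAT,
			  &lowat, sizeof(lowat));
}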
@@ -634,6 +637,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
 	unsigned int size = 0;
 	unsigned int eff_sacks;
 
+	opts->options = 0;
+
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tp->af_specific->md5_lookup(sk, sk);
 	if (unlikely(*md5)) {
@@ -845,15 +850,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	BUG_ON(!skb || !tcp_skb_pcount(skb));
 
-	/* If congestion control is doing timestamping, we must
-	 * take such a timestamp before we potentially clone/copy.
-	 */
-	if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
-		__net_timestamp(skb);
-
-	if (likely(clone_it)) {
+	if (clone_it) {
 		const struct sk_buff *fclone = skb + 1;
 
+		/* If congestion control is doing timestamping, we must
+		 * take such a timestamp before we potentially clone/copy.
+		 */
+		if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
+			__net_timestamp(skb);
+
 		if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
 			     fclone->fclone == SKB_FCLONE_CLONE))
 			NET_INC_STATS_BH(sock_net(sk),
@@ -892,8 +897,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	skb_orphan(skb);
 	skb->sk = sk;
-	skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
-			  tcp_wfree : sock_wfree;
+	skb->destructor = tcp_wfree;
 	atomic_add(skb->truesize, &sk->sk_wmem_alloc);
 
 	/* Build TCP header and checksum it. */
@@ -982,8 +986,10 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
 static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
 				 unsigned int mss_now)
 {
-	if (skb->len <= mss_now || !sk_can_gso(sk) ||
-	    skb->ip_summed == CHECKSUM_NONE) {
+	/* Make sure we own this skb before messing gso_size/gso_segs */
+	WARN_ON_ONCE(skb_cloned(skb));
+
+	if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
 		/* Avoid the costly divide in the normal
 		 * non-TSO case.
 		 */
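The WARN_ON_ONCE here is about ownership: gso_size, gso_segs and gso_type live in skb_shinfo(skb), which a clone shares with its original, so writing them through a cloned skb silently mutates the twin as well. Paraphrasing (not quoting) what this function goes on to do with those fields:

/* Sketch, paraphrased from the surrounding function body: */
if (skb->len <= mss_now || skb->ip_summed == CHECKSUM_NONE) {
	skb_shinfo(skb)->gso_segs = 1;	/* cheap non-TSO path, no divide */
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_type = 0;
} else {
	skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss_now);
	skb_shinfo(skb)->gso_size = mss_now;
	skb_shinfo(skb)->gso_type = sk->sk_gso_type;
}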
@@ -1063,9 +1069,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	if (nsize < 0)
 		nsize = 0;
 
-	if (skb_cloned(skb) &&
-	    skb_is_nonlinear(skb) &&
-	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+	if (skb_unclone(skb, GFP_ATOMIC))
 		return -ENOMEM;
 
 	/* Get a new skb... force flag on. */
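skb_unclone() collapses the old three-part test into a single helper, and also takes care of the cloned-but-linear case that the previous condition skipped. Roughly as defined in include/linux/skbuff.h in this era:

/* Take private ownership of the skb's data before modifying it,
 * copying only when it is actually shared with a clone.
 */
static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
{
	might_sleep_if(pri & __GFP_WAIT);

	if (skb_cloned(skb))
		return pskb_expand_head(skb, 0, 0, pri);
	return 0;
}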
@@ -1628,7 +1632,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
 	/* If a full-sized TSO skb can be sent, do it. */
 	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   sk->sk_gso_max_segs * tp->mss_cache))
+			   tp->xmit_size_goal_segs * tp->mss_cache))
 		goto send_now;
 
 	/* Middle in queue won't get any more data, full sendable already? */
@@ -1837,7 +1841,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
-
 		tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
 		BUG_ON(!tso_segs);
 
@@ -1866,13 +1869,20 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		/* TSQ : sk_wmem_alloc accounts skb truesize,
-		 * including skb overhead. But thats OK.
+		/* TCP Small Queues :
+		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+		 * This allows for :
+		 *  - better RTT estimation and ACK scheduling
+		 *  - faster recovery
+		 *  - high rates
 		 */
-		if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+		limit = max(skb->truesize, sk->sk_pacing_rate >> 10);
+
+		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
 			break;
 		}
+
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
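The fixed sysctl_tcp_limit_output_bytes cap becomes a per-socket computation: sk_pacing_rate is in bytes per second, so shifting it right by 10 (dividing by 1024) yields roughly one millisecond of data at the current rate, and max() with skb->truesize guarantees at least one full skb can always be queued. A standalone worked example with illustrative values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pacing_rate = 125ULL * 1000 * 1000; /* 1 Gbit/s in bytes/sec */
	uint64_t truesize = 66 * 1024;		/* example full-sized TSO skb */
	uint64_t ms_worth = pacing_rate >> 10;	/* ~122 KB, ~1 ms of data */
	uint64_t limit = truesize > ms_worth ? truesize : ms_worth;

	printf("TSQ limit = %llu bytes\n", (unsigned long long)limit);
	return 0;
}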
@@ -2334,6 +2344,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 		int oldpcount = tcp_skb_pcount(skb);
 
 		if (unlikely(oldpcount > 1)) {
+			if (skb_unclone(skb, GFP_ATOMIC))
+				return -ENOMEM;
 			tcp_init_tso_segs(sk, skb, cur_mss);
 			tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
 		}
@@ -2341,21 +2353,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	/* Some Solaris stacks overoptimize and ignore the FIN on a
-	 * retransmit when old data is attached. So strip it off
-	 * since it is cheap to do so and saves bytes on the network.
-	 */
-	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
-	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
-		if (!pskb_trim(skb, 0)) {
-			/* Reuse, even though it does some unnecessary work */
-			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
-					     TCP_SKB_CB(skb)->tcp_flags);
-			skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
-
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebody's hands, else make a clone.
 	 */
@@ -2724,8 +2721,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->syn = 1;
 	th->ack = 1;
 	TCP_ECN_make_synack(req, th);
-	th->source = ireq->loc_port;
-	th->dest = ireq->rmt_port;
+	th->source = htons(ireq->ir_num);
+	th->dest = ireq->ir_rmt_port;
 	/* Setting of flags are superfluous here for callers (and ECE is
 	 * not even correctly set)
 	 */