Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/tcp.c         | 60
-rw-r--r--  net/ipv4/tcp_output.c  | 59
2 files changed, 62 insertions, 57 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dc13a3657e8e..427aee33ffc0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,47 +835,29 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 				       int large_allowed)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	u32 xmit_size_goal, old_size_goal;
-
-	xmit_size_goal = mss_now;
-
-	if (large_allowed && sk_can_gso(sk)) {
-		u32 gso_size, hlen;
-
-		/* Maybe we should/could use sk->sk_prot->max_header here ? */
-		hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
-		       inet_csk(sk)->icsk_ext_hdr_len +
-		       tp->tcp_header_len;
-
-		/* Goal is to send at least one packet per ms,
-		 * not one big TSO packet every 100 ms.
-		 * This preserves ACK clocking and is consistent
-		 * with tcp_tso_should_defer() heuristic.
-		 */
-		gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
-		gso_size = max_t(u32, gso_size,
-				 sysctl_tcp_min_tso_segs * mss_now);
-
-		xmit_size_goal = min_t(u32, gso_size,
-				       sk->sk_gso_max_size - 1 - hlen);
-
-		xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
-
-		/* We try hard to avoid divides here */
-		old_size_goal = tp->xmit_size_goal_segs * mss_now;
-
-		if (likely(old_size_goal <= xmit_size_goal &&
-			   old_size_goal + mss_now > xmit_size_goal)) {
-			xmit_size_goal = old_size_goal;
-		} else {
-			tp->xmit_size_goal_segs =
-				min_t(u16, xmit_size_goal / mss_now,
-				      sk->sk_gso_max_segs);
-			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
-		}
-	}
-
-	return max(xmit_size_goal, mss_now);
+	u32 new_size_goal, size_goal, hlen;
+
+	if (!large_allowed || !sk_can_gso(sk))
+		return mss_now;
+
+	/* Maybe we should/could use sk->sk_prot->max_header here ? */
+	hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+	       inet_csk(sk)->icsk_ext_hdr_len +
+	       tp->tcp_header_len;
+
+	new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+	new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
+
+	/* We try hard to avoid divides here */
+	size_goal = tp->gso_segs * mss_now;
+	if (unlikely(new_size_goal < size_goal ||
+		     new_size_goal >= size_goal + mss_now)) {
+		tp->gso_segs = min_t(u16, new_size_goal / mss_now,
+				     sk->sk_gso_max_segs);
+		size_goal = tp->gso_segs * mss_now;
+	}
+
+	return max(size_goal, mss_now);
 }
 
 static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
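The rewritten tcp_xmit_size_goal() flattens the old nested branch into an early return and keeps the "avoid divides" trick: the segment count is cached in tp->gso_segs, and the division only happens when new_size_goal drifts out of the current mss-aligned bucket [size_goal, size_goal + mss_now). Below is a standalone userspace sketch of that caching idea, not kernel code; the struct-free globals and values are hypothetical, and the sk_gso_max_segs clamp is omitted for brevity.

#include <stdio.h>
#include <stdint.h>

static uint16_t cached_segs;	/* plays the role of tp->gso_segs */

static uint32_t size_goal(uint32_t new_size_goal, uint32_t mss_now)
{
	uint32_t goal = (uint32_t)cached_segs * mss_now;

	/* Divide only when new_size_goal leaves the current
	 * mss-aligned bucket [goal, goal + mss_now).
	 */
	if (new_size_goal < goal || new_size_goal >= goal + mss_now) {
		cached_segs = new_size_goal / mss_now;
		goal = (uint32_t)cached_segs * mss_now;
	}
	return goal;
}

int main(void)
{
	/* 64000 and 64500 round down to the same multiple of 1448,
	 * so the second call skips the division entirely.
	 */
	printf("%u\n", size_goal(64000, 1448));	/* 63712, divides     */
	printf("%u\n", size_goal(64500, 1448));	/* 63712, cache hit   */
	return 0;
}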
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f5bd4bd3f7e6..f37ecf53ee8a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1524,6 +1524,27 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp,
 		((nonagle & TCP_NAGLE_CORK) ||
 		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }
+
+/* Return how many segs we'd like on a TSO packet,
+ * to send one TSO packet per ms
+ */
+static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+{
+	u32 bytes, segs;
+
+	bytes = min(sk->sk_pacing_rate >> 10,
+		    sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+
+	/* Goal is to send at least one packet per ms,
+	 * not one big TSO packet every 100 ms.
+	 * This preserves ACK clocking and is consistent
+	 * with tcp_tso_should_defer() heuristic.
+	 */
+	segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+
+	return min_t(u32, segs, sk->sk_gso_max_segs);
+}
+
 /* Returns the portion of skb which can be sent right away */
 static unsigned int tcp_mss_split_point(const struct sock *sk,
 					const struct sk_buff *skb,
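In tcp_tso_autosize(), sk_pacing_rate is in bytes per second, so the >> 10 shift (a divide by 1024) approximates the number of bytes the flow may send in one millisecond; the result is clamped below by sysctl_tcp_min_tso_segs and above by the device GSO limits. A rough userspace sketch of the arithmetic follows; the constants are illustrative stand-ins for the socket fields (e.g. a min_tso_segs default of 2), not authoritative values.

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t mss_now = 1448;		/* typical Ethernet MSS   */
	uint32_t pacing_rate = 12500000;	/* 100 Mbit/s in bytes/s  */
	uint32_t min_tso_segs = 2;		/* assumed sysctl default */
	uint32_t gso_max_segs = 65535;		/* assumed device limit   */

	/* >> 10 divides by 1024: roughly the bytes sent in 1 ms */
	uint32_t bytes = pacing_rate >> 10;	/* ~12207 bytes */
	uint32_t segs = bytes / mss_now;	/* 8 segments   */

	if (segs < min_tso_segs)
		segs = min_tso_segs;
	if (segs > gso_max_segs)
		segs = gso_max_segs;

	printf("TSO budget: %u segs (%u bytes)\n", segs, segs * mss_now);
	return 0;
}

At 100 Mbit/s this yields an 8-segment TSO budget (~11.6 KB) rather than a full 64 KB super-packet, which is what preserves ACK clocking on slower flows.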
@@ -1731,7 +1752,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
  * This algorithm is from John Heffner.
  */
 static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
-				 bool *is_cwnd_limited)
+				 bool *is_cwnd_limited, u32 max_segs)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1761,8 +1782,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
 	limit = min(send_win, cong_win);
 
 	/* If a full-sized TSO skb can be sent, do it. */
-	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   tp->xmit_size_goal_segs * tp->mss_cache))
+	if (limit >= max_segs * tp->mss_cache)
 		goto send_now;
 
 	/* Middle in queue won't get any more data, full sendable already? */
@@ -1959,6 +1979,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 	int cwnd_quota;
 	int result;
 	bool is_cwnd_limited = false;
+	u32 max_segs;
 
 	sent_pkts = 0;
 
@@ -1972,6 +1993,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		}
 	}
 
+	max_segs = tcp_tso_autosize(sk, mss_now);
 	while ((skb = tcp_send_head(sk))) {
 		unsigned int limit;
 
@@ -2004,10 +2026,23 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		} else {
 			if (!push_one &&
-			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited))
+			    tcp_tso_should_defer(sk, skb, &is_cwnd_limited,
+						 max_segs))
 				break;
 		}
 
+		limit = mss_now;
+		if (tso_segs > 1 && !tcp_urg_mode(tp))
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    min_t(unsigned int,
+							  cwnd_quota,
+							  max_segs),
+						    nonagle);
+
+		if (skb->len > limit &&
+		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+			break;
+
 		/* TCP Small Queues :
 		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
 		 * This allows for :
@@ -2018,8 +2053,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		 * of queued bytes to ensure line rate.
 		 * One example is wifi aggregation (802.11 AMPDU)
 		 */
-		limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
-			      sk->sk_pacing_rate >> 10);
+		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
 
 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
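The Small Queues limit is reworked in the hunk above as well: the ~1 ms pacing budget (sk_pacing_rate >> 10) now drives the limit, floored at twice the skb's truesize so at least two packets can sit in the qdisc, while sysctl_tcp_limit_output_bytes changes from a floor into a cap. A userspace sketch with stand-in numbers, assuming the 131072-byte sysctl default of that era:

#include <stdio.h>
#include <stdint.h>

static uint32_t tsq_limit(uint32_t truesize, uint32_t pacing_rate,
			  uint32_t limit_output_bytes)
{
	/* at least two queued skbs, at most ~1 ms worth of bytes... */
	uint32_t limit = 2 * truesize;

	if ((pacing_rate >> 10) > limit)
		limit = pacing_rate >> 10;
	/* ...and the sysctl now caps the result instead of flooring it */
	if (limit > limit_output_bytes)
		limit = limit_output_bytes;
	return limit;
}

int main(void)
{
	/* slow flow: 1 Mbit/s pacing, one 2 KB skb -> 4096-byte limit */
	printf("%u\n", tsq_limit(2048, 125000, 131072));
	/* fast flow: 10 Gbit/s pacing -> capped by the sysctl         */
	printf("%u\n", tsq_limit(66000, 1250000000, 131072));
	return 0;
}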
@@ -2032,18 +2067,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 			break;
 		}
 
-		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
-			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    min_t(unsigned int,
-							  cwnd_quota,
-							  sk->sk_gso_max_segs),
-						    nonagle);
-
-		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
-			break;
-
 		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
 