diff options
-rw-r--r-- | net/ipv4/tcp_output.c | 96 |
1 files changed, 59 insertions, 37 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a524627923ae..86ef98975e94 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -1766,46 +1766,22 @@ u32 __tcp_select_window(struct sock *sk) | |||
1766 | return window; | 1766 | return window; |
1767 | } | 1767 | } |
1768 | 1768 | ||
1769 | /* Attempt to collapse two adjacent SKB's during retransmission. */ | 1769 | /* Collapses two adjacent SKB's during retransmission. */ |
1770 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | 1770 | static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) |
1771 | int mss_now) | ||
1772 | { | 1771 | { |
1773 | struct tcp_sock *tp = tcp_sk(sk); | 1772 | struct tcp_sock *tp = tcp_sk(sk); |
1774 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); | 1773 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
1775 | int skb_size, next_skb_size; | 1774 | int skb_size, next_skb_size; |
1776 | u16 flags; | 1775 | u16 flags; |
1777 | 1776 | ||
1778 | /* The first test we must make is that neither of these two | ||
1779 | * SKB's are still referenced by someone else. | ||
1780 | */ | ||
1781 | if (skb_cloned(skb) || skb_cloned(next_skb)) | ||
1782 | return; | ||
1783 | |||
1784 | skb_size = skb->len; | 1777 | skb_size = skb->len; |
1785 | next_skb_size = next_skb->len; | 1778 | next_skb_size = next_skb->len; |
1786 | flags = TCP_SKB_CB(skb)->flags; | 1779 | flags = TCP_SKB_CB(skb)->flags; |
1787 | 1780 | ||
1788 | /* Also punt if next skb has been SACK'd. */ | ||
1789 | if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED) | ||
1790 | return; | ||
1791 | |||
1792 | /* Next skb is out of window. */ | ||
1793 | if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp))) | ||
1794 | return; | ||
1795 | |||
1796 | /* Punt if not enough space exists in the first SKB for | ||
1797 | * the data in the second, or the total combined payload | ||
1798 | * would exceed the MSS. | ||
1799 | */ | ||
1800 | if ((next_skb_size > skb_tailroom(skb)) || | ||
1801 | ((skb_size + next_skb_size) > mss_now)) | ||
1802 | return; | ||
1803 | |||
1804 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); | 1781 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
1805 | 1782 | ||
1806 | tcp_highest_sack_combine(sk, next_skb, skb); | 1783 | tcp_highest_sack_combine(sk, next_skb, skb); |
1807 | 1784 | ||
1808 | /* Ok. We will be able to collapse the packet. */ | ||
1809 | tcp_unlink_write_queue(next_skb, sk); | 1785 | tcp_unlink_write_queue(next_skb, sk); |
1810 | 1786 | ||
1811 | skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), | 1787 | skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size), |
@@ -1847,6 +1823,62 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, | |||
1847 | sk_wmem_free_skb(sk, next_skb); | 1823 | sk_wmem_free_skb(sk, next_skb); |
1848 | } | 1824 | } |
1849 | 1825 | ||
1826 | static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) | ||
1827 | { | ||
1828 | if (tcp_skb_pcount(skb) > 1) | ||
1829 | return 0; | ||
1830 | /* TODO: SACK collapsing could be used to remove this condition */ | ||
1831 | if (skb_shinfo(skb)->nr_frags != 0) | ||
1832 | return 0; | ||
1833 | if (skb_cloned(skb)) | ||
1834 | return 0; | ||
1835 | if (skb == tcp_send_head(sk)) | ||
1836 | return 0; | ||
1837 | /* Some heuristics for collapsing over SACK'd could be invented */ | ||
1838 | if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) | ||
1839 | return 0; | ||
1840 | |||
1841 | return 1; | ||
1842 | } | ||
1843 | |||
1844 | static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, | ||
1845 | int space) | ||
1846 | { | ||
1847 | struct tcp_sock *tp = tcp_sk(sk); | ||
1848 | struct sk_buff *skb = to, *tmp; | ||
1849 | int first = 1; | ||
1850 | |||
1851 | if (!sysctl_tcp_retrans_collapse) | ||
1852 | return; | ||
1853 | if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) | ||
1854 | return; | ||
1855 | |||
1856 | tcp_for_write_queue_from_safe(skb, tmp, sk) { | ||
1857 | if (!tcp_can_collapse(sk, skb)) | ||
1858 | break; | ||
1859 | |||
1860 | space -= skb->len; | ||
1861 | |||
1862 | if (first) { | ||
1863 | first = 0; | ||
1864 | continue; | ||
1865 | } | ||
1866 | |||
1867 | if (space < 0) | ||
1868 | break; | ||
1869 | /* Punt if not enough space exists in the first SKB for | ||
1870 | * the data in the second | ||
1871 | */ | ||
1872 | if (skb->len > skb_tailroom(to)) | ||
1873 | break; | ||
1874 | |||
1875 | if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp))) | ||
1876 | break; | ||
1877 | |||
1878 | tcp_collapse_retrans(sk, to); | ||
1879 | } | ||
1880 | } | ||
1881 | |||
1850 | /* Do a simple retransmit without using the backoff mechanisms in | 1882 | /* Do a simple retransmit without using the backoff mechanisms in |
1851 | * tcp_timer. This is used for path mtu discovery. | 1883 | * tcp_timer. This is used for path mtu discovery. |
1852 | * The socket is already locked here. | 1884 | * The socket is already locked here. |
@@ -1946,17 +1978,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1946 | return -ENOMEM; /* We'll try again later. */ | 1978 | return -ENOMEM; /* We'll try again later. */ |
1947 | } | 1979 | } |
1948 | 1980 | ||
1949 | /* Collapse two adjacent packets if worthwhile and we can. */ | 1981 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
1950 | if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && | ||
1951 | (skb->len < (cur_mss >> 1)) && | ||
1952 | (!tcp_skb_is_last(sk, skb)) && | ||
1953 | (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && | ||
1954 | (skb_shinfo(skb)->nr_frags == 0 && | ||
1955 | skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && | ||
1956 | (tcp_skb_pcount(skb) == 1 && | ||
1957 | tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) && | ||
1958 | (sysctl_tcp_retrans_collapse != 0)) | ||
1959 | tcp_retrans_try_collapse(sk, skb, cur_mss); | ||
1960 | 1982 | ||
1961 | /* Some Solaris stacks overoptimize and ignore the FIN on a | 1983 | /* Some Solaris stacks overoptimize and ignore the FIN on a |
1962 | * retransmit when old data is attached. So strip it off | 1984 | * retransmit when old data is attached. So strip it off |