Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r--	net/ipv4/tcp_output.c	219
1 file changed, 88 insertions(+), 131 deletions(-)
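The subtlest change below is the hunk at line 663, where tcp_transmit_skb() fills in the TCP urgent pointer: the field is only a 16-bit offset from the segment's sequence number, so when snd_up lies more than 0xFFFF bytes ahead the sender can only advertise the maximum offset. A minimal standalone sketch of that clamping (a hypothetical helper for illustration, not code from this patch; the real hunk additionally requires the segment to be close enough to snd_nxt before it advertises 0xFFFF):

	#include <stdint.h>

	/* Hypothetical helper: compute the 16-bit urgent-pointer offset for a
	 * segment starting at 'seq' when urgent data ends at 'snd_up'. Offsets
	 * beyond the reach of the 16-bit field are clamped to 0xFFFF, mirroring
	 * the "else if" branch added in the tcp_transmit_skb() hunk below.
	 */
	static uint16_t urg_ptr_for_segment(uint32_t snd_up, uint32_t seq)
	{
		uint32_t offset = snd_up - seq;	/* distance to the urgent byte */

		return offset > 0xFFFF ? 0xFFFF : (uint16_t)offset;
	}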
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index fe3b4bdfd251..557fe16cbfb0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -42,7 +42,7 @@
 /* People can turn this off for buggy TCP's found in printers etc. */
 int sysctl_tcp_retrans_collapse __read_mostly = 1;
 
 /* People can turn this on to work with those rare, broken TCPs that
  * interpret the window field as a signed quantity.
  */
 int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
@@ -484,7 +484,7 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
 	}
 	if (likely(sysctl_tcp_window_scaling)) {
 		opts->ws = tp->rx_opt.rcv_wscale;
-		if(likely(opts->ws))
+		if (likely(opts->ws))
 			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(sysctl_tcp_sack)) {
@@ -526,7 +526,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 
 	if (likely(ireq->wscale_ok)) {
 		opts->ws = ireq->rcv_wscale;
-		if(likely(opts->ws))
+		if (likely(opts->ws))
 			size += TCPOLEN_WSCALE_ALIGNED;
 	}
 	if (likely(doing_ts)) {
@@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	th->urg_ptr = 0;
 
 	/* The urg_mode check is necessary during a below snd_una win probe */
-	if (unlikely(tcp_urg_mode(tp) &&
-		     between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) {
-		th->urg_ptr = htons(tp->snd_up - tcb->seq);
-		th->urg = 1;
+	if (unlikely(tcp_urg_mode(tp))) {
+		if (between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF)) {
+			th->urg_ptr = htons(tp->snd_up - tcb->seq);
+			th->urg = 1;
+		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
+			th->urg_ptr = 0xFFFF;
+			th->urg = 1;
+		}
 	}
 
 	tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
@@ -1168,7 +1172,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb,
 
 static inline int tcp_minshall_check(const struct tcp_sock *tp)
 {
-	return after(tp->snd_sml,tp->snd_una) &&
+	return after(tp->snd_sml, tp->snd_una) &&
 		!after(tp->snd_sml, tp->snd_nxt);
 }
 
@@ -1334,7 +1338,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 
 	/* Defer for less than two clock ticks. */
 	if (tp->tso_deferred &&
-	    ((jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
+	    (((u32)jiffies << 1) >> 1) - (tp->tso_deferred >> 1) > 1)
 		goto send_now;
 
 	in_flight = tcp_packets_in_flight(tp);
@@ -1519,7 +1523,8 @@ static int tcp_mtu_probe(struct sock *sk)
  * Returns 1, if no segments are in flight and we have queued segments, but
  * cannot send anything now because of SWS or another problem.
  */
-static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
+static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
+			  int push_one, gfp_t gfp)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -1527,20 +1532,16 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 	int cwnd_quota;
 	int result;
 
-	/* If we are closed, the bytes will have to remain here.
-	 * In time closedown will finish, we empty the write queue and all
-	 * will be happy.
-	 */
-	if (unlikely(sk->sk_state == TCP_CLOSE))
-		return 0;
-
 	sent_pkts = 0;
 
-	/* Do MTU probing. */
-	if ((result = tcp_mtu_probe(sk)) == 0) {
-		return 0;
-	} else if (result > 0) {
-		sent_pkts = 1;
+	if (!push_one) {
+		/* Do MTU probing. */
+		result = tcp_mtu_probe(sk);
+		if (!result) {
+			return 0;
+		} else if (result > 0) {
+			sent_pkts = 1;
+		}
 	}
 
 	while ((skb = tcp_send_head(sk))) {
@@ -1562,7 +1563,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
						      nonagle : TCP_NAGLE_PUSH))))
 				break;
 		} else {
-			if (tcp_tso_should_defer(sk, skb))
+			if (!push_one && tcp_tso_should_defer(sk, skb))
 				break;
 		}
 
@@ -1577,7 +1578,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-		if (unlikely(tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC)))
+		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
 			break;
 
 		/* Advance the send_head. This one is sent out.
@@ -1587,6 +1588,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 
 		tcp_minshall_update(tp, mss_now, skb);
 		sent_pkts++;
+
+		if (push_one)
+			break;
 	}
 
 	if (likely(sent_pkts)) {
@@ -1605,10 +1609,18 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 {
 	struct sk_buff *skb = tcp_send_head(sk);
 
-	if (skb) {
-		if (tcp_write_xmit(sk, cur_mss, nonagle))
-			tcp_check_probe_timer(sk);
-	}
+	if (!skb)
+		return;
+
+	/* If we are closed, the bytes will have to remain here.
+	 * In time closedown will finish, we empty the write queue and
+	 * all will be happy.
+	 */
+	if (unlikely(sk->sk_state == TCP_CLOSE))
+		return;
+
+	if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC))
+		tcp_check_probe_timer(sk);
 }
 
 /* Send _single_ skb sitting at the send head. This function requires
@@ -1616,38 +1628,11 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
  */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
-	unsigned int tso_segs, cwnd_quota;
 
 	BUG_ON(!skb || skb->len < mss_now);
 
-	tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
-	cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
-
-	if (likely(cwnd_quota)) {
-		unsigned int limit;
-
-		BUG_ON(!tso_segs);
-
-		limit = mss_now;
-		if (tso_segs > 1 && !tcp_urg_mode(tp))
-			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    cwnd_quota);
-
-		if (skb->len > limit &&
-		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
-			return;
-
-		/* Send it out now. */
-		TCP_SKB_CB(skb)->when = tcp_time_stamp;
-
-		if (likely(!tcp_transmit_skb(sk, skb, 1, sk->sk_allocation))) {
-			tcp_event_new_data_sent(sk, skb);
-			tcp_cwnd_validate(sk);
-			return;
-		}
-	}
+	tcp_write_xmit(sk, mss_now, TCP_NAGLE_PUSH, 1, sk->sk_allocation);
 }
 
 /* This function returns the amount that we can raise the
@@ -1767,46 +1752,22 @@ u32 __tcp_select_window(struct sock *sk)
 	return window;
 }
 
-/* Attempt to collapse two adjacent SKB's during retransmission. */
-static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
-				     int mss_now)
+/* Collapses two adjacent SKB's during retransmission. */
+static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
 	int skb_size, next_skb_size;
 	u16 flags;
 
-	/* The first test we must make is that neither of these two
-	 * SKB's are still referenced by someone else.
-	 */
-	if (skb_cloned(skb) || skb_cloned(next_skb))
-		return;
-
 	skb_size = skb->len;
 	next_skb_size = next_skb->len;
 	flags = TCP_SKB_CB(skb)->flags;
 
-	/* Also punt if next skb has been SACK'd. */
-	if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_ACKED)
-		return;
-
-	/* Next skb is out of window. */
-	if (after(TCP_SKB_CB(next_skb)->end_seq, tcp_wnd_end(tp)))
-		return;
-
-	/* Punt if not enough space exists in the first SKB for
-	 * the data in the second, or the total combined payload
-	 * would exceed the MSS.
-	 */
-	if ((next_skb_size > skb_tailroom(skb)) ||
-	    ((skb_size + next_skb_size) > mss_now))
-		return;
-
 	BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
 
 	tcp_highest_sack_combine(sk, next_skb, skb);
 
-	/* Ok. We will be able to collapse the packet. */
 	tcp_unlink_write_queue(next_skb, sk);
 
 	skb_copy_from_linear_data(next_skb, skb_put(skb, next_skb_size),
@@ -1848,54 +1809,60 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb,
 	sk_wmem_free_skb(sk, next_skb);
 }
 
-/* Do a simple retransmit without using the backoff mechanisms in
- * tcp_timer. This is used for path mtu discovery.
- * The socket is already locked here.
- */
-void tcp_simple_retransmit(struct sock *sk)
+static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb)
+{
+	if (tcp_skb_pcount(skb) > 1)
+		return 0;
+	/* TODO: SACK collapsing could be used to remove this condition */
+	if (skb_shinfo(skb)->nr_frags != 0)
+		return 0;
+	if (skb_cloned(skb))
+		return 0;
+	if (skb == tcp_send_head(sk))
+		return 0;
+	/* Some heurestics for collapsing over SACK'd could be invented */
+	if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
+		return 0;
+
+	return 1;
+}
+
+static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
+				     int space)
 {
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb;
-	unsigned int mss = tcp_current_mss(sk, 0);
-	u32 prior_lost = tp->lost_out;
+	struct sk_buff *skb = to, *tmp;
+	int first = 1;
 
-	tcp_for_write_queue(skb, sk) {
-		if (skb == tcp_send_head(sk))
+	if (!sysctl_tcp_retrans_collapse)
+		return;
+	if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN)
+		return;
+
+	tcp_for_write_queue_from_safe(skb, tmp, sk) {
+		if (!tcp_can_collapse(sk, skb))
 			break;
-		if (skb->len > mss &&
-		    !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
-			if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) {
-				TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
-				tp->retrans_out -= tcp_skb_pcount(skb);
-			}
-			tcp_skb_mark_lost_uncond_verify(tp, skb);
-		}
-	}
 
-	tcp_clear_retrans_hints_partial(tp);
+		space -= skb->len;
 
-	if (prior_lost == tp->lost_out)
-		return;
+		if (first) {
+			first = 0;
+			continue;
+		}
 
-	if (tcp_is_reno(tp))
-		tcp_limit_reno_sacked(tp);
+		if (space < 0)
+			break;
+		/* Punt if not enough space exists in the first SKB for
+		 * the data in the second
+		 */
+		if (skb->len > skb_tailroom(to))
+			break;
 
-	tcp_verify_left_out(tp);
+		if (after(TCP_SKB_CB(skb)->end_seq, tcp_wnd_end(tp)))
+			break;
 
-	/* Don't muck with the congestion window here.
-	 * Reason is that we do not increase amount of _data_
-	 * in network, but units changed and effective
-	 * cwnd/ssthresh really reduced now.
-	 */
-	if (icsk->icsk_ca_state != TCP_CA_Loss) {
-		tp->high_seq = tp->snd_nxt;
-		tp->snd_ssthresh = tcp_current_ssthresh(sk);
-		tp->prior_ssthresh = 0;
-		tp->undo_marker = 0;
-		tcp_set_ca_state(sk, TCP_CA_Loss);
+		tcp_collapse_retrans(sk, to);
 	}
-	tcp_xmit_retransmit_queue(sk);
 }
 
 /* This retransmits one SKB. Policy decisions and retransmit queue
@@ -1947,17 +1914,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 			return -ENOMEM; /* We'll try again later. */
 	}
 
-	/* Collapse two adjacent packets if worthwhile and we can. */
-	if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) &&
-	    (skb->len < (cur_mss >> 1)) &&
-	    (!tcp_skb_is_last(sk, skb)) &&
-	    (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) &&
-	    (skb_shinfo(skb)->nr_frags == 0 &&
-	     skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) &&
-	    (tcp_skb_pcount(skb) == 1 &&
-	     tcp_skb_pcount(tcp_write_queue_next(sk, skb)) == 1) &&
-	    (sysctl_tcp_retrans_collapse != 0))
-		tcp_retrans_try_collapse(sk, skb, cur_mss);
+	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
 	/* Some Solaris stacks overoptimize and ignore the FIN on a
 	 * retransmit when old data is attached. So strip it off