aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/sock.c6
-rw-r--r--net/core/user_dma.c131
-rw-r--r--net/dccp/proto.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c149
-rw-r--r--net/ipv4/tcp_input.c83
-rw-r--r--net/ipv4/tcp_ipv4.c18
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/llc/af_llc.c10
-rw-r--r--net/mac802154/wpan.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c2
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/sunrpc/clnt.c5
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svc_xprt.c81
-rw-r--r--net/sunrpc/svcsock.c25
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h7
-rw-r--r--net/sunrpc/xprtsock.c121
23 files changed, 158 insertions, 529 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 71093d94ad2b..235e6c50708d 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,7 +16,6 @@ obj-y += net-sysfs.o
16obj-$(CONFIG_PROC_FS) += net-procfs.o 16obj-$(CONFIG_PROC_FS) += net-procfs.o
17obj-$(CONFIG_NET_PKTGEN) += pktgen.o 17obj-$(CONFIG_NET_PKTGEN) += pktgen.o
18obj-$(CONFIG_NETPOLL) += netpoll.o 18obj-$(CONFIG_NETPOLL) += netpoll.o
19obj-$(CONFIG_NET_DMA) += user_dma.o
20obj-$(CONFIG_FIB_RULES) += fib_rules.o 19obj-$(CONFIG_FIB_RULES) += fib_rules.o
21obj-$(CONFIG_TRACEPOINTS) += net-traces.o 20obj-$(CONFIG_TRACEPOINTS) += net-traces.o
22obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o 21obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 3c5bdaa44486..4699dcfdc4ab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1286,7 +1286,6 @@ static int __dev_open(struct net_device *dev)
1286 clear_bit(__LINK_STATE_START, &dev->state); 1286 clear_bit(__LINK_STATE_START, &dev->state);
1287 else { 1287 else {
1288 dev->flags |= IFF_UP; 1288 dev->flags |= IFF_UP;
1289 net_dmaengine_get();
1290 dev_set_rx_mode(dev); 1289 dev_set_rx_mode(dev);
1291 dev_activate(dev); 1290 dev_activate(dev);
1292 add_device_randomness(dev->dev_addr, dev->addr_len); 1291 add_device_randomness(dev->dev_addr, dev->addr_len);
@@ -1365,7 +1364,6 @@ static int __dev_close_many(struct list_head *head)
1365 ops->ndo_stop(dev); 1364 ops->ndo_stop(dev);
1366 1365
1367 dev->flags &= ~IFF_UP; 1366 dev->flags &= ~IFF_UP;
1368 net_dmaengine_put();
1369 netpoll_poll_enable(dev); 1367 netpoll_poll_enable(dev);
1370 } 1368 }
1371 1369
@@ -4579,14 +4577,6 @@ static void net_rx_action(struct softirq_action *h)
4579out: 4577out:
4580 net_rps_action_and_irq_enable(sd); 4578 net_rps_action_and_irq_enable(sd);
4581 4579
4582#ifdef CONFIG_NET_DMA
4583 /*
4584 * There may not be any more sk_buffs coming right now, so push
4585 * any pending DMA copies to hardware
4586 */
4587 dma_issue_pending_all();
4588#endif
4589
4590 return; 4580 return;
4591 4581
4592softnet_break: 4582softnet_break:
diff --git a/net/core/sock.c b/net/core/sock.c
index e5ad7d31c3c2..b4f3ea2fce60 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1481,9 +1481,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1481 atomic_set(&newsk->sk_omem_alloc, 0); 1481 atomic_set(&newsk->sk_omem_alloc, 0);
1482 skb_queue_head_init(&newsk->sk_receive_queue); 1482 skb_queue_head_init(&newsk->sk_receive_queue);
1483 skb_queue_head_init(&newsk->sk_write_queue); 1483 skb_queue_head_init(&newsk->sk_write_queue);
1484#ifdef CONFIG_NET_DMA
1485 skb_queue_head_init(&newsk->sk_async_wait_queue);
1486#endif
1487 1484
1488 spin_lock_init(&newsk->sk_dst_lock); 1485 spin_lock_init(&newsk->sk_dst_lock);
1489 rwlock_init(&newsk->sk_callback_lock); 1486 rwlock_init(&newsk->sk_callback_lock);
@@ -2258,9 +2255,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
2258 skb_queue_head_init(&sk->sk_receive_queue); 2255 skb_queue_head_init(&sk->sk_receive_queue);
2259 skb_queue_head_init(&sk->sk_write_queue); 2256 skb_queue_head_init(&sk->sk_write_queue);
2260 skb_queue_head_init(&sk->sk_error_queue); 2257 skb_queue_head_init(&sk->sk_error_queue);
2261#ifdef CONFIG_NET_DMA
2262 skb_queue_head_init(&sk->sk_async_wait_queue);
2263#endif
2264 2258
2265 sk->sk_send_head = NULL; 2259 sk->sk_send_head = NULL;
2266 2260
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
deleted file mode 100644
index 1b5fefdb8198..000000000000
--- a/net/core/user_dma.c
+++ /dev/null
@@ -1,131 +0,0 @@
1/*
2 * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
3 * Portions based on net/core/datagram.c and copyrighted by their authors.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free
7 * Software Foundation; either version 2 of the License, or (at your option)
8 * any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59
17 * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 *
19 * The full GNU General Public License is included in this distribution in the
20 * file called COPYING.
21 */
22
23/*
24 * This code allows the net stack to make use of a DMA engine for
25 * skb to iovec copies.
26 */
27
28#include <linux/dmaengine.h>
29#include <linux/socket.h>
30#include <linux/export.h>
31#include <net/tcp.h>
32#include <net/netdma.h>
33
34#define NET_DMA_DEFAULT_COPYBREAK 4096
35
36int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
37EXPORT_SYMBOL(sysctl_tcp_dma_copybreak);
38
39/**
40 * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
41 * @skb - buffer to copy
42 * @offset - offset in the buffer to start copying from
43 * @iovec - io vector to copy to
44 * @len - amount of data to copy from buffer to iovec
45 * @pinned_list - locked iovec buffer data
46 *
47 * Note: the iovec is modified during the copy.
48 */
49int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
50 struct sk_buff *skb, int offset, struct iovec *to,
51 size_t len, struct dma_pinned_list *pinned_list)
52{
53 int start = skb_headlen(skb);
54 int i, copy = start - offset;
55 struct sk_buff *frag_iter;
56 dma_cookie_t cookie = 0;
57
58 /* Copy header. */
59 if (copy > 0) {
60 if (copy > len)
61 copy = len;
62 cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
63 skb->data + offset, copy);
64 if (cookie < 0)
65 goto fault;
66 len -= copy;
67 if (len == 0)
68 goto end;
69 offset += copy;
70 }
71
72 /* Copy paged appendix. Hmm... why does this look so complicated? */
73 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
74 int end;
75 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
76
77 WARN_ON(start > offset + len);
78
79 end = start + skb_frag_size(frag);
80 copy = end - offset;
81 if (copy > 0) {
82 struct page *page = skb_frag_page(frag);
83
84 if (copy > len)
85 copy = len;
86
87 cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
88 frag->page_offset + offset - start, copy);
89 if (cookie < 0)
90 goto fault;
91 len -= copy;
92 if (len == 0)
93 goto end;
94 offset += copy;
95 }
96 start = end;
97 }
98
99 skb_walk_frags(skb, frag_iter) {
100 int end;
101
102 WARN_ON(start > offset + len);
103
104 end = start + frag_iter->len;
105 copy = end - offset;
106 if (copy > 0) {
107 if (copy > len)
108 copy = len;
109 cookie = dma_skb_copy_datagram_iovec(chan, frag_iter,
110 offset - start,
111 to, copy,
112 pinned_list);
113 if (cookie < 0)
114 goto fault;
115 len -= copy;
116 if (len == 0)
117 goto end;
118 offset += copy;
119 }
120 start = end;
121 }
122
123end:
124 if (!len) {
125 skb->dma_cookie = cookie;
126 return cookie;
127 }
128
129fault:
130 return -EFAULT;
131}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 1ad150ed57cf..97b0fcc79547 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -848,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
848 default: 848 default:
849 dccp_pr_debug("packet_type=%s\n", 849 dccp_pr_debug("packet_type=%s\n",
850 dccp_packet_name(dh->dccph_type)); 850 dccp_packet_name(dh->dccph_type));
851 sk_eat_skb(sk, skb, false); 851 sk_eat_skb(sk, skb);
852 } 852 }
853verify_sock_status: 853verify_sock_status:
854 if (sock_flag(sk, SOCK_DONE)) { 854 if (sock_flag(sk, SOCK_DONE)) {
@@ -905,7 +905,7 @@ verify_sock_status:
905 len = skb->len; 905 len = skb->len;
906 found_fin_ok: 906 found_fin_ok:
907 if (!(flags & MSG_PEEK)) 907 if (!(flags & MSG_PEEK))
908 sk_eat_skb(sk, skb, false); 908 sk_eat_skb(sk, skb);
909 break; 909 break;
910 } while (1); 910 } while (1);
911out: 911out:
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 8a25509c35b3..b3c53c8b331e 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -631,15 +631,6 @@ static struct ctl_table ipv4_table[] = {
631 .mode = 0644, 631 .mode = 0644,
632 .proc_handler = proc_dointvec 632 .proc_handler = proc_dointvec
633 }, 633 },
634#ifdef CONFIG_NET_DMA
635 {
636 .procname = "tcp_dma_copybreak",
637 .data = &sysctl_tcp_dma_copybreak,
638 .maxlen = sizeof(int),
639 .mode = 0644,
640 .proc_handler = proc_dointvec
641 },
642#endif
643 { 634 {
644 .procname = "tcp_slow_start_after_idle", 635 .procname = "tcp_slow_start_after_idle",
645 .data = &sysctl_tcp_slow_start_after_idle, 636 .data = &sysctl_tcp_slow_start_after_idle,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 26a6f113f00c..461003d258ba 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -274,7 +274,6 @@
274#include <net/tcp.h> 274#include <net/tcp.h>
275#include <net/xfrm.h> 275#include <net/xfrm.h>
276#include <net/ip.h> 276#include <net/ip.h>
277#include <net/netdma.h>
278#include <net/sock.h> 277#include <net/sock.h>
279 278
280#include <asm/uaccess.h> 279#include <asm/uaccess.h>
@@ -1394,7 +1393,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
1394 * calculation of whether or not we must ACK for the sake of 1393 * calculation of whether or not we must ACK for the sake of
1395 * a window update. 1394 * a window update.
1396 */ 1395 */
1397void tcp_cleanup_rbuf(struct sock *sk, int copied) 1396static void tcp_cleanup_rbuf(struct sock *sk, int copied)
1398{ 1397{
1399 struct tcp_sock *tp = tcp_sk(sk); 1398 struct tcp_sock *tp = tcp_sk(sk);
1400 bool time_to_ack = false; 1399 bool time_to_ack = false;
@@ -1470,39 +1469,6 @@ static void tcp_prequeue_process(struct sock *sk)
1470 tp->ucopy.memory = 0; 1469 tp->ucopy.memory = 0;
1471} 1470}
1472 1471
1473#ifdef CONFIG_NET_DMA
1474static void tcp_service_net_dma(struct sock *sk, bool wait)
1475{
1476 dma_cookie_t done, used;
1477 dma_cookie_t last_issued;
1478 struct tcp_sock *tp = tcp_sk(sk);
1479
1480 if (!tp->ucopy.dma_chan)
1481 return;
1482
1483 last_issued = tp->ucopy.dma_cookie;
1484 dma_async_issue_pending(tp->ucopy.dma_chan);
1485
1486 do {
1487 if (dma_async_is_tx_complete(tp->ucopy.dma_chan,
1488 last_issued, &done,
1489 &used) == DMA_COMPLETE) {
1490 /* Safe to free early-copied skbs now */
1491 __skb_queue_purge(&sk->sk_async_wait_queue);
1492 break;
1493 } else {
1494 struct sk_buff *skb;
1495 while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
1496 (dma_async_is_complete(skb->dma_cookie, done,
1497 used) == DMA_COMPLETE)) {
1498 __skb_dequeue(&sk->sk_async_wait_queue);
1499 kfree_skb(skb);
1500 }
1501 }
1502 } while (wait);
1503}
1504#endif
1505
1506static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) 1472static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1507{ 1473{
1508 struct sk_buff *skb; 1474 struct sk_buff *skb;
@@ -1520,7 +1486,7 @@ static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off)
1520 * splitted a fat GRO packet, while we released socket lock 1486 * splitted a fat GRO packet, while we released socket lock
1521 * in skb_splice_bits() 1487 * in skb_splice_bits()
1522 */ 1488 */
1523 sk_eat_skb(sk, skb, false); 1489 sk_eat_skb(sk, skb);
1524 } 1490 }
1525 return NULL; 1491 return NULL;
1526} 1492}
@@ -1586,11 +1552,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
1586 continue; 1552 continue;
1587 } 1553 }
1588 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { 1554 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) {
1589 sk_eat_skb(sk, skb, false); 1555 sk_eat_skb(sk, skb);
1590 ++seq; 1556 ++seq;
1591 break; 1557 break;
1592 } 1558 }
1593 sk_eat_skb(sk, skb, false); 1559 sk_eat_skb(sk, skb);
1594 if (!desc->count) 1560 if (!desc->count)
1595 break; 1561 break;
1596 tp->copied_seq = seq; 1562 tp->copied_seq = seq;
@@ -1628,7 +1594,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1628 int target; /* Read at least this many bytes */ 1594 int target; /* Read at least this many bytes */
1629 long timeo; 1595 long timeo;
1630 struct task_struct *user_recv = NULL; 1596 struct task_struct *user_recv = NULL;
1631 bool copied_early = false;
1632 struct sk_buff *skb; 1597 struct sk_buff *skb;
1633 u32 urg_hole = 0; 1598 u32 urg_hole = 0;
1634 1599
@@ -1674,28 +1639,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1674 1639
1675 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); 1640 target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
1676 1641
1677#ifdef CONFIG_NET_DMA
1678 tp->ucopy.dma_chan = NULL;
1679 preempt_disable();
1680 skb = skb_peek_tail(&sk->sk_receive_queue);
1681 {
1682 int available = 0;
1683
1684 if (skb)
1685 available = TCP_SKB_CB(skb)->seq + skb->len - (*seq);
1686 if ((available < target) &&
1687 (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
1688 !sysctl_tcp_low_latency &&
1689 net_dma_find_channel()) {
1690 preempt_enable();
1691 tp->ucopy.pinned_list =
1692 dma_pin_iovec_pages(msg->msg_iov, len);
1693 } else {
1694 preempt_enable();
1695 }
1696 }
1697#endif
1698
1699 do { 1642 do {
1700 u32 offset; 1643 u32 offset;
1701 1644
@@ -1826,16 +1769,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1826 /* __ Set realtime policy in scheduler __ */ 1769 /* __ Set realtime policy in scheduler __ */
1827 } 1770 }
1828 1771
1829#ifdef CONFIG_NET_DMA
1830 if (tp->ucopy.dma_chan) {
1831 if (tp->rcv_wnd == 0 &&
1832 !skb_queue_empty(&sk->sk_async_wait_queue)) {
1833 tcp_service_net_dma(sk, true);
1834 tcp_cleanup_rbuf(sk, copied);
1835 } else
1836 dma_async_issue_pending(tp->ucopy.dma_chan);
1837 }
1838#endif
1839 if (copied >= target) { 1772 if (copied >= target) {
1840 /* Do not sleep, just process backlog. */ 1773 /* Do not sleep, just process backlog. */
1841 release_sock(sk); 1774 release_sock(sk);
@@ -1843,11 +1776,6 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1843 } else 1776 } else
1844 sk_wait_data(sk, &timeo); 1777 sk_wait_data(sk, &timeo);
1845 1778
1846#ifdef CONFIG_NET_DMA
1847 tcp_service_net_dma(sk, false); /* Don't block */
1848 tp->ucopy.wakeup = 0;
1849#endif
1850
1851 if (user_recv) { 1779 if (user_recv) {
1852 int chunk; 1780 int chunk;
1853 1781
@@ -1905,43 +1833,13 @@ do_prequeue:
1905 } 1833 }
1906 1834
1907 if (!(flags & MSG_TRUNC)) { 1835 if (!(flags & MSG_TRUNC)) {
1908#ifdef CONFIG_NET_DMA 1836 err = skb_copy_datagram_iovec(skb, offset,
1909 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list) 1837 msg->msg_iov, used);
1910 tp->ucopy.dma_chan = net_dma_find_channel(); 1838 if (err) {
1911 1839 /* Exception. Bailout! */
1912 if (tp->ucopy.dma_chan) { 1840 if (!copied)
1913 tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec( 1841 copied = -EFAULT;
1914 tp->ucopy.dma_chan, skb, offset, 1842 break;
1915 msg->msg_iov, used,
1916 tp->ucopy.pinned_list);
1917
1918 if (tp->ucopy.dma_cookie < 0) {
1919
1920 pr_alert("%s: dma_cookie < 0\n",
1921 __func__);
1922
1923 /* Exception. Bailout! */
1924 if (!copied)
1925 copied = -EFAULT;
1926 break;
1927 }
1928
1929 dma_async_issue_pending(tp->ucopy.dma_chan);
1930
1931 if ((offset + used) == skb->len)
1932 copied_early = true;
1933
1934 } else
1935#endif
1936 {
1937 err = skb_copy_datagram_iovec(skb, offset,
1938 msg->msg_iov, used);
1939 if (err) {
1940 /* Exception. Bailout! */
1941 if (!copied)
1942 copied = -EFAULT;
1943 break;
1944 }
1945 } 1843 }
1946 } 1844 }
1947 1845
@@ -1961,19 +1859,15 @@ skip_copy:
1961 1859
1962 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) 1860 if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
1963 goto found_fin_ok; 1861 goto found_fin_ok;
1964 if (!(flags & MSG_PEEK)) { 1862 if (!(flags & MSG_PEEK))
1965 sk_eat_skb(sk, skb, copied_early); 1863 sk_eat_skb(sk, skb);
1966 copied_early = false;
1967 }
1968 continue; 1864 continue;
1969 1865
1970 found_fin_ok: 1866 found_fin_ok:
1971 /* Process the FIN. */ 1867 /* Process the FIN. */
1972 ++*seq; 1868 ++*seq;
1973 if (!(flags & MSG_PEEK)) { 1869 if (!(flags & MSG_PEEK))
1974 sk_eat_skb(sk, skb, copied_early); 1870 sk_eat_skb(sk, skb);
1975 copied_early = false;
1976 }
1977 break; 1871 break;
1978 } while (len > 0); 1872 } while (len > 0);
1979 1873
@@ -1996,16 +1890,6 @@ skip_copy:
1996 tp->ucopy.len = 0; 1890 tp->ucopy.len = 0;
1997 } 1891 }
1998 1892
1999#ifdef CONFIG_NET_DMA
2000 tcp_service_net_dma(sk, true); /* Wait for queue to drain */
2001 tp->ucopy.dma_chan = NULL;
2002
2003 if (tp->ucopy.pinned_list) {
2004 dma_unpin_iovec_pages(tp->ucopy.pinned_list);
2005 tp->ucopy.pinned_list = NULL;
2006 }
2007#endif
2008
2009 /* According to UNIX98, msg_name/msg_namelen are ignored 1893 /* According to UNIX98, msg_name/msg_namelen are ignored
2010 * on connected socket. I was just happy when found this 8) --ANK 1894 * on connected socket. I was just happy when found this 8) --ANK
2011 */ 1895 */
@@ -2351,9 +2235,6 @@ int tcp_disconnect(struct sock *sk, int flags)
2351 __skb_queue_purge(&sk->sk_receive_queue); 2235 __skb_queue_purge(&sk->sk_receive_queue);
2352 tcp_write_queue_purge(sk); 2236 tcp_write_queue_purge(sk);
2353 __skb_queue_purge(&tp->out_of_order_queue); 2237 __skb_queue_purge(&tp->out_of_order_queue);
2354#ifdef CONFIG_NET_DMA
2355 __skb_queue_purge(&sk->sk_async_wait_queue);
2356#endif
2357 2238
2358 inet->inet_dport = 0; 2239 inet->inet_dport = 0;
2359 2240
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aa38f98b7884..00a41499d52c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -73,7 +73,6 @@
73#include <net/inet_common.h> 73#include <net/inet_common.h>
74#include <linux/ipsec.h> 74#include <linux/ipsec.h>
75#include <asm/unaligned.h> 75#include <asm/unaligned.h>
76#include <net/netdma.h>
77#include <linux/errqueue.h> 76#include <linux/errqueue.h>
78 77
79int sysctl_tcp_timestamps __read_mostly = 1; 78int sysctl_tcp_timestamps __read_mostly = 1;
@@ -4958,53 +4957,6 @@ static inline bool tcp_checksum_complete_user(struct sock *sk,
4958 __tcp_checksum_complete_user(sk, skb); 4957 __tcp_checksum_complete_user(sk, skb);
4959} 4958}
4960 4959
4961#ifdef CONFIG_NET_DMA
4962static bool tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
4963 int hlen)
4964{
4965 struct tcp_sock *tp = tcp_sk(sk);
4966 int chunk = skb->len - hlen;
4967 int dma_cookie;
4968 bool copied_early = false;
4969
4970 if (tp->ucopy.wakeup)
4971 return false;
4972
4973 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
4974 tp->ucopy.dma_chan = net_dma_find_channel();
4975
4976 if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
4977
4978 dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
4979 skb, hlen,
4980 tp->ucopy.iov, chunk,
4981 tp->ucopy.pinned_list);
4982
4983 if (dma_cookie < 0)
4984 goto out;
4985
4986 tp->ucopy.dma_cookie = dma_cookie;
4987 copied_early = true;
4988
4989 tp->ucopy.len -= chunk;
4990 tp->copied_seq += chunk;
4991 tcp_rcv_space_adjust(sk);
4992
4993 if ((tp->ucopy.len == 0) ||
4994 (tcp_flag_word(tcp_hdr(skb)) & TCP_FLAG_PSH) ||
4995 (atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1))) {
4996 tp->ucopy.wakeup = 1;
4997 sk->sk_data_ready(sk);
4998 }
4999 } else if (chunk > 0) {
5000 tp->ucopy.wakeup = 1;
5001 sk->sk_data_ready(sk);
5002 }
5003out:
5004 return copied_early;
5005}
5006#endif /* CONFIG_NET_DMA */
5007
5008/* Does PAWS and seqno based validation of an incoming segment, flags will 4960/* Does PAWS and seqno based validation of an incoming segment, flags will
5009 * play significant role here. 4961 * play significant role here.
5010 */ 4962 */
@@ -5184,27 +5136,15 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5184 } 5136 }
5185 } else { 5137 } else {
5186 int eaten = 0; 5138 int eaten = 0;
5187 int copied_early = 0;
5188 bool fragstolen = false; 5139 bool fragstolen = false;
5189 5140
5190 if (tp->copied_seq == tp->rcv_nxt && 5141 if (tp->ucopy.task == current &&
5191 len - tcp_header_len <= tp->ucopy.len) { 5142 tp->copied_seq == tp->rcv_nxt &&
5192#ifdef CONFIG_NET_DMA 5143 len - tcp_header_len <= tp->ucopy.len &&
5193 if (tp->ucopy.task == current && 5144 sock_owned_by_user(sk)) {
5194 sock_owned_by_user(sk) && 5145 __set_current_state(TASK_RUNNING);
5195 tcp_dma_try_early_copy(sk, skb, tcp_header_len)) {
5196 copied_early = 1;
5197 eaten = 1;
5198 }
5199#endif
5200 if (tp->ucopy.task == current &&
5201 sock_owned_by_user(sk) && !copied_early) {
5202 __set_current_state(TASK_RUNNING);
5203 5146
5204 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) 5147 if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
5205 eaten = 1;
5206 }
5207 if (eaten) {
5208 /* Predicted packet is in window by definition. 5148 /* Predicted packet is in window by definition.
5209 * seq == rcv_nxt and rcv_wup <= rcv_nxt. 5149 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
5210 * Hence, check seq<=rcv_wup reduces to: 5150 * Hence, check seq<=rcv_wup reduces to:
@@ -5220,9 +5160,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5220 __skb_pull(skb, tcp_header_len); 5160 __skb_pull(skb, tcp_header_len);
5221 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; 5161 tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
5222 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER); 5162 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
5163 eaten = 1;
5223 } 5164 }
5224 if (copied_early)
5225 tcp_cleanup_rbuf(sk, skb->len);
5226 } 5165 }
5227 if (!eaten) { 5166 if (!eaten) {
5228 if (tcp_checksum_complete_user(sk, skb)) 5167 if (tcp_checksum_complete_user(sk, skb))
@@ -5259,14 +5198,8 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5259 goto no_ack; 5198 goto no_ack;
5260 } 5199 }
5261 5200
5262 if (!copied_early || tp->rcv_nxt != tp->rcv_wup) 5201 __tcp_ack_snd_check(sk, 0);
5263 __tcp_ack_snd_check(sk, 0);
5264no_ack: 5202no_ack:
5265#ifdef CONFIG_NET_DMA
5266 if (copied_early)
5267 __skb_queue_tail(&sk->sk_async_wait_queue, skb);
5268 else
5269#endif
5270 if (eaten) 5203 if (eaten)
5271 kfree_skb_partial(skb, fragstolen); 5204 kfree_skb_partial(skb, fragstolen);
5272 sk->sk_data_ready(sk); 5205 sk->sk_data_ready(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 9ce3eac02957..552e87e3c269 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,7 +72,6 @@
72#include <net/inet_common.h> 72#include <net/inet_common.h>
73#include <net/timewait_sock.h> 73#include <net/timewait_sock.h>
74#include <net/xfrm.h> 74#include <net/xfrm.h>
75#include <net/netdma.h>
76#include <net/secure_seq.h> 75#include <net/secure_seq.h>
77#include <net/tcp_memcontrol.h> 76#include <net/tcp_memcontrol.h>
78#include <net/busy_poll.h> 77#include <net/busy_poll.h>
@@ -1688,18 +1687,8 @@ process:
1688 bh_lock_sock_nested(sk); 1687 bh_lock_sock_nested(sk);
1689 ret = 0; 1688 ret = 0;
1690 if (!sock_owned_by_user(sk)) { 1689 if (!sock_owned_by_user(sk)) {
1691#ifdef CONFIG_NET_DMA 1690 if (!tcp_prequeue(sk, skb))
1692 struct tcp_sock *tp = tcp_sk(sk);
1693 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1694 tp->ucopy.dma_chan = net_dma_find_channel();
1695 if (tp->ucopy.dma_chan)
1696 ret = tcp_v4_do_rcv(sk, skb); 1691 ret = tcp_v4_do_rcv(sk, skb);
1697 else
1698#endif
1699 {
1700 if (!tcp_prequeue(sk, skb))
1701 ret = tcp_v4_do_rcv(sk, skb);
1702 }
1703 } else if (unlikely(sk_add_backlog(sk, skb, 1692 } else if (unlikely(sk_add_backlog(sk, skb,
1704 sk->sk_rcvbuf + sk->sk_sndbuf))) { 1693 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1705 bh_unlock_sock(sk); 1694 bh_unlock_sock(sk);
@@ -1861,11 +1850,6 @@ void tcp_v4_destroy_sock(struct sock *sk)
1861 } 1850 }
1862#endif 1851#endif
1863 1852
1864#ifdef CONFIG_NET_DMA
1865 /* Cleans up our sk_async_wait_queue */
1866 __skb_queue_purge(&sk->sk_async_wait_queue);
1867#endif
1868
1869 /* Clean prequeue, it must be empty really */ 1853 /* Clean prequeue, it must be empty really */
1870 __skb_queue_purge(&tp->ucopy.prequeue); 1854 __skb_queue_purge(&tp->ucopy.prequeue);
1871 1855
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 132bac137aed..cf2e45ab2fa4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -59,7 +59,6 @@
59#include <net/snmp.h> 59#include <net/snmp.h>
60#include <net/dsfield.h> 60#include <net/dsfield.h>
61#include <net/timewait_sock.h> 61#include <net/timewait_sock.h>
62#include <net/netdma.h>
63#include <net/inet_common.h> 62#include <net/inet_common.h>
64#include <net/secure_seq.h> 63#include <net/secure_seq.h>
65#include <net/tcp_memcontrol.h> 64#include <net/tcp_memcontrol.h>
@@ -1458,18 +1457,8 @@ process:
1458 bh_lock_sock_nested(sk); 1457 bh_lock_sock_nested(sk);
1459 ret = 0; 1458 ret = 0;
1460 if (!sock_owned_by_user(sk)) { 1459 if (!sock_owned_by_user(sk)) {
1461#ifdef CONFIG_NET_DMA 1460 if (!tcp_prequeue(sk, skb))
1462 struct tcp_sock *tp = tcp_sk(sk);
1463 if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
1464 tp->ucopy.dma_chan = net_dma_find_channel();
1465 if (tp->ucopy.dma_chan)
1466 ret = tcp_v6_do_rcv(sk, skb); 1461 ret = tcp_v6_do_rcv(sk, skb);
1467 else
1468#endif
1469 {
1470 if (!tcp_prequeue(sk, skb))
1471 ret = tcp_v6_do_rcv(sk, skb);
1472 }
1473 } else if (unlikely(sk_add_backlog(sk, skb, 1462 } else if (unlikely(sk_add_backlog(sk, skb,
1474 sk->sk_rcvbuf + sk->sk_sndbuf))) { 1463 sk->sk_rcvbuf + sk->sk_sndbuf))) {
1475 bh_unlock_sock(sk); 1464 bh_unlock_sock(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 0080d2b0a8ae..bb9cbc17d926 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -839,7 +839,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
839 839
840 if (!(flags & MSG_PEEK)) { 840 if (!(flags & MSG_PEEK)) {
841 spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); 841 spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
842 sk_eat_skb(sk, skb, false); 842 sk_eat_skb(sk, skb);
843 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); 843 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
844 *seq = 0; 844 *seq = 0;
845 } 845 }
@@ -861,10 +861,10 @@ copy_uaddr:
861 llc_cmsg_rcv(msg, skb); 861 llc_cmsg_rcv(msg, skb);
862 862
863 if (!(flags & MSG_PEEK)) { 863 if (!(flags & MSG_PEEK)) {
864 spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags); 864 spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
865 sk_eat_skb(sk, skb, false); 865 sk_eat_skb(sk, skb);
866 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags); 866 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
867 *seq = 0; 867 *seq = 0;
868 } 868 }
869 869
870 goto out; 870 goto out;
diff --git a/net/mac802154/wpan.c b/net/mac802154/wpan.c
index b7961129ce4d..4ab86a57dca5 100644
--- a/net/mac802154/wpan.c
+++ b/net/mac802154/wpan.c
@@ -90,7 +90,7 @@ mac802154_wpan_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
90 } 90 }
91 case SIOCSIFADDR: 91 case SIOCSIFADDR:
92 dev_warn(&dev->dev, 92 dev_warn(&dev->dev,
93 "Using DEBUGing ioctl SIOCSIFADDR isn't recommened!\n"); 93 "Using DEBUGing ioctl SIOCSIFADDR isn't recommended!\n");
94 if (sa->family != AF_IEEE802154 || 94 if (sa->family != AF_IEEE802154 ||
95 sa->addr.addr_type != IEEE802154_ADDR_SHORT || 95 sa->addr.addr_type != IEEE802154_ADDR_SHORT ||
96 sa->addr.pan_id == IEEE802154_PANID_BROADCAST || 96 sa->addr.pan_id == IEEE802154_PANID_BROADCAST ||
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 26c795e6b57f..912e5a05b79d 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -502,7 +502,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
502 502
503 if (ret == -EAGAIN) { 503 if (ret == -EAGAIN) {
504 /* Type requests element to be completed */ 504 /* Type requests element to be completed */
505 pr_debug("element must be competed, ADD is triggered\n"); 505 pr_debug("element must be completed, ADD is triggered\n");
506 write_lock_bh(&set->lock); 506 write_lock_bh(&set->lock);
507 set->variant->kadt(set, skb, par, IPSET_ADD, opt); 507 set->variant->kadt(set, skb, par, IPSET_ADD, opt);
508 write_unlock_bh(&set->lock); 508 write_unlock_bh(&set->lock);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 424ff622ab5f..10443377fb9d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -83,7 +83,7 @@ static int rds_release(struct socket *sock)
83 83
84 /* 84 /*
85 * the binding lookup hash uses rcu, we need to 85 * the binding lookup hash uses rcu, we need to
86 * make sure we sychronize_rcu before we free our 86 * make sure we synchronize_rcu before we free our
87 * entry 87 * entry
88 */ 88 */
89 rds_remove_bound(rs); 89 rds_remove_bound(rs);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 488ddeed9363..9acd6ce88db7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -461,6 +461,8 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
461 461
462 if (args->flags & RPC_CLNT_CREATE_AUTOBIND) 462 if (args->flags & RPC_CLNT_CREATE_AUTOBIND)
463 clnt->cl_autobind = 1; 463 clnt->cl_autobind = 1;
464 if (args->flags & RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT)
465 clnt->cl_noretranstimeo = 1;
464 if (args->flags & RPC_CLNT_CREATE_DISCRTRY) 466 if (args->flags & RPC_CLNT_CREATE_DISCRTRY)
465 clnt->cl_discrtry = 1; 467 clnt->cl_discrtry = 1;
466 if (!(args->flags & RPC_CLNT_CREATE_QUIET)) 468 if (!(args->flags & RPC_CLNT_CREATE_QUIET))
@@ -579,6 +581,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
579 /* Turn off autobind on clones */ 581 /* Turn off autobind on clones */
580 new->cl_autobind = 0; 582 new->cl_autobind = 0;
581 new->cl_softrtry = clnt->cl_softrtry; 583 new->cl_softrtry = clnt->cl_softrtry;
584 new->cl_noretranstimeo = clnt->cl_noretranstimeo;
582 new->cl_discrtry = clnt->cl_discrtry; 585 new->cl_discrtry = clnt->cl_discrtry;
583 new->cl_chatty = clnt->cl_chatty; 586 new->cl_chatty = clnt->cl_chatty;
584 return new; 587 return new;
@@ -1913,6 +1916,7 @@ call_transmit_status(struct rpc_task *task)
1913 case -EHOSTDOWN: 1916 case -EHOSTDOWN:
1914 case -EHOSTUNREACH: 1917 case -EHOSTUNREACH:
1915 case -ENETUNREACH: 1918 case -ENETUNREACH:
1919 case -EPERM:
1916 if (RPC_IS_SOFTCONN(task)) { 1920 if (RPC_IS_SOFTCONN(task)) {
1917 xprt_end_transmit(task); 1921 xprt_end_transmit(task);
1918 rpc_exit(task, task->tk_status); 1922 rpc_exit(task, task->tk_status);
@@ -2018,6 +2022,7 @@ call_status(struct rpc_task *task)
2018 case -EHOSTDOWN: 2022 case -EHOSTDOWN:
2019 case -EHOSTUNREACH: 2023 case -EHOSTUNREACH:
2020 case -ENETUNREACH: 2024 case -ENETUNREACH:
2025 case -EPERM:
2021 if (RPC_IS_SOFTCONN(task)) { 2026 if (RPC_IS_SOFTCONN(task)) {
2022 rpc_exit(task, status); 2027 rpc_exit(task, status);
2023 break; 2028 break;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 9358c79fd589..fe3441abdbe5 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -821,9 +821,7 @@ void rpc_execute(struct rpc_task *task)
821 821
822static void rpc_async_schedule(struct work_struct *work) 822static void rpc_async_schedule(struct work_struct *work)
823{ 823{
824 current->flags |= PF_FSTRANS;
825 __rpc_execute(container_of(work, struct rpc_task, u.tk_work)); 824 __rpc_execute(container_of(work, struct rpc_task, u.tk_work));
826 current->flags &= ~PF_FSTRANS;
827} 825}
828 826
829/** 827/**
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 1db5007ddbce..ca8a7958f4e6 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node)
612 if (!rqstp) 612 if (!rqstp)
613 goto out_enomem; 613 goto out_enomem;
614 614
615 init_waitqueue_head(&rqstp->rq_wait);
616
617 serv->sv_nrthreads++; 615 serv->sv_nrthreads++;
618 spin_lock_bh(&pool->sp_lock); 616 spin_lock_bh(&pool->sp_lock);
619 pool->sp_nrthreads++; 617 pool->sp_nrthreads++;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 6666c6745858..c179ca2a5aa4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -346,20 +346,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
346 if (!svc_xprt_has_something_to_do(xprt)) 346 if (!svc_xprt_has_something_to_do(xprt))
347 return; 347 return;
348 348
349 cpu = get_cpu();
350 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
351 put_cpu();
352
353 spin_lock_bh(&pool->sp_lock);
354
355 if (!list_empty(&pool->sp_threads) &&
356 !list_empty(&pool->sp_sockets))
357 printk(KERN_ERR
358 "svc_xprt_enqueue: "
359 "threads and transports both waiting??\n");
360
361 pool->sp_stats.packets++;
362
363 /* Mark transport as busy. It will remain in this state until 349 /* Mark transport as busy. It will remain in this state until
364 * the provider calls svc_xprt_received. We update XPT_BUSY 350 * the provider calls svc_xprt_received. We update XPT_BUSY
365 * atomically because it also guards against trying to enqueue 351 * atomically because it also guards against trying to enqueue
@@ -368,9 +354,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
368 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) { 354 if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) {
369 /* Don't enqueue transport while already enqueued */ 355 /* Don't enqueue transport while already enqueued */
370 dprintk("svc: transport %p busy, not enqueued\n", xprt); 356 dprintk("svc: transport %p busy, not enqueued\n", xprt);
371 goto out_unlock; 357 return;
372 } 358 }
373 359
360 cpu = get_cpu();
361 pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
362 spin_lock_bh(&pool->sp_lock);
363
364 pool->sp_stats.packets++;
365
374 if (!list_empty(&pool->sp_threads)) { 366 if (!list_empty(&pool->sp_threads)) {
375 rqstp = list_entry(pool->sp_threads.next, 367 rqstp = list_entry(pool->sp_threads.next,
376 struct svc_rqst, 368 struct svc_rqst,
@@ -382,18 +374,23 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt)
382 printk(KERN_ERR 374 printk(KERN_ERR
383 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", 375 "svc_xprt_enqueue: server %p, rq_xprt=%p!\n",
384 rqstp, rqstp->rq_xprt); 376 rqstp, rqstp->rq_xprt);
385 rqstp->rq_xprt = xprt; 377 /* Note the order of the following 3 lines:
378 * We want to assign xprt to rqstp->rq_xprt only _after_
379 * we've woken up the process, so that we don't race with
380 * the lockless check in svc_get_next_xprt().
381 */
386 svc_xprt_get(xprt); 382 svc_xprt_get(xprt);
383 wake_up_process(rqstp->rq_task);
384 rqstp->rq_xprt = xprt;
387 pool->sp_stats.threads_woken++; 385 pool->sp_stats.threads_woken++;
388 wake_up(&rqstp->rq_wait);
389 } else { 386 } else {
390 dprintk("svc: transport %p put into queue\n", xprt); 387 dprintk("svc: transport %p put into queue\n", xprt);
391 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 388 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
392 pool->sp_stats.sockets_queued++; 389 pool->sp_stats.sockets_queued++;
393 } 390 }
394 391
395out_unlock:
396 spin_unlock_bh(&pool->sp_lock); 392 spin_unlock_bh(&pool->sp_lock);
393 put_cpu();
397} 394}
398 395
399/* 396/*
@@ -509,7 +506,7 @@ void svc_wake_up(struct svc_serv *serv)
509 svc_thread_dequeue(pool, rqstp); 506 svc_thread_dequeue(pool, rqstp);
510 rqstp->rq_xprt = NULL; 507 rqstp->rq_xprt = NULL;
511 */ 508 */
512 wake_up(&rqstp->rq_wait); 509 wake_up_process(rqstp->rq_task);
513 } else 510 } else
514 pool->sp_task_pending = 1; 511 pool->sp_task_pending = 1;
515 spin_unlock_bh(&pool->sp_lock); 512 spin_unlock_bh(&pool->sp_lock);
@@ -628,8 +625,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
628{ 625{
629 struct svc_xprt *xprt; 626 struct svc_xprt *xprt;
630 struct svc_pool *pool = rqstp->rq_pool; 627 struct svc_pool *pool = rqstp->rq_pool;
631 DECLARE_WAITQUEUE(wait, current); 628 long time_left = 0;
632 long time_left;
633 629
634 /* Normally we will wait up to 5 seconds for any required 630 /* Normally we will wait up to 5 seconds for any required
635 * cache information to be provided. 631 * cache information to be provided.
@@ -651,40 +647,32 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
651 } else { 647 } else {
652 if (pool->sp_task_pending) { 648 if (pool->sp_task_pending) {
653 pool->sp_task_pending = 0; 649 pool->sp_task_pending = 0;
654 spin_unlock_bh(&pool->sp_lock); 650 xprt = ERR_PTR(-EAGAIN);
655 return ERR_PTR(-EAGAIN); 651 goto out;
656 } 652 }
657 /* No data pending. Go to sleep */
658 svc_thread_enqueue(pool, rqstp);
659
660 /* 653 /*
661 * We have to be able to interrupt this wait 654 * We have to be able to interrupt this wait
662 * to bring down the daemons ... 655 * to bring down the daemons ...
663 */ 656 */
664 set_current_state(TASK_INTERRUPTIBLE); 657 set_current_state(TASK_INTERRUPTIBLE);
665 658
666 /* 659 /* No data pending. Go to sleep */
667 * checking kthread_should_stop() here allows us to avoid 660 svc_thread_enqueue(pool, rqstp);
668 * locking and signalling when stopping kthreads that call
669 * svc_recv. If the thread has already been woken up, then
670 * we can exit here without sleeping. If not, then it
671 * it'll be woken up quickly during the schedule_timeout
672 */
673 if (kthread_should_stop()) {
674 set_current_state(TASK_RUNNING);
675 spin_unlock_bh(&pool->sp_lock);
676 return ERR_PTR(-EINTR);
677 }
678
679 add_wait_queue(&rqstp->rq_wait, &wait);
680 spin_unlock_bh(&pool->sp_lock); 661 spin_unlock_bh(&pool->sp_lock);
681 662
682 time_left = schedule_timeout(timeout); 663 if (!(signalled() || kthread_should_stop())) {
664 time_left = schedule_timeout(timeout);
665 __set_current_state(TASK_RUNNING);
683 666
684 try_to_freeze(); 667 try_to_freeze();
668
669 xprt = rqstp->rq_xprt;
670 if (xprt != NULL)
671 return xprt;
672 } else
673 __set_current_state(TASK_RUNNING);
685 674
686 spin_lock_bh(&pool->sp_lock); 675 spin_lock_bh(&pool->sp_lock);
687 remove_wait_queue(&rqstp->rq_wait, &wait);
688 if (!time_left) 676 if (!time_left)
689 pool->sp_stats.threads_timedout++; 677 pool->sp_stats.threads_timedout++;
690 678
@@ -699,6 +687,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
699 return ERR_PTR(-EAGAIN); 687 return ERR_PTR(-EAGAIN);
700 } 688 }
701 } 689 }
690out:
702 spin_unlock_bh(&pool->sp_lock); 691 spin_unlock_bh(&pool->sp_lock);
703 return xprt; 692 return xprt;
704} 693}
@@ -744,7 +733,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
744 svc_add_new_temp_xprt(serv, newxpt); 733 svc_add_new_temp_xprt(serv, newxpt);
745 else 734 else
746 module_put(xprt->xpt_class->xcl_owner); 735 module_put(xprt->xpt_class->xcl_owner);
747 } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { 736 } else {
748 /* XPT_DATA|XPT_DEFERRED case: */ 737 /* XPT_DATA|XPT_DEFERRED case: */
749 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", 738 dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
750 rqstp, rqstp->rq_pool->sp_id, xprt, 739 rqstp, rqstp->rq_pool->sp_id, xprt,
@@ -781,10 +770,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
781 printk(KERN_ERR 770 printk(KERN_ERR
782 "svc_recv: service %p, transport not NULL!\n", 771 "svc_recv: service %p, transport not NULL!\n",
783 rqstp); 772 rqstp);
784 if (waitqueue_active(&rqstp->rq_wait))
785 printk(KERN_ERR
786 "svc_recv: service %p, wait queue active!\n",
787 rqstp);
788 773
789 err = svc_alloc_arg(rqstp); 774 err = svc_alloc_arg(rqstp);
790 if (err) 775 if (err)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c24a8ff33f8f..3f959c681885 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -312,19 +312,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
312} 312}
313 313
314/* 314/*
315 * Check input queue length
316 */
317static int svc_recv_available(struct svc_sock *svsk)
318{
319 struct socket *sock = svsk->sk_sock;
320 int avail, err;
321
322 err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail);
323
324 return (err >= 0)? avail : err;
325}
326
327/*
328 * Generic recvfrom routine. 315 * Generic recvfrom routine.
329 */ 316 */
330static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, 317static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
@@ -339,8 +326,14 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
339 326
340 rqstp->rq_xprt_hlen = 0; 327 rqstp->rq_xprt_hlen = 0;
341 328
329 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
342 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen, 330 len = kernel_recvmsg(svsk->sk_sock, &msg, iov, nr, buflen,
343 msg.msg_flags); 331 msg.msg_flags);
332 /* If we read a full record, then assume there may be more
333 * data to read (stream based sockets only!)
334 */
335 if (len == buflen)
336 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
344 337
345 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", 338 dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n",
346 svsk, iov[0].iov_base, iov[0].iov_len, len); 339 svsk, iov[0].iov_base, iov[0].iov_len, len);
@@ -980,8 +973,6 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
980 unsigned int want; 973 unsigned int want;
981 int len; 974 int len;
982 975
983 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
984
985 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { 976 if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
986 struct kvec iov; 977 struct kvec iov;
987 978
@@ -1036,7 +1027,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
1036 "%s: Got unrecognized reply: " 1027 "%s: Got unrecognized reply: "
1037 "calldir 0x%x xpt_bc_xprt %p xid %08x\n", 1028 "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
1038 __func__, ntohl(calldir), 1029 __func__, ntohl(calldir),
1039 bc_xprt, xid); 1030 bc_xprt, ntohl(xid));
1040 return -EAGAIN; 1031 return -EAGAIN;
1041 } 1032 }
1042 1033
@@ -1073,8 +1064,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
1073static void svc_tcp_fragment_received(struct svc_sock *svsk) 1064static void svc_tcp_fragment_received(struct svc_sock *svsk)
1074{ 1065{
1075 /* If we have more data, signal svc_xprt_enqueue() to try again */ 1066 /* If we have more data, signal svc_xprt_enqueue() to try again */
1076 if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
1077 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1078 dprintk("svc: TCP %s record (%d bytes)\n", 1067 dprintk("svc: TCP %s record (%d bytes)\n",
1079 svc_sock_final_rec(svsk) ? "final" : "nonfinal", 1068 svc_sock_final_rec(svsk) ? "final" : "nonfinal",
1080 svc_sock_reclen(svsk)); 1069 svc_sock_reclen(svsk));
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 374feb44afea..4e618808bc98 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -91,7 +91,7 @@ struct svc_xprt_class svc_rdma_class = {
91 .xcl_name = "rdma", 91 .xcl_name = "rdma",
92 .xcl_owner = THIS_MODULE, 92 .xcl_owner = THIS_MODULE,
93 .xcl_ops = &svc_rdma_ops, 93 .xcl_ops = &svc_rdma_ops,
94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, 94 .xcl_max_payload = RPCSVC_MAXPAYLOAD_RDMA,
95 .xcl_ident = XPRT_TRANSPORT_RDMA, 95 .xcl_ident = XPRT_TRANSPORT_RDMA,
96}; 96};
97 97
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 2faac4940563..6a4615dd0261 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -205,7 +205,6 @@ xprt_rdma_connect_worker(struct work_struct *work)
205 struct rpc_xprt *xprt = &r_xprt->xprt; 205 struct rpc_xprt *xprt = &r_xprt->xprt;
206 int rc = 0; 206 int rc = 0;
207 207
208 current->flags |= PF_FSTRANS;
209 xprt_clear_connected(xprt); 208 xprt_clear_connected(xprt);
210 209
211 dprintk("RPC: %s: %sconnect\n", __func__, 210 dprintk("RPC: %s: %sconnect\n", __func__,
@@ -216,7 +215,6 @@ xprt_rdma_connect_worker(struct work_struct *work)
216 215
217 dprintk("RPC: %s: exit\n", __func__); 216 dprintk("RPC: %s: exit\n", __func__);
218 xprt_clear_connecting(xprt); 217 xprt_clear_connecting(xprt);
219 current->flags &= ~PF_FSTRANS;
220} 218}
221 219
222/* 220/*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index c419498b8f46..ac7fc9a31342 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -51,6 +51,7 @@
51#include <linux/sunrpc/clnt.h> /* rpc_xprt */ 51#include <linux/sunrpc/clnt.h> /* rpc_xprt */
52#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ 52#include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */
53#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ 53#include <linux/sunrpc/xprtrdma.h> /* xprt parameters */
54#include <linux/sunrpc/svc.h> /* RPCSVC_MAXPAYLOAD */
54 55
55#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ 56#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */
56#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ 57#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */
@@ -392,4 +393,10 @@ extern struct kmem_cache *svc_rdma_ctxt_cachep;
392/* Workqueue created in svc_rdma.c */ 393/* Workqueue created in svc_rdma.c */
393extern struct workqueue_struct *svc_rdma_wq; 394extern struct workqueue_struct *svc_rdma_wq;
394 395
396#if RPCSVC_MAXPAYLOAD < (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
397#define RPCSVC_MAXPAYLOAD_RDMA RPCSVC_MAXPAYLOAD
398#else
399#define RPCSVC_MAXPAYLOAD_RDMA (RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT)
400#endif
401
395#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ 402#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 43cd89eacfab..3b305ab17afe 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -399,13 +399,13 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
399 return kernel_sendmsg(sock, &msg, NULL, 0, 0); 399 return kernel_sendmsg(sock, &msg, NULL, 0, 0);
400} 400}
401 401
402static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy) 402static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy, int *sent_p)
403{ 403{
404 ssize_t (*do_sendpage)(struct socket *sock, struct page *page, 404 ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
405 int offset, size_t size, int flags); 405 int offset, size_t size, int flags);
406 struct page **ppage; 406 struct page **ppage;
407 unsigned int remainder; 407 unsigned int remainder;
408 int err, sent = 0; 408 int err;
409 409
410 remainder = xdr->page_len - base; 410 remainder = xdr->page_len - base;
411 base += xdr->page_base; 411 base += xdr->page_base;
@@ -424,15 +424,15 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
424 err = do_sendpage(sock, *ppage, base, len, flags); 424 err = do_sendpage(sock, *ppage, base, len, flags);
425 if (remainder == 0 || err != len) 425 if (remainder == 0 || err != len)
426 break; 426 break;
427 sent += err; 427 *sent_p += err;
428 ppage++; 428 ppage++;
429 base = 0; 429 base = 0;
430 } 430 }
431 if (sent == 0) 431 if (err > 0) {
432 return err; 432 *sent_p += err;
433 if (err > 0) 433 err = 0;
434 sent += err; 434 }
435 return sent; 435 return err;
436} 436}
437 437
438/** 438/**
@@ -443,12 +443,14 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
443 * @xdr: buffer containing this request 443 * @xdr: buffer containing this request
444 * @base: starting position in the buffer 444 * @base: starting position in the buffer
445 * @zerocopy: true if it is safe to use sendpage() 445 * @zerocopy: true if it is safe to use sendpage()
446 * @sent_p: return the total number of bytes successfully queued for sending
446 * 447 *
447 */ 448 */
448static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy) 449static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy, int *sent_p)
449{ 450{
450 unsigned int remainder = xdr->len - base; 451 unsigned int remainder = xdr->len - base;
451 int err, sent = 0; 452 int err = 0;
453 int sent = 0;
452 454
453 if (unlikely(!sock)) 455 if (unlikely(!sock))
454 return -ENOTSOCK; 456 return -ENOTSOCK;
@@ -465,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
465 err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0); 467 err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
466 if (remainder == 0 || err != len) 468 if (remainder == 0 || err != len)
467 goto out; 469 goto out;
468 sent += err; 470 *sent_p += err;
469 base = 0; 471 base = 0;
470 } else 472 } else
471 base -= xdr->head[0].iov_len; 473 base -= xdr->head[0].iov_len;
@@ -473,23 +475,23 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
473 if (base < xdr->page_len) { 475 if (base < xdr->page_len) {
474 unsigned int len = xdr->page_len - base; 476 unsigned int len = xdr->page_len - base;
475 remainder -= len; 477 remainder -= len;
476 err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy); 478 err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy, &sent);
477 if (remainder == 0 || err != len) 479 *sent_p += sent;
480 if (remainder == 0 || sent != len)
478 goto out; 481 goto out;
479 sent += err;
480 base = 0; 482 base = 0;
481 } else 483 } else
482 base -= xdr->page_len; 484 base -= xdr->page_len;
483 485
484 if (base >= xdr->tail[0].iov_len) 486 if (base >= xdr->tail[0].iov_len)
485 return sent; 487 return 0;
486 err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0); 488 err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
487out: 489out:
488 if (sent == 0) 490 if (err > 0) {
489 return err; 491 *sent_p += err;
490 if (err > 0) 492 err = 0;
491 sent += err; 493 }
492 return sent; 494 return err;
493} 495}
494 496
495static void xs_nospace_callback(struct rpc_task *task) 497static void xs_nospace_callback(struct rpc_task *task)
@@ -573,19 +575,20 @@ static int xs_local_send_request(struct rpc_task *task)
573 container_of(xprt, struct sock_xprt, xprt); 575 container_of(xprt, struct sock_xprt, xprt);
574 struct xdr_buf *xdr = &req->rq_snd_buf; 576 struct xdr_buf *xdr = &req->rq_snd_buf;
575 int status; 577 int status;
578 int sent = 0;
576 579
577 xs_encode_stream_record_marker(&req->rq_snd_buf); 580 xs_encode_stream_record_marker(&req->rq_snd_buf);
578 581
579 xs_pktdump("packet data:", 582 xs_pktdump("packet data:",
580 req->rq_svec->iov_base, req->rq_svec->iov_len); 583 req->rq_svec->iov_base, req->rq_svec->iov_len);
581 584
582 status = xs_sendpages(transport->sock, NULL, 0, 585 status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
583 xdr, req->rq_bytes_sent, true); 586 true, &sent);
584 dprintk("RPC: %s(%u) = %d\n", 587 dprintk("RPC: %s(%u) = %d\n",
585 __func__, xdr->len - req->rq_bytes_sent, status); 588 __func__, xdr->len - req->rq_bytes_sent, status);
586 if (likely(status >= 0)) { 589 if (likely(sent > 0) || status == 0) {
587 req->rq_bytes_sent += status; 590 req->rq_bytes_sent += sent;
588 req->rq_xmit_bytes_sent += status; 591 req->rq_xmit_bytes_sent += sent;
589 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 592 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
590 req->rq_bytes_sent = 0; 593 req->rq_bytes_sent = 0;
591 return 0; 594 return 0;
@@ -626,6 +629,7 @@ static int xs_udp_send_request(struct rpc_task *task)
626 struct rpc_xprt *xprt = req->rq_xprt; 629 struct rpc_xprt *xprt = req->rq_xprt;
627 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 630 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
628 struct xdr_buf *xdr = &req->rq_snd_buf; 631 struct xdr_buf *xdr = &req->rq_snd_buf;
632 int sent = 0;
629 int status; 633 int status;
630 634
631 xs_pktdump("packet data:", 635 xs_pktdump("packet data:",
@@ -634,22 +638,25 @@ static int xs_udp_send_request(struct rpc_task *task)
634 638
635 if (!xprt_bound(xprt)) 639 if (!xprt_bound(xprt))
636 return -ENOTCONN; 640 return -ENOTCONN;
637 status = xs_sendpages(transport->sock, 641 status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
638 xs_addr(xprt), 642 xdr, req->rq_bytes_sent, true, &sent);
639 xprt->addrlen, xdr,
640 req->rq_bytes_sent, true);
641 643
642 dprintk("RPC: xs_udp_send_request(%u) = %d\n", 644 dprintk("RPC: xs_udp_send_request(%u) = %d\n",
643 xdr->len - req->rq_bytes_sent, status); 645 xdr->len - req->rq_bytes_sent, status);
644 646
645 if (status >= 0) { 647 /* firewall is blocking us, don't return -EAGAIN or we end up looping */
646 req->rq_xmit_bytes_sent += status; 648 if (status == -EPERM)
647 if (status >= req->rq_slen) 649 goto process_status;
650
651 if (sent > 0 || status == 0) {
652 req->rq_xmit_bytes_sent += sent;
653 if (sent >= req->rq_slen)
648 return 0; 654 return 0;
649 /* Still some bytes left; set up for a retry later. */ 655 /* Still some bytes left; set up for a retry later. */
650 status = -EAGAIN; 656 status = -EAGAIN;
651 } 657 }
652 658
659process_status:
653 switch (status) { 660 switch (status) {
654 case -ENOTSOCK: 661 case -ENOTSOCK:
655 status = -ENOTCONN; 662 status = -ENOTCONN;
@@ -665,6 +672,7 @@ static int xs_udp_send_request(struct rpc_task *task)
665 case -ENOBUFS: 672 case -ENOBUFS:
666 case -EPIPE: 673 case -EPIPE:
667 case -ECONNREFUSED: 674 case -ECONNREFUSED:
675 case -EPERM:
668 /* When the server has died, an ICMP port unreachable message 676 /* When the server has died, an ICMP port unreachable message
669 * prompts ECONNREFUSED. */ 677 * prompts ECONNREFUSED. */
670 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 678 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
@@ -713,6 +721,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
713 struct xdr_buf *xdr = &req->rq_snd_buf; 721 struct xdr_buf *xdr = &req->rq_snd_buf;
714 bool zerocopy = true; 722 bool zerocopy = true;
715 int status; 723 int status;
724 int sent;
716 725
717 xs_encode_stream_record_marker(&req->rq_snd_buf); 726 xs_encode_stream_record_marker(&req->rq_snd_buf);
718 727
@@ -730,26 +739,26 @@ static int xs_tcp_send_request(struct rpc_task *task)
730 * to cope with writespace callbacks arriving _after_ we have 739 * to cope with writespace callbacks arriving _after_ we have
731 * called sendmsg(). */ 740 * called sendmsg(). */
732 while (1) { 741 while (1) {
733 status = xs_sendpages(transport->sock, 742 sent = 0;
734 NULL, 0, xdr, req->rq_bytes_sent, 743 status = xs_sendpages(transport->sock, NULL, 0, xdr,
735 zerocopy); 744 req->rq_bytes_sent, zerocopy, &sent);
736 745
737 dprintk("RPC: xs_tcp_send_request(%u) = %d\n", 746 dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
738 xdr->len - req->rq_bytes_sent, status); 747 xdr->len - req->rq_bytes_sent, status);
739 748
740 if (unlikely(status < 0)) 749 if (unlikely(sent == 0 && status < 0))
741 break; 750 break;
742 751
743 /* If we've sent the entire packet, immediately 752 /* If we've sent the entire packet, immediately
744 * reset the count of bytes sent. */ 753 * reset the count of bytes sent. */
745 req->rq_bytes_sent += status; 754 req->rq_bytes_sent += sent;
746 req->rq_xmit_bytes_sent += status; 755 req->rq_xmit_bytes_sent += sent;
747 if (likely(req->rq_bytes_sent >= req->rq_slen)) { 756 if (likely(req->rq_bytes_sent >= req->rq_slen)) {
748 req->rq_bytes_sent = 0; 757 req->rq_bytes_sent = 0;
749 return 0; 758 return 0;
750 } 759 }
751 760
752 if (status != 0) 761 if (sent != 0)
753 continue; 762 continue;
754 status = -EAGAIN; 763 status = -EAGAIN;
755 break; 764 break;
@@ -845,6 +854,8 @@ static void xs_error_report(struct sock *sk)
845 dprintk("RPC: xs_error_report client %p, error=%d...\n", 854 dprintk("RPC: xs_error_report client %p, error=%d...\n",
846 xprt, -err); 855 xprt, -err);
847 trace_rpc_socket_error(xprt, sk->sk_socket, err); 856 trace_rpc_socket_error(xprt, sk->sk_socket, err);
857 if (test_bit(XPRT_CONNECTION_REUSE, &xprt->state))
858 goto out;
848 xprt_wake_pending_tasks(xprt, err); 859 xprt_wake_pending_tasks(xprt, err);
849 out: 860 out:
850 read_unlock_bh(&sk->sk_callback_lock); 861 read_unlock_bh(&sk->sk_callback_lock);
@@ -1746,13 +1757,29 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
1746 unsigned short port = xs_get_srcport(transport); 1757 unsigned short port = xs_get_srcport(transport);
1747 unsigned short last; 1758 unsigned short last;
1748 1759
1760 /*
1761 * If we are asking for any ephemeral port (i.e. port == 0 &&
1762 * transport->xprt.resvport == 0), don't bind. Let the local
1763 * port selection happen implicitly when the socket is used
1764 * (for example at connect time).
1765 *
1766 * This ensures that we can continue to establish TCP
1767 * connections even when all local ephemeral ports are already
1768 * a part of some TCP connection. This makes no difference
 1769	 * for UDP sockets, but also doesn't harm them.
1770 *
1771 * If we're asking for any reserved port (i.e. port == 0 &&
1772 * transport->xprt.resvport == 1) xs_get_srcport above will
1773 * ensure that port is non-zero and we will bind as needed.
1774 */
1775 if (port == 0)
1776 return 0;
1777
1749 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen); 1778 memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
1750 do { 1779 do {
1751 rpc_set_port((struct sockaddr *)&myaddr, port); 1780 rpc_set_port((struct sockaddr *)&myaddr, port);
1752 err = kernel_bind(sock, (struct sockaddr *)&myaddr, 1781 err = kernel_bind(sock, (struct sockaddr *)&myaddr,
1753 transport->xprt.addrlen); 1782 transport->xprt.addrlen);
1754 if (port == 0)
1755 break;
1756 if (err == 0) { 1783 if (err == 0) {
1757 transport->srcport = port; 1784 transport->srcport = port;
1758 break; 1785 break;
@@ -1927,8 +1954,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1927 struct socket *sock; 1954 struct socket *sock;
1928 int status = -EIO; 1955 int status = -EIO;
1929 1956
1930 current->flags |= PF_FSTRANS;
1931
1932 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 1957 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1933 status = __sock_create(xprt->xprt_net, AF_LOCAL, 1958 status = __sock_create(xprt->xprt_net, AF_LOCAL,
1934 SOCK_STREAM, 0, &sock, 1); 1959 SOCK_STREAM, 0, &sock, 1);
@@ -1968,7 +1993,6 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
1968out: 1993out:
1969 xprt_clear_connecting(xprt); 1994 xprt_clear_connecting(xprt);
1970 xprt_wake_pending_tasks(xprt, status); 1995 xprt_wake_pending_tasks(xprt, status);
1971 current->flags &= ~PF_FSTRANS;
1972 return status; 1996 return status;
1973} 1997}
1974 1998
@@ -2071,8 +2095,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
2071 struct socket *sock = transport->sock; 2095 struct socket *sock = transport->sock;
2072 int status = -EIO; 2096 int status = -EIO;
2073 2097
2074 current->flags |= PF_FSTRANS;
2075
2076 /* Start by resetting any existing state */ 2098 /* Start by resetting any existing state */
2077 xs_reset_transport(transport); 2099 xs_reset_transport(transport);
2078 sock = xs_create_sock(xprt, transport, 2100 sock = xs_create_sock(xprt, transport,
@@ -2092,7 +2114,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
2092out: 2114out:
2093 xprt_clear_connecting(xprt); 2115 xprt_clear_connecting(xprt);
2094 xprt_wake_pending_tasks(xprt, status); 2116 xprt_wake_pending_tasks(xprt, status);
2095 current->flags &= ~PF_FSTRANS;
2096} 2117}
2097 2118
2098/* 2119/*
@@ -2229,8 +2250,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2229 struct rpc_xprt *xprt = &transport->xprt; 2250 struct rpc_xprt *xprt = &transport->xprt;
2230 int status = -EIO; 2251 int status = -EIO;
2231 2252
2232 current->flags |= PF_FSTRANS;
2233
2234 if (!sock) { 2253 if (!sock) {
2235 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 2254 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
2236 sock = xs_create_sock(xprt, transport, 2255 sock = xs_create_sock(xprt, transport,
@@ -2245,7 +2264,9 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2245 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, 2264 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
2246 &xprt->state); 2265 &xprt->state);
2247 /* "close" the socket, preserving the local port */ 2266 /* "close" the socket, preserving the local port */
2267 set_bit(XPRT_CONNECTION_REUSE, &xprt->state);
2248 xs_tcp_reuse_connection(transport); 2268 xs_tcp_reuse_connection(transport);
2269 clear_bit(XPRT_CONNECTION_REUSE, &xprt->state);
2249 2270
2250 if (abort_and_exit) 2271 if (abort_and_exit)
2251 goto out_eagain; 2272 goto out_eagain;
@@ -2276,7 +2297,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2276 case -EINPROGRESS: 2297 case -EINPROGRESS:
2277 case -EALREADY: 2298 case -EALREADY:
2278 xprt_clear_connecting(xprt); 2299 xprt_clear_connecting(xprt);
2279 current->flags &= ~PF_FSTRANS;
2280 return; 2300 return;
2281 case -EINVAL: 2301 case -EINVAL:
2282 /* Happens, for instance, if the user specified a link 2302 /* Happens, for instance, if the user specified a link
@@ -2294,7 +2314,6 @@ out_eagain:
2294out: 2314out:
2295 xprt_clear_connecting(xprt); 2315 xprt_clear_connecting(xprt);
2296 xprt_wake_pending_tasks(xprt, status); 2316 xprt_wake_pending_tasks(xprt, status);
2297 current->flags &= ~PF_FSTRANS;
2298} 2317}
2299 2318
2300/** 2319/**