aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/rfcomm/tty.c6
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/ethtool.c3
-rw-r--r--net/core/sock.c8
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_output.c73
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/irda/ircomm/ircomm_tty.c256
-rw-r--r--net/rds/ib.c5
-rw-r--r--net/rds/ib.h28
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_rdma.c43
-rw-r--r--net/rds/ib_recv.c37
-rw-r--r--net/rds/iw.c5
-rw-r--r--net/rds/iw.h28
-rw-r--r--net/rds/iw_cm.c44
-rw-r--r--net/rds/iw_rdma.c44
-rw-r--r--net/rds/iw_recv.c37
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c6
-rw-r--r--net/sunrpc/Kconfig22
-rw-r--r--net/sunrpc/clnt.c48
-rw-r--r--net/sunrpc/rpcb_clnt.c103
-rw-r--r--net/sunrpc/svc.c158
-rw-r--r--net/sunrpc/svc_xprt.c31
-rw-r--r--net/sunrpc/svcsock.c40
-rw-r--r--net/sunrpc/xprt.c89
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c26
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c8
-rw-r--r--net/sunrpc/xprtsock.c363
32 files changed, 912 insertions, 668 deletions
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index abdc703a11d2..cab71ea2796d 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty)
1093 } 1093 }
1094} 1094}
1095 1095
1096static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused)
1097{
1098 return 0;
1099}
1100
1101static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) 1096static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp)
1102{ 1097{
1103 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; 1098 struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data;
@@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = {
1156 .send_xchar = rfcomm_tty_send_xchar, 1151 .send_xchar = rfcomm_tty_send_xchar,
1157 .hangup = rfcomm_tty_hangup, 1152 .hangup = rfcomm_tty_hangup,
1158 .wait_until_sent = rfcomm_tty_wait_until_sent, 1153 .wait_until_sent = rfcomm_tty_wait_until_sent,
1159 .read_proc = rfcomm_tty_read_proc,
1160 .tiocmget = rfcomm_tty_tiocmget, 1154 .tiocmget = rfcomm_tty_tiocmget,
1161 .tiocmset = rfcomm_tty_tiocmset, 1155 .tiocmset = rfcomm_tty_tiocmset,
1162}; 1156};
diff --git a/net/core/dev.c b/net/core/dev.c
index 52fea5b28ca6..91d792d17e09 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2472,8 +2472,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2472 return GRO_NORMAL; 2472 return GRO_NORMAL;
2473 2473
2474 for (p = napi->gro_list; p; p = p->next) { 2474 for (p = napi->gro_list; p; p = p->next) {
2475 NAPI_GRO_CB(p)->same_flow = !compare_ether_header( 2475 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2476 skb_mac_header(p), skb_gro_mac_header(skb)); 2476 && !compare_ether_header(skb_mac_header(p),
2477 skb_gro_mac_header(skb));
2477 NAPI_GRO_CB(p)->flush = 0; 2478 NAPI_GRO_CB(p)->flush = 0;
2478 } 2479 }
2479 2480
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 244ca56dffac..d9d5160610d5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
261 ret = 0; 261 ret = 0;
262 262
263err_out: 263err_out:
264 if (rule_buf) 264 kfree(rule_buf);
265 kfree(rule_buf);
266 265
267 return ret; 266 return ret;
268} 267}
diff --git a/net/core/sock.c b/net/core/sock.c
index 0620046e4eba..7dbf3ffb35cc 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk)
1677{ 1677{
1678 read_lock(&sk->sk_callback_lock); 1678 read_lock(&sk->sk_callback_lock);
1679 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1679 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1680 wake_up_interruptible(sk->sk_sleep); 1680 wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
1681 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); 1681 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
1682 read_unlock(&sk->sk_callback_lock); 1682 read_unlock(&sk->sk_callback_lock);
1683} 1683}
@@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len)
1686{ 1686{
1687 read_lock(&sk->sk_callback_lock); 1687 read_lock(&sk->sk_callback_lock);
1688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1688 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1689 wake_up_interruptible_sync(sk->sk_sleep); 1689 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
1690 POLLRDNORM | POLLRDBAND);
1690 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); 1691 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
1691 read_unlock(&sk->sk_callback_lock); 1692 read_unlock(&sk->sk_callback_lock);
1692} 1693}
@@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk)
1700 */ 1701 */
1701 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { 1702 if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
1702 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) 1703 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
1703 wake_up_interruptible_sync(sk->sk_sleep); 1704 wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
1705 POLLWRNORM | POLLWRBAND);
1704 1706
1705 /* Should agree with poll, otherwise some programs break */ 1707 /* Should agree with poll, otherwise some programs break */
1706 if (sock_writeable(sk)) 1708 if (sock_writeable(sk))
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 35c5f6a5cb7c..5ba533d234db 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,7 +253,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
253 indev = in ? in->name : nulldevname; 253 indev = in ? in->name : nulldevname;
254 outdev = out ? out->name : nulldevname; 254 outdev = out ? out->name : nulldevname;
255 255
256 rcu_read_lock(); 256 rcu_read_lock_bh();
257 private = rcu_dereference(table->private); 257 private = rcu_dereference(table->private);
258 table_base = rcu_dereference(private->entries[smp_processor_id()]); 258 table_base = rcu_dereference(private->entries[smp_processor_id()]);
259 259
@@ -329,7 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
329 } 329 }
330 } while (!hotdrop); 330 } while (!hotdrop);
331 331
332 rcu_read_unlock(); 332 rcu_read_unlock_bh();
333 333
334 if (hotdrop) 334 if (hotdrop)
335 return NF_DROP; 335 return NF_DROP;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 82ee7c9049ff..810c0b62c7d4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -339,7 +339,7 @@ ipt_do_table(struct sk_buff *skb,
339 339
340 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 340 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
341 341
342 rcu_read_lock(); 342 rcu_read_lock_bh();
343 private = rcu_dereference(table->private); 343 private = rcu_dereference(table->private);
344 table_base = rcu_dereference(private->entries[smp_processor_id()]); 344 table_base = rcu_dereference(private->entries[smp_processor_id()]);
345 345
@@ -437,7 +437,7 @@ ipt_do_table(struct sk_buff *skb,
437 } 437 }
438 } while (!hotdrop); 438 } while (!hotdrop);
439 439
440 rcu_read_unlock(); 440 rcu_read_unlock_bh();
441 441
442#ifdef DEBUG_ALLOW_ALL 442#ifdef DEBUG_ALLOW_ALL
443 return NF_ACCEPT; 443 return NF_ACCEPT;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2451aeb5ac23..fafbec8b073e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1081,8 +1081,7 @@ out_err:
1081 * this, no blocking and very strange errors 8) 1081 * this, no blocking and very strange errors 8)
1082 */ 1082 */
1083 1083
1084static int tcp_recv_urg(struct sock *sk, long timeo, 1084static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1085 struct msghdr *msg, int len, int flags)
1086{ 1085{
1087 struct tcp_sock *tp = tcp_sk(sk); 1086 struct tcp_sock *tp = tcp_sk(sk);
1088 1087
@@ -1697,7 +1696,7 @@ out:
1697 return err; 1696 return err;
1698 1697
1699recv_urg: 1698recv_urg:
1700 err = tcp_recv_urg(sk, timeo, msg, len, flags); 1699 err = tcp_recv_urg(sk, msg, len, flags);
1701 goto out; 1700 goto out;
1702} 1701}
1703 1702
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c1f259d2d33b..53300fa2359f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
754 tp->fackets_out -= decr; 754 tp->fackets_out -= decr;
755} 755}
756 756
757/* Pcount in the middle of the write queue got changed, we need to do various
758 * tweaks to fix counters
759 */
760static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
761{
762 struct tcp_sock *tp = tcp_sk(sk);
763
764 tp->packets_out -= decr;
765
766 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
767 tp->sacked_out -= decr;
768 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
769 tp->retrans_out -= decr;
770 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
771 tp->lost_out -= decr;
772
773 /* Reno case is special. Sigh... */
774 if (tcp_is_reno(tp) && decr > 0)
775 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
776
777 tcp_adjust_fackets_out(sk, skb, decr);
778
779 if (tp->lost_skb_hint &&
780 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
781 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
782 tp->lost_cnt_hint -= decr;
783
784 tcp_verify_left_out(tp);
785}
786
757/* Function to create two new TCP segments. Shrinks the given segment 787/* Function to create two new TCP segments. Shrinks the given segment
758 * to the specified size and appends a new segment with the rest of the 788 * to the specified size and appends a new segment with the rest of the
759 * packet to the list. This won't be called frequently, I hope. 789 * packet to the list. This won't be called frequently, I hope.
@@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
836 int diff = old_factor - tcp_skb_pcount(skb) - 866 int diff = old_factor - tcp_skb_pcount(skb) -
837 tcp_skb_pcount(buff); 867 tcp_skb_pcount(buff);
838 868
839 tp->packets_out -= diff; 869 if (diff)
840 870 tcp_adjust_pcount(sk, skb, diff);
841 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
842 tp->sacked_out -= diff;
843 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
844 tp->retrans_out -= diff;
845
846 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
847 tp->lost_out -= diff;
848
849 /* Adjust Reno SACK estimate. */
850 if (tcp_is_reno(tp) && diff > 0) {
851 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
852 tcp_verify_left_out(tp);
853 }
854 tcp_adjust_fackets_out(sk, skb, diff);
855
856 if (tp->lost_skb_hint &&
857 before(TCP_SKB_CB(skb)->seq,
858 TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
859 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
860 tp->lost_cnt_hint -= diff;
861 } 871 }
862 872
863 /* Link BUFF into the send queue. */ 873 /* Link BUFF into the send queue. */
@@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1768 * packet counting does not break. 1778 * packet counting does not break.
1769 */ 1779 */
1770 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; 1780 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
1771 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
1772 tp->retrans_out -= tcp_skb_pcount(next_skb);
1773 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
1774 tp->lost_out -= tcp_skb_pcount(next_skb);
1775 /* Reno case is special. Sigh... */
1776 if (tcp_is_reno(tp) && tp->sacked_out)
1777 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1778
1779 tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
1780 tp->packets_out -= tcp_skb_pcount(next_skb);
1781 1781
1782 /* changed transmit queue under us so clear hints */ 1782 /* changed transmit queue under us so clear hints */
1783 tcp_clear_retrans_hints_partial(tp); 1783 tcp_clear_retrans_hints_partial(tp);
1784 if (next_skb == tp->retransmit_skb_hint) 1784 if (next_skb == tp->retransmit_skb_hint)
1785 tp->retransmit_skb_hint = skb; 1785 tp->retransmit_skb_hint = skb;
1786 1786
1787 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
1788
1787 sk_wmem_free_skb(sk, next_skb); 1789 sk_wmem_free_skb(sk, next_skb);
1788} 1790}
1789 1791
@@ -1891,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1891 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 1893 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1892 return -ENOMEM; /* We'll try again later. */ 1894 return -ENOMEM; /* We'll try again later. */
1893 } else { 1895 } else {
1894 tcp_init_tso_segs(sk, skb, cur_mss); 1896 int oldpcount = tcp_skb_pcount(skb);
1897
1898 if (unlikely(oldpcount > 1)) {
1899 tcp_init_tso_segs(sk, skb, cur_mss);
1900 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
1901 }
1895 } 1902 }
1896 1903
1897 tcp_retrans_try_collapse(sk, skb, cur_mss); 1904 tcp_retrans_try_collapse(sk, skb, cur_mss);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e89cfa3a8f25..dfed176aed37 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,7 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
365 365
366 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 366 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
367 367
368 rcu_read_lock(); 368 rcu_read_lock_bh();
369 private = rcu_dereference(table->private); 369 private = rcu_dereference(table->private);
370 table_base = rcu_dereference(private->entries[smp_processor_id()]); 370 table_base = rcu_dereference(private->entries[smp_processor_id()]);
371 371
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
466#ifdef CONFIG_NETFILTER_DEBUG 466#ifdef CONFIG_NETFILTER_DEBUG
467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
468#endif 468#endif
469 rcu_read_unlock(); 469 rcu_read_unlock_bh();
470 470
471#ifdef DEBUG_ALLOW_ALL 471#ifdef DEBUG_ALLOW_ALL
472 return NF_ACCEPT; 472 return NF_ACCEPT;
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 086d5ef098fd..811984d9324b 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -34,6 +34,7 @@
34#include <linux/module.h> 34#include <linux/module.h>
35#include <linux/fs.h> 35#include <linux/fs.h>
36#include <linux/sched.h> 36#include <linux/sched.h>
37#include <linux/seq_file.h>
37#include <linux/termios.h> 38#include <linux/termios.h>
38#include <linux/tty.h> 39#include <linux/tty.h>
39#include <linux/interrupt.h> 40#include <linux/interrupt.h>
@@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap,
72static void ircomm_tty_flow_indication(void *instance, void *sap, 73static void ircomm_tty_flow_indication(void *instance, void *sap,
73 LOCAL_FLOW cmd); 74 LOCAL_FLOW cmd);
74#ifdef CONFIG_PROC_FS 75#ifdef CONFIG_PROC_FS
75static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, 76static const struct file_operations ircomm_tty_proc_fops;
76 int *eof, void *unused);
77#endif /* CONFIG_PROC_FS */ 77#endif /* CONFIG_PROC_FS */
78static struct tty_driver *driver; 78static struct tty_driver *driver;
79 79
@@ -98,7 +98,7 @@ static const struct tty_operations ops = {
98 .hangup = ircomm_tty_hangup, 98 .hangup = ircomm_tty_hangup,
99 .wait_until_sent = ircomm_tty_wait_until_sent, 99 .wait_until_sent = ircomm_tty_wait_until_sent,
100#ifdef CONFIG_PROC_FS 100#ifdef CONFIG_PROC_FS
101 .read_proc = ircomm_tty_read_proc, 101 .proc_fops = &ircomm_tty_proc_fops,
102#endif /* CONFIG_PROC_FS */ 102#endif /* CONFIG_PROC_FS */
103}; 103};
104 104
@@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap,
1245} 1245}
1246 1246
1247#ifdef CONFIG_PROC_FS 1247#ifdef CONFIG_PROC_FS
1248static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) 1248static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m)
1249{ 1249{
1250 int ret=0; 1250 char sep;
1251 1251
1252 ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]); 1252 seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]);
1253 1253
1254 ret += sprintf(buf+ret, "Service type: "); 1254 seq_puts(m, "Service type: ");
1255 if (self->service_type & IRCOMM_9_WIRE) 1255 if (self->service_type & IRCOMM_9_WIRE)
1256 ret += sprintf(buf+ret, "9_WIRE"); 1256 seq_puts(m, "9_WIRE");
1257 else if (self->service_type & IRCOMM_3_WIRE) 1257 else if (self->service_type & IRCOMM_3_WIRE)
1258 ret += sprintf(buf+ret, "3_WIRE"); 1258 seq_puts(m, "3_WIRE");
1259 else if (self->service_type & IRCOMM_3_WIRE_RAW) 1259 else if (self->service_type & IRCOMM_3_WIRE_RAW)
1260 ret += sprintf(buf+ret, "3_WIRE_RAW"); 1260 seq_puts(m, "3_WIRE_RAW");
1261 else 1261 else
1262 ret += sprintf(buf+ret, "No common service type!\n"); 1262 seq_puts(m, "No common service type!\n");
1263 ret += sprintf(buf+ret, "\n"); 1263 seq_putc(m, '\n');
1264 1264
1265 ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name); 1265 seq_printf(m, "Port name: %s\n", self->settings.port_name);
1266 1266
1267 ret += sprintf(buf+ret, "DTE status: "); 1267 seq_printf(m, "DTE status:");
1268 if (self->settings.dte & IRCOMM_RTS) 1268 sep = ' ';
1269 ret += sprintf(buf+ret, "RTS|"); 1269 if (self->settings.dte & IRCOMM_RTS) {
1270 if (self->settings.dte & IRCOMM_DTR) 1270 seq_printf(m, "%cRTS", sep);
1271 ret += sprintf(buf+ret, "DTR|"); 1271 sep = '|';
1272 if (self->settings.dte) 1272 }
1273 ret--; /* remove the last | */ 1273 if (self->settings.dte & IRCOMM_DTR) {
1274 ret += sprintf(buf+ret, "\n"); 1274 seq_printf(m, "%cDTR", sep);
1275 1275 sep = '|';
1276 ret += sprintf(buf+ret, "DCE status: "); 1276 }
1277 if (self->settings.dce & IRCOMM_CTS) 1277 seq_putc(m, '\n');
1278 ret += sprintf(buf+ret, "CTS|"); 1278
1279 if (self->settings.dce & IRCOMM_DSR) 1279 seq_puts(m, "DCE status:");
1280 ret += sprintf(buf+ret, "DSR|"); 1280 sep = ' ';
1281 if (self->settings.dce & IRCOMM_CD) 1281 if (self->settings.dce & IRCOMM_CTS) {
1282 ret += sprintf(buf+ret, "CD|"); 1282 seq_printf(m, "%cCTS", sep);
1283 if (self->settings.dce & IRCOMM_RI) 1283 sep = '|';
1284 ret += sprintf(buf+ret, "RI|"); 1284 }
1285 if (self->settings.dce) 1285 if (self->settings.dce & IRCOMM_DSR) {
1286 ret--; /* remove the last | */ 1286 seq_printf(m, "%cDSR", sep);
1287 ret += sprintf(buf+ret, "\n"); 1287 sep = '|';
1288 1288 }
1289 ret += sprintf(buf+ret, "Configuration: "); 1289 if (self->settings.dce & IRCOMM_CD) {
1290 seq_printf(m, "%cCD", sep);
1291 sep = '|';
1292 }
1293 if (self->settings.dce & IRCOMM_RI) {
1294 seq_printf(m, "%cRI", sep);
1295 sep = '|';
1296 }
1297 seq_putc(m, '\n');
1298
1299 seq_puts(m, "Configuration: ");
1290 if (!self->settings.null_modem) 1300 if (!self->settings.null_modem)
1291 ret += sprintf(buf+ret, "DTE <-> DCE\n"); 1301 seq_puts(m, "DTE <-> DCE\n");
1292 else 1302 else
1293 ret += sprintf(buf+ret, 1303 seq_puts(m, "DTE <-> DTE (null modem emulation)\n");
1294 "DTE <-> DTE (null modem emulation)\n"); 1304
1295 1305 seq_printf(m, "Data rate: %d\n", self->settings.data_rate);
1296 ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate); 1306
1297 1307 seq_puts(m, "Flow control:");
1298 ret += sprintf(buf+ret, "Flow control: "); 1308 sep = ' ';
1299 if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) 1309 if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) {
1300 ret += sprintf(buf+ret, "XON_XOFF_IN|"); 1310 seq_printf(m, "%cXON_XOFF_IN", sep);
1301 if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) 1311 sep = '|';
1302 ret += sprintf(buf+ret, "XON_XOFF_OUT|"); 1312 }
1303 if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) 1313 if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) {
1304 ret += sprintf(buf+ret, "RTS_CTS_IN|"); 1314 seq_printf(m, "%cXON_XOFF_OUT", sep);
1305 if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) 1315 sep = '|';
1306 ret += sprintf(buf+ret, "RTS_CTS_OUT|"); 1316 }
1307 if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) 1317 if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) {
1308 ret += sprintf(buf+ret, "DSR_DTR_IN|"); 1318 seq_printf(m, "%cRTS_CTS_IN", sep);
1309 if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) 1319 sep = '|';
1310 ret += sprintf(buf+ret, "DSR_DTR_OUT|"); 1320 }
1311 if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) 1321 if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) {
1312 ret += sprintf(buf+ret, "ENQ_ACK_IN|"); 1322 seq_printf(m, "%cRTS_CTS_OUT", sep);
1313 if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) 1323 sep = '|';
1314 ret += sprintf(buf+ret, "ENQ_ACK_OUT|"); 1324 }
1315 if (self->settings.flow_control) 1325 if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) {
1316 ret--; /* remove the last | */ 1326 seq_printf(m, "%cDSR_DTR_IN", sep);
1317 ret += sprintf(buf+ret, "\n"); 1327 sep = '|';
1318 1328 }
1319 ret += sprintf(buf+ret, "Flags: "); 1329 if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) {
1320 if (self->flags & ASYNC_CTS_FLOW) 1330 seq_printf(m, "%cDSR_DTR_OUT", sep);
1321 ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|"); 1331 sep = '|';
1322 if (self->flags & ASYNC_CHECK_CD) 1332 }
1323 ret += sprintf(buf+ret, "ASYNC_CHECK_CD|"); 1333 if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) {
1324 if (self->flags & ASYNC_INITIALIZED) 1334 seq_printf(m, "%cENQ_ACK_IN", sep);
1325 ret += sprintf(buf+ret, "ASYNC_INITIALIZED|"); 1335 sep = '|';
1326 if (self->flags & ASYNC_LOW_LATENCY) 1336 }
1327 ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|"); 1337 if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) {
1328 if (self->flags & ASYNC_CLOSING) 1338 seq_printf(m, "%cENQ_ACK_OUT", sep);
1329 ret += sprintf(buf+ret, "ASYNC_CLOSING|"); 1339 sep = '|';
1330 if (self->flags & ASYNC_NORMAL_ACTIVE) 1340 }
1331 ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|"); 1341 seq_putc(m, '\n');
1332 if (self->flags) 1342
1333 ret--; /* remove the last | */ 1343 seq_puts(m, "Flags:");
1334 ret += sprintf(buf+ret, "\n"); 1344 sep = ' ';
1335 1345 if (self->flags & ASYNC_CTS_FLOW) {
1336 ret += sprintf(buf+ret, "Role: %s\n", self->client ? 1346 seq_printf(m, "%cASYNC_CTS_FLOW", sep);
1337 "client" : "server"); 1347 sep = '|';
1338 ret += sprintf(buf+ret, "Open count: %d\n", self->open_count); 1348 }
1339 ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size); 1349 if (self->flags & ASYNC_CHECK_CD) {
1340 ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size); 1350 seq_printf(m, "%cASYNC_CHECK_CD", sep);
1351 sep = '|';
1352 }
1353 if (self->flags & ASYNC_INITIALIZED) {
1354 seq_printf(m, "%cASYNC_INITIALIZED", sep);
1355 sep = '|';
1356 }
1357 if (self->flags & ASYNC_LOW_LATENCY) {
1358 seq_printf(m, "%cASYNC_LOW_LATENCY", sep);
1359 sep = '|';
1360 }
1361 if (self->flags & ASYNC_CLOSING) {
1362 seq_printf(m, "%cASYNC_CLOSING", sep);
1363 sep = '|';
1364 }
1365 if (self->flags & ASYNC_NORMAL_ACTIVE) {
1366 seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep);
1367 sep = '|';
1368 }
1369 seq_putc(m, '\n');
1370
1371 seq_printf(m, "Role: %s\n", self->client ? "client" : "server");
1372 seq_printf(m, "Open count: %d\n", self->open_count);
1373 seq_printf(m, "Max data size: %d\n", self->max_data_size);
1374 seq_printf(m, "Max header size: %d\n", self->max_header_size);
1341 1375
1342 if (self->tty) 1376 if (self->tty)
1343 ret += sprintf(buf+ret, "Hardware: %s\n", 1377 seq_printf(m, "Hardware: %s\n",
1344 self->tty->hw_stopped ? "Stopped" : "Running"); 1378 self->tty->hw_stopped ? "Stopped" : "Running");
1345
1346 ret += sprintf(buf+ret, "\n");
1347 return ret;
1348} 1379}
1349 1380
1350 1381static int ircomm_tty_proc_show(struct seq_file *m, void *v)
1351/*
1352 * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused)
1353 *
1354 *
1355 *
1356 */
1357static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len,
1358 int *eof, void *unused)
1359{ 1382{
1360 struct ircomm_tty_cb *self; 1383 struct ircomm_tty_cb *self;
1361 int count = 0, l;
1362 off_t begin = 0;
1363 unsigned long flags; 1384 unsigned long flags;
1364 1385
1365 spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); 1386 spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags);
1366 1387
1367 self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); 1388 self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty);
1368 while ((self != NULL) && (count < 4000)) { 1389 while (self != NULL) {
1369 if (self->magic != IRCOMM_TTY_MAGIC) 1390 if (self->magic != IRCOMM_TTY_MAGIC)
1370 break; 1391 break;
1371 1392
1372 l = ircomm_tty_line_info(self, buf + count); 1393 ircomm_tty_line_info(self, m);
1373 count += l;
1374 if (count+begin > offset+len)
1375 goto done;
1376 if (count+begin < offset) {
1377 begin += count;
1378 count = 0;
1379 }
1380
1381 self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); 1394 self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty);
1382 } 1395 }
1383 *eof = 1;
1384done:
1385 spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); 1396 spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags);
1397 return 0;
1398}
1386 1399
1387 if (offset >= count+begin) 1400static int ircomm_tty_proc_open(struct inode *inode, struct file *file)
1388 return 0; 1401{
1389 *start = buf + (offset-begin); 1402 return single_open(file, ircomm_tty_proc_show, NULL);
1390 return ((len < begin+count-offset) ? len : begin+count-offset);
1391} 1403}
1404
1405static const struct file_operations ircomm_tty_proc_fops = {
1406 .owner = THIS_MODULE,
1407 .open = ircomm_tty_proc_open,
1408 .read = seq_read,
1409 .llseek = seq_lseek,
1410 .release = single_release,
1411};
1392#endif /* CONFIG_PROC_FS */ 1412#endif /* CONFIG_PROC_FS */
1393 1413
1394MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); 1414MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>");
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 06a7b798d9a7..4933b380985e 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51 51
52struct list_head rds_ib_devices; 52struct list_head rds_ib_devices;
53 53
54/* NOTE: if also grabbing ibdev lock, grab this first */
54DEFINE_SPINLOCK(ib_nodev_conns_lock); 55DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns); 56LIST_HEAD(ib_nodev_conns);
56 57
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
137 kfree(i_ipaddr); 138 kfree(i_ipaddr);
138 } 139 }
139 140
140 rds_ib_remove_conns(rds_ibdev); 141 rds_ib_destroy_conns(rds_ibdev);
141 142
142 if (rds_ibdev->mr_pool) 143 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 144 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
249void rds_ib_exit(void) 250void rds_ib_exit(void)
250{ 251{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 252 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns(); 253 rds_ib_destroy_nodev_conns();
253 ib_unregister_client(&rds_ib_client); 254 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit(); 255 rds_ib_sysctl_exit();
255 rds_ib_recv_exit(); 256 rds_ib_recv_exit();
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8be563a1363a..069206cae733 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -108,7 +108,12 @@ struct rds_ib_connection {
108 108
109 /* sending acks */ 109 /* sending acks */
110 unsigned long i_ack_flags; 110 unsigned long i_ack_flags;
111#ifdef KERNEL_HAS_ATOMIC64
112 atomic64_t i_ack_next; /* next ACK to send */
113#else
114 spinlock_t i_ack_lock; /* protect i_ack_next */
111 u64 i_ack_next; /* next ACK to send */ 115 u64 i_ack_next; /* next ACK to send */
116#endif
112 struct rds_header *i_ack; 117 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr; 118 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge; 119 struct ib_sge i_ack_sge;
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
267 272
268/* ib_rdma.c */ 273/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 274int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 275void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void); 276void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); 277void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
278static inline void rds_ib_destroy_nodev_conns(void)
279{
280 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
281}
282static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
283{
284 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
285}
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 286struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 287void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 288void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
355 return &sge[1]; 368 return &sge[1];
356} 369}
357 370
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
366
367#endif 371#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0532237bd128..f8e40e1a6038 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); 126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err) 127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn); 129 rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132 130
133 /* If the peer gave us the last packet it saw, process this as if 131 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */ 132 * we had received a regular ACK. */
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
616 /* 614 /*
617 * Move connection back to the nodev list. 615 * Move connection back to the nodev list.
618 */ 616 */
619 if (ic->rds_ibdev) { 617 if (ic->rds_ibdev)
620 618 rds_ib_remove_conn(ic->rds_ibdev, conn);
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631 619
632 ic->i_cm_id = NULL; 620 ic->i_cm_id = NULL;
633 ic->i_pd = NULL; 621 ic->i_pd = NULL;
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
648 636
649 /* Clear the ACK state */ 637 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 638 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0); 639#ifdef KERNEL_HAS_ATOMIC64
640 atomic64_set(&ic->i_ack_next, 0);
641#else
642 ic->i_ack_next = 0;
643#endif
652 ic->i_ack_recv = 0; 644 ic->i_ack_recv = 0;
653 645
654 /* Clear flow control state */ 646 /* Clear flow control state */
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
681 673
682 INIT_LIST_HEAD(&ic->ib_node); 674 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex); 675 mutex_init(&ic->i_recv_mutex);
676#ifndef KERNEL_HAS_ATOMIC64
677 spin_lock_init(&ic->i_ack_lock);
678#endif
684 679
685 /* 680 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they 681 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
701 return 0; 696 return 0;
702} 697}
703 698
699/*
700 * Free a connection. Connection must be shut down and not set for reconnect.
701 */
704void rds_ib_conn_free(void *arg) 702void rds_ib_conn_free(void *arg)
705{ 703{
706 struct rds_ib_connection *ic = arg; 704 struct rds_ib_connection *ic = arg;
705 spinlock_t *lock_ptr;
706
707 rdsdebug("ic %p\n", ic); 707 rdsdebug("ic %p\n", ic);
708
709 /*
710 * Conn is either on a dev's list or on the nodev list.
711 * A race with shutdown() or connect() would cause problems
712 * (since rds_ibdev would change) but that should never happen.
713 */
714 lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
715
716 spin_lock_irq(lock_ptr);
708 list_del(&ic->ib_node); 717 list_del(&ic->ib_node);
718 spin_unlock_irq(lock_ptr);
719
709 kfree(ic); 720 kfree(ic);
710} 721}
711 722
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 69a6289ed672..81033af93020 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140} 140}
141 141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 142void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{ 143{
144 struct rds_ib_connection *ic = conn->c_transport_data; 144 struct rds_ib_connection *ic = conn->c_transport_data;
145 145
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
148 BUG_ON(list_empty(&ib_nodev_conns)); 148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node)); 149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node); 150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152 151
153 spin_lock_irq(&rds_ibdev->spinlock); 152 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 153 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 154 spin_unlock_irq(&rds_ibdev->spinlock);
155 spin_unlock_irq(&ib_nodev_conns_lock);
156 156
157 ic->rds_ibdev = rds_ibdev; 157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160} 158}
161 159
162void rds_ib_remove_nodev_conns(void) 160void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
163{ 161{
164 struct rds_ib_connection *ic, *_ic; 162 struct rds_ib_connection *ic = conn->c_transport_data;
165 LIST_HEAD(tmp_list);
166 163
167 /* avoid calling conn_destroy with irqs off */ 164 /* place conn on nodev_conns_list */
168 spin_lock_irq(&ib_nodev_conns_lock); 165 spin_lock(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172 166
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 167 spin_lock_irq(&rds_ibdev->spinlock);
174 if (ic->conn->c_passive) 168 BUG_ON(list_empty(&ic->ib_node));
175 rds_conn_destroy(ic->conn->c_passive); 169 list_del(&ic->ib_node);
176 rds_conn_destroy(ic->conn); 170 spin_unlock_irq(&rds_ibdev->spinlock);
177 } 171
172 list_add_tail(&ic->ib_node, &ib_nodev_conns);
173
174 spin_unlock(&ib_nodev_conns_lock);
175
176 ic->rds_ibdev = NULL;
178} 177}
179 178
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) 179void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
181{ 180{
182 struct rds_ib_connection *ic, *_ic; 181 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 182 LIST_HEAD(tmp_list);
184 183
185 /* avoid calling conn_destroy with irqs off */ 184 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock); 185 spin_lock_irq(list_lock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list); 186 list_splice(list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list); 187 INIT_LIST_HEAD(list);
189 spin_unlock_irq(&rds_ibdev->spinlock); 188 spin_unlock_irq(list_lock);
190 189
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 190 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive) 191 if (ic->conn->c_passive)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5061b5502162..36d931573ff4 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 399static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_ib_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 445static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 1b56905c4c08..b732efb5b634 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
51 51
52struct list_head rds_iw_devices; 52struct list_head rds_iw_devices;
53 53
54/* NOTE: if also grabbing iwdev lock, grab this first */
54DEFINE_SPINLOCK(iw_nodev_conns_lock); 55DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns); 56LIST_HEAD(iw_nodev_conns);
56 57
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
145 } 146 }
146 spin_unlock_irq(&rds_iwdev->spinlock); 147 spin_unlock_irq(&rds_iwdev->spinlock);
147 148
148 rds_iw_remove_conns(rds_iwdev); 149 rds_iw_destroy_conns(rds_iwdev);
149 150
150 if (rds_iwdev->mr_pool) 151 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); 152 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
258void rds_iw_exit(void) 259void rds_iw_exit(void)
259{ 260{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); 261 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns(); 262 rds_iw_destroy_nodev_conns();
262 ib_unregister_client(&rds_iw_client); 263 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit(); 264 rds_iw_sysctl_exit();
264 rds_iw_recv_exit(); 265 rds_iw_recv_exit();
diff --git a/net/rds/iw.h b/net/rds/iw.h
index 0ddda34f2a1c..b4fb27252895 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -131,7 +131,12 @@ struct rds_iw_connection {
131 131
132 /* sending acks */ 132 /* sending acks */
133 unsigned long i_ack_flags; 133 unsigned long i_ack_flags;
134#ifdef KERNEL_HAS_ATOMIC64
135 atomic64_t i_ack_next; /* next ACK to send */
136#else
137 spinlock_t i_ack_lock; /* protect i_ack_next */
134 u64 i_ack_next; /* next ACK to send */ 138 u64 i_ack_next; /* next ACK to send */
139#endif
135 struct rds_header *i_ack; 140 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr; 141 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge; 142 struct ib_sge i_ack_sge;
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
294 299
295/* ib_rdma.c */ 300/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 301int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); 302void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void); 303void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); 304void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
305static inline void rds_iw_destroy_nodev_conns(void)
306{
307 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
308}
309static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
310{
311 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
312}
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); 313struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); 314void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); 315void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
383 return &sge[1]; 396 return &sge[1];
384} 397}
385 398
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif 399#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 57ecb3d4b8a5..a416b0d492b1 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); 86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err) 87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); 88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn); 89 rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92 90
93 /* If the peer gave us the last packet it saw, process this as if 91 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */ 92 * we had received a regular ACK. */
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
637 * Move connection back to the nodev list. 635 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list. 636 * Remove cm_id from the device cm_id list.
639 */ 637 */
640 if (ic->rds_iwdev) { 638 if (ic->rds_iwdev)
641 639 rds_iw_remove_conn(ic->rds_iwdev, conn);
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653 640
654 rdma_destroy_id(ic->i_cm_id); 641 rdma_destroy_id(ic->i_cm_id);
655 642
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
672 659
673 /* Clear the ACK state */ 660 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 661 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0); 662#ifdef KERNEL_HAS_ATOMIC64
663 atomic64_set(&ic->i_ack_next, 0);
664#else
665 ic->i_ack_next = 0;
666#endif
676 ic->i_ack_recv = 0; 667 ic->i_ack_recv = 0;
677 668
678 /* Clear flow control state */ 669 /* Clear flow control state */
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
706 697
707 INIT_LIST_HEAD(&ic->iw_node); 698 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex); 699 mutex_init(&ic->i_recv_mutex);
700#ifndef KERNEL_HAS_ATOMIC64
701 spin_lock_init(&ic->i_ack_lock);
702#endif
709 703
710 /* 704 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they 705 * rds_iw_conn_shutdown() waits for these to be emptied so they
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
726 return 0; 720 return 0;
727} 721}
728 722
723/*
724 * Free a connection. Connection must be shut down and not set for reconnect.
725 */
729void rds_iw_conn_free(void *arg) 726void rds_iw_conn_free(void *arg)
730{ 727{
731 struct rds_iw_connection *ic = arg; 728 struct rds_iw_connection *ic = arg;
729 spinlock_t *lock_ptr;
730
732 rdsdebug("ic %p\n", ic); 731 rdsdebug("ic %p\n", ic);
732
733 /*
734 * Conn is either on a dev's list or on the nodev list.
735 * A race with shutdown() or connect() would cause problems
736 * (since rds_iwdev would change) but that should never happen.
737 */
738 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
739
740 spin_lock_irq(lock_ptr);
733 list_del(&ic->iw_node); 741 list_del(&ic->iw_node);
742 spin_unlock_irq(lock_ptr);
743
734 kfree(ic); 744 kfree(ic);
735} 745}
736 746
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 1c02a8f952d0..dcdb37da80f2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
196 return rds_iw_add_cm_id(rds_iwdev, cm_id); 196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197} 197}
198 198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) 199void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{ 200{
201 struct rds_iw_connection *ic = conn->c_transport_data; 201 struct rds_iw_connection *ic = conn->c_transport_data;
202 202
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
205 BUG_ON(list_empty(&iw_nodev_conns)); 205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock_irq(&rds_iwdev->spinlock);
212 spin_unlock_irq(&iw_nodev_conns_lock);
213 213
214 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217} 215}
218 216
219void rds_iw_remove_nodev_conns(void) 217void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
220{ 218{
221 struct rds_iw_connection *ic, *_ic; 219 struct rds_iw_connection *ic = conn->c_transport_data;
222 LIST_HEAD(tmp_list);
223 220
224 /* avoid calling conn_destroy with irqs off */ 221 /* place conn on nodev_conns_list */
225 spin_lock_irq(&iw_nodev_conns_lock); 222 spin_lock(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229 223
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 224 spin_lock_irq(&rds_iwdev->spinlock);
231 if (ic->conn->c_passive) 225 BUG_ON(list_empty(&ic->iw_node));
232 rds_conn_destroy(ic->conn->c_passive); 226 list_del(&ic->iw_node);
233 rds_conn_destroy(ic->conn); 227 spin_unlock_irq(&rds_iwdev->spinlock);
234 } 228
229 list_add_tail(&ic->iw_node, &iw_nodev_conns);
230
231 spin_unlock(&iw_nodev_conns_lock);
232
233 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
234 ic->rds_iwdev = NULL;
235} 235}
236 236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) 237void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
238{ 238{
239 struct rds_iw_connection *ic, *_ic; 239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list); 240 LIST_HEAD(tmp_list);
241 241
242 /* avoid calling conn_destroy with irqs off */ 242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock); 243 spin_lock_irq(list_lock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list); 244 list_splice(list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list); 245 INIT_LIST_HEAD(list);
246 spin_unlock_irq(&rds_iwdev->spinlock); 246 spin_unlock_irq(list_lock);
247 247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive) 249 if (ic->conn->c_passive)
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index a1931f0027a2..fde470fa50d5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, 399static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_iw_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) 445static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 060400704979..619f0a30a4e5 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -28,6 +28,10 @@
28 */ 28 */
29#define RDS_PORT 18634 29#define RDS_PORT 18634
30 30
31#ifdef ATOMIC64_INIT
32#define KERNEL_HAS_ATOMIC64
33#endif
34
31#ifdef DEBUG 35#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) 36#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else 37#else
diff --git a/net/rds/send.c b/net/rds/send.c
index 1b37364656f0..104fe033203d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{ 615{
616 struct rds_message *rm, *tmp; 616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn; 617 struct rds_connection *conn;
618 unsigned long flags; 618 unsigned long flags, flags2;
619 LIST_HEAD(list); 619 LIST_HEAD(list);
620 int wake = 0; 620 int wake = 0;
621 621
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
651 list_for_each_entry(rm, &list, m_sock_item) { 651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that 652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */ 653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock); 654 spin_lock_irqsave(&rm->m_rs_lock, flags2);
655 rm->m_rs = NULL; 655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock); 656 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
657 657
658 /* 658 /*
659 * If we see this flag cleared then we're *sure* that someone 659 * If we see this flag cleared then we're *sure* that someone
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 5592883e1e4a..afd91c78ce8e 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA
17 17
18 If unsure, say N. 18 If unsure, say N.
19 19
20config SUNRPC_REGISTER_V4
21 bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
22 depends on SUNRPC && EXPERIMENTAL
23 default n
24 help
25 Sun added support for registering RPC services at an IPv6
26 address by creating two new versions of the rpcbind protocol
27 (RFC 1833).
28
29 This option enables support in the kernel RPC server for
30 registering kernel RPC services via version 4 of the rpcbind
31 protocol. If you enable this option, you must run a portmapper
32 daemon that supports rpcbind protocol version 4.
33
34 Serving NFS over IPv6 from knfsd (the kernel's NFS server)
35 requires that you enable this option and use a portmapper that
36 supports rpcbind version 4.
37
38 If unsure, say N to get traditional behavior (register kernel
39 RPC services using only rpcbind version 2). Distributions
40 using the legacy Linux portmapper daemon must say N here.
41
42config RPCSEC_GSS_KRB5 20config RPCSEC_GSS_KRB5
43 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" 21 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
44 depends on SUNRPC && EXPERIMENTAL 22 depends on SUNRPC && EXPERIMENTAL
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 836f15c0c4a3..5abab094441f 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task)
1032 dprint_status(task); 1032 dprint_status(task);
1033 1033
1034 task->tk_status = 0; 1034 task->tk_status = 0;
1035 if (status >= 0) { 1035 if (status >= 0 || status == -EAGAIN) {
1036 clnt->cl_stats->netreconn++; 1036 clnt->cl_stats->netreconn++;
1037 task->tk_action = call_transmit; 1037 task->tk_action = call_transmit;
1038 return; 1038 return;
1039 } 1039 }
1040 1040
1041 /* Something failed: remote service port may have changed */
1042 rpc_force_rebind(clnt);
1043
1044 switch (status) { 1041 switch (status) {
1045 case -ENOTCONN:
1046 case -EAGAIN:
1047 task->tk_action = call_bind;
1048 if (!RPC_IS_SOFT(task))
1049 return;
1050 /* if soft mounted, test if we've timed out */ 1042 /* if soft mounted, test if we've timed out */
1051 case -ETIMEDOUT: 1043 case -ETIMEDOUT:
1052 task->tk_action = call_timeout; 1044 task->tk_action = call_timeout;
1053 return; 1045 break;
1046 default:
1047 rpc_exit(task, -EIO);
1054 } 1048 }
1055 rpc_exit(task, -EIO);
1056} 1049}
1057 1050
1058/* 1051/*
@@ -1105,14 +1098,26 @@ static void
1105call_transmit_status(struct rpc_task *task) 1098call_transmit_status(struct rpc_task *task)
1106{ 1099{
1107 task->tk_action = call_status; 1100 task->tk_action = call_status;
1108 /* 1101 switch (task->tk_status) {
1109 * Special case: if we've been waiting on the socket's write_space() 1102 case -EAGAIN:
1110 * callback, then don't call xprt_end_transmit(). 1103 break;
1111 */ 1104 default:
1112 if (task->tk_status == -EAGAIN) 1105 xprt_end_transmit(task);
1113 return; 1106 /*
1114 xprt_end_transmit(task); 1107 * Special cases: if we've been waiting on the
1115 rpc_task_force_reencode(task); 1108 * socket's write_space() callback, or if the
1109 * socket just returned a connection error,
1110 * then hold onto the transport lock.
1111 */
1112 case -ECONNREFUSED:
1113 case -ECONNRESET:
1114 case -ENOTCONN:
1115 case -EHOSTDOWN:
1116 case -EHOSTUNREACH:
1117 case -ENETUNREACH:
1118 case -EPIPE:
1119 rpc_task_force_reencode(task);
1120 }
1116} 1121}
1117 1122
1118/* 1123/*
@@ -1152,9 +1157,12 @@ call_status(struct rpc_task *task)
1152 xprt_conditional_disconnect(task->tk_xprt, 1157 xprt_conditional_disconnect(task->tk_xprt,
1153 req->rq_connect_cookie); 1158 req->rq_connect_cookie);
1154 break; 1159 break;
1160 case -ECONNRESET:
1155 case -ECONNREFUSED: 1161 case -ECONNREFUSED:
1156 case -ENOTCONN:
1157 rpc_force_rebind(clnt); 1162 rpc_force_rebind(clnt);
1163 rpc_delay(task, 3*HZ);
1164 case -EPIPE:
1165 case -ENOTCONN:
1158 task->tk_action = call_bind; 1166 task->tk_action = call_bind;
1159 break; 1167 break;
1160 case -EAGAIN: 1168 case -EAGAIN:
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 03ae007641e4..beee6da33035 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -63,9 +63,16 @@ enum {
63 * r_owner 63 * r_owner
64 * 64 *
65 * The "owner" is allowed to unset a service in the rpcbind database. 65 * The "owner" is allowed to unset a service in the rpcbind database.
66 * We always use the following (arbitrary) fixed string. 66 *
67 * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a
68 * UID which it maps to a local user name via a password lookup.
69 * In all other cases it is ignored.
70 *
71 * For SET/UNSET requests, user space provides a value, even for
72 * network requests, and GETADDR uses an empty string. We follow
73 * those precedents here.
67 */ 74 */
68#define RPCB_OWNER_STRING "rpcb" 75#define RPCB_OWNER_STRING "0"
69#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) 76#define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING)
70 77
71static void rpcb_getport_done(struct rpc_task *, void *); 78static void rpcb_getport_done(struct rpc_task *, void *);
@@ -124,12 +131,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = {
124 .sin_port = htons(RPCBIND_PORT), 131 .sin_port = htons(RPCBIND_PORT),
125}; 132};
126 133
127static const struct sockaddr_in6 rpcb_in6addr_loopback = {
128 .sin6_family = AF_INET6,
129 .sin6_addr = IN6ADDR_LOOPBACK_INIT,
130 .sin6_port = htons(RPCBIND_PORT),
131};
132
133static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, 134static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr,
134 size_t addrlen, u32 version) 135 size_t addrlen, u32 version)
135{ 136{
@@ -176,9 +177,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
176 return rpc_create(&args); 177 return rpc_create(&args);
177} 178}
178 179
179static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, 180static int rpcb_register_call(const u32 version, struct rpc_message *msg)
180 u32 version, struct rpc_message *msg)
181{ 181{
182 struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback;
183 size_t addrlen = sizeof(rpcb_inaddr_loopback);
182 struct rpc_clnt *rpcb_clnt; 184 struct rpc_clnt *rpcb_clnt;
183 int result, error = 0; 185 int result, error = 0;
184 186
@@ -192,7 +194,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen,
192 error = PTR_ERR(rpcb_clnt); 194 error = PTR_ERR(rpcb_clnt);
193 195
194 if (error < 0) { 196 if (error < 0) {
195 printk(KERN_WARNING "RPC: failed to contact local rpcbind " 197 dprintk("RPC: failed to contact local rpcbind "
196 "server (errno %d).\n", -error); 198 "server (errno %d).\n", -error);
197 return error; 199 return error;
198 } 200 }
@@ -254,25 +256,23 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port)
254 if (port) 256 if (port)
255 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; 257 msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
256 258
257 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 259 return rpcb_register_call(RPCBVERS_2, &msg);
258 sizeof(rpcb_inaddr_loopback),
259 RPCBVERS_2, &msg);
260} 260}
261 261
262/* 262/*
263 * Fill in AF_INET family-specific arguments to register 263 * Fill in AF_INET family-specific arguments to register
264 */ 264 */
265static int rpcb_register_netid4(struct sockaddr_in *address_to_register, 265static int rpcb_register_inet4(const struct sockaddr *sap,
266 struct rpc_message *msg) 266 struct rpc_message *msg)
267{ 267{
268 const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
268 struct rpcbind_args *map = msg->rpc_argp; 269 struct rpcbind_args *map = msg->rpc_argp;
269 unsigned short port = ntohs(address_to_register->sin_port); 270 unsigned short port = ntohs(sin->sin_port);
270 char buf[32]; 271 char buf[32];
271 272
272 /* Construct AF_INET universal address */ 273 /* Construct AF_INET universal address */
273 snprintf(buf, sizeof(buf), "%pI4.%u.%u", 274 snprintf(buf, sizeof(buf), "%pI4.%u.%u",
274 &address_to_register->sin_addr.s_addr, 275 &sin->sin_addr.s_addr, port >> 8, port & 0xff);
275 port >> 8, port & 0xff);
276 map->r_addr = buf; 276 map->r_addr = buf;
277 277
278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 278 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -284,29 +284,27 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register,
284 if (port) 284 if (port)
285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 285 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
286 286
287 return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, 287 return rpcb_register_call(RPCBVERS_4, msg);
288 sizeof(rpcb_inaddr_loopback),
289 RPCBVERS_4, msg);
290} 288}
291 289
292/* 290/*
293 * Fill in AF_INET6 family-specific arguments to register 291 * Fill in AF_INET6 family-specific arguments to register
294 */ 292 */
295static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, 293static int rpcb_register_inet6(const struct sockaddr *sap,
296 struct rpc_message *msg) 294 struct rpc_message *msg)
297{ 295{
296 const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
298 struct rpcbind_args *map = msg->rpc_argp; 297 struct rpcbind_args *map = msg->rpc_argp;
299 unsigned short port = ntohs(address_to_register->sin6_port); 298 unsigned short port = ntohs(sin6->sin6_port);
300 char buf[64]; 299 char buf[64];
301 300
302 /* Construct AF_INET6 universal address */ 301 /* Construct AF_INET6 universal address */
303 if (ipv6_addr_any(&address_to_register->sin6_addr)) 302 if (ipv6_addr_any(&sin6->sin6_addr))
304 snprintf(buf, sizeof(buf), "::.%u.%u", 303 snprintf(buf, sizeof(buf), "::.%u.%u",
305 port >> 8, port & 0xff); 304 port >> 8, port & 0xff);
306 else 305 else
307 snprintf(buf, sizeof(buf), "%pI6.%u.%u", 306 snprintf(buf, sizeof(buf), "%pI6.%u.%u",
308 &address_to_register->sin6_addr, 307 &sin6->sin6_addr, port >> 8, port & 0xff);
309 port >> 8, port & 0xff);
310 map->r_addr = buf; 308 map->r_addr = buf;
311 309
312 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " 310 dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with "
@@ -318,9 +316,21 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
318 if (port) 316 if (port)
319 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; 317 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
320 318
321 return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, 319 return rpcb_register_call(RPCBVERS_4, msg);
322 sizeof(rpcb_in6addr_loopback), 320}
323 RPCBVERS_4, msg); 321
322static int rpcb_unregister_all_protofamilies(struct rpc_message *msg)
323{
324 struct rpcbind_args *map = msg->rpc_argp;
325
326 dprintk("RPC: unregistering [%u, %u, '%s'] with "
327 "local rpcbind\n",
328 map->r_prog, map->r_vers, map->r_netid);
329
330 map->r_addr = "";
331 msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
332
333 return rpcb_register_call(RPCBVERS_4, msg);
324} 334}
325 335
326/** 336/**
@@ -340,10 +350,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register,
340 * invoke this function once for each [program, version, address, 350 * invoke this function once for each [program, version, address,
341 * netid] tuple they wish to advertise. 351 * netid] tuple they wish to advertise.
342 * 352 *
343 * Callers may also unregister RPC services that are no longer 353 * Callers may also unregister RPC services that are registered at a
344 * available by setting the port number in the passed-in address 354 * specific address by setting the port number in @address to zero.
345 * to zero. Callers pass a netid of "" to unregister all 355 * They may unregister all registered protocol families at once for
346 * transport netids associated with [program, version, address]. 356 * a service by passing a NULL @address argument. If @netid is ""
357 * then all netids for [program, version, address] are unregistered.
347 * 358 *
348 * This function uses rpcbind protocol version 4 to contact the 359 * This function uses rpcbind protocol version 4 to contact the
349 * local rpcbind daemon. The local rpcbind daemon must support 360 * local rpcbind daemon. The local rpcbind daemon must support
@@ -378,13 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version,
378 .rpc_argp = &map, 389 .rpc_argp = &map,
379 }; 390 };
380 391
392 if (address == NULL)
393 return rpcb_unregister_all_protofamilies(&msg);
394
381 switch (address->sa_family) { 395 switch (address->sa_family) {
382 case AF_INET: 396 case AF_INET:
383 return rpcb_register_netid4((struct sockaddr_in *)address, 397 return rpcb_register_inet4(address, &msg);
384 &msg);
385 case AF_INET6: 398 case AF_INET6:
386 return rpcb_register_netid6((struct sockaddr_in6 *)address, 399 return rpcb_register_inet6(address, &msg);
387 &msg);
388 } 400 }
389 401
390 return -EAFNOSUPPORT; 402 return -EAFNOSUPPORT;
@@ -579,7 +591,7 @@ void rpcb_getport_async(struct rpc_task *task)
579 map->r_xprt = xprt_get(xprt); 591 map->r_xprt = xprt_get(xprt);
580 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); 592 map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
581 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); 593 map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
582 map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ 594 map->r_owner = "";
583 map->r_status = -EIO; 595 map->r_status = -EIO;
584 596
585 child = rpcb_call_async(rpcb_clnt, map, proc); 597 child = rpcb_call_async(rpcb_clnt, map, proc);
@@ -703,11 +715,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
703 *portp = 0; 715 *portp = 0;
704 addr_len = ntohl(*p++); 716 addr_len = ntohl(*p++);
705 717
718 if (addr_len == 0) {
719 dprintk("RPC: rpcb_decode_getaddr: "
720 "service is not registered\n");
721 return 0;
722 }
723
706 /* 724 /*
707 * Simple sanity check. The smallest possible universal 725 * Simple sanity check.
708 * address is an IPv4 address string containing 11 bytes.
709 */ 726 */
710 if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) 727 if (addr_len > RPCBIND_MAXUADDRLEN)
711 goto out_err; 728 goto out_err;
712 729
713 /* 730 /*
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bb507e2bb94d..9f2f2412a2f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu)
359 */ 359 */
360static struct svc_serv * 360static struct svc_serv *
361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools, 361__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
362 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 362 void (*shutdown)(struct svc_serv *serv))
363{ 363{
364 struct svc_serv *serv; 364 struct svc_serv *serv;
365 unsigned int vers; 365 unsigned int vers;
@@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
368 368
369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) 369 if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
370 return NULL; 370 return NULL;
371 serv->sv_family = family;
372 serv->sv_name = prog->pg_name; 371 serv->sv_name = prog->pg_name;
373 serv->sv_program = prog; 372 serv->sv_program = prog;
374 serv->sv_nrthreads = 1; 373 serv->sv_nrthreads = 1;
@@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
427 426
428struct svc_serv * 427struct svc_serv *
429svc_create(struct svc_program *prog, unsigned int bufsize, 428svc_create(struct svc_program *prog, unsigned int bufsize,
430 sa_family_t family, void (*shutdown)(struct svc_serv *serv)) 429 void (*shutdown)(struct svc_serv *serv))
431{ 430{
432 return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); 431 return __svc_create(prog, bufsize, /*npools*/1, shutdown);
433} 432}
434EXPORT_SYMBOL_GPL(svc_create); 433EXPORT_SYMBOL_GPL(svc_create);
435 434
436struct svc_serv * 435struct svc_serv *
437svc_create_pooled(struct svc_program *prog, unsigned int bufsize, 436svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
438 sa_family_t family, void (*shutdown)(struct svc_serv *serv), 437 void (*shutdown)(struct svc_serv *serv),
439 svc_thread_fn func, struct module *mod) 438 svc_thread_fn func, struct module *mod)
440{ 439{
441 struct svc_serv *serv; 440 struct svc_serv *serv;
442 unsigned int npools = svc_pool_map_get(); 441 unsigned int npools = svc_pool_map_get();
443 442
444 serv = __svc_create(prog, bufsize, npools, family, shutdown); 443 serv = __svc_create(prog, bufsize, npools, shutdown);
445 444
446 if (serv != NULL) { 445 if (serv != NULL) {
447 serv->sv_function = func; 446 serv->sv_function = func;
@@ -719,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp)
719} 718}
720EXPORT_SYMBOL_GPL(svc_exit_thread); 719EXPORT_SYMBOL_GPL(svc_exit_thread);
721 720
722#ifdef CONFIG_SUNRPC_REGISTER_V4
723
724/* 721/*
725 * Register an "inet" protocol family netid with the local 722 * Register an "inet" protocol family netid with the local
726 * rpcbind daemon via an rpcbind v4 SET request. 723 * rpcbind daemon via an rpcbind v4 SET request.
@@ -735,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
735 const unsigned short protocol, 732 const unsigned short protocol,
736 const unsigned short port) 733 const unsigned short port)
737{ 734{
738 struct sockaddr_in sin = { 735 const struct sockaddr_in sin = {
739 .sin_family = AF_INET, 736 .sin_family = AF_INET,
740 .sin_addr.s_addr = htonl(INADDR_ANY), 737 .sin_addr.s_addr = htonl(INADDR_ANY),
741 .sin_port = htons(port), 738 .sin_port = htons(port),
742 }; 739 };
743 char *netid; 740 const char *netid;
741 int error;
744 742
745 switch (protocol) { 743 switch (protocol) {
746 case IPPROTO_UDP: 744 case IPPROTO_UDP:
@@ -750,13 +748,23 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
750 netid = RPCBIND_NETID_TCP; 748 netid = RPCBIND_NETID_TCP;
751 break; 749 break;
752 default: 750 default:
753 return -EPROTONOSUPPORT; 751 return -ENOPROTOOPT;
754 } 752 }
755 753
756 return rpcb_v4_register(program, version, 754 error = rpcb_v4_register(program, version,
757 (struct sockaddr *)&sin, netid); 755 (const struct sockaddr *)&sin, netid);
756
757 /*
758 * User space didn't support rpcbind v4, so retry this
759 * registration request with the legacy rpcbind v2 protocol.
760 */
761 if (error == -EPROTONOSUPPORT)
762 error = rpcb_register(program, version, protocol, port);
763
764 return error;
758} 765}
759 766
767#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
760/* 768/*
761 * Register an "inet6" protocol family netid with the local 769 * Register an "inet6" protocol family netid with the local
762 * rpcbind daemon via an rpcbind v4 SET request. 770 * rpcbind daemon via an rpcbind v4 SET request.
@@ -771,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
771 const unsigned short protocol, 779 const unsigned short protocol,
772 const unsigned short port) 780 const unsigned short port)
773{ 781{
774 struct sockaddr_in6 sin6 = { 782 const struct sockaddr_in6 sin6 = {
775 .sin6_family = AF_INET6, 783 .sin6_family = AF_INET6,
776 .sin6_addr = IN6ADDR_ANY_INIT, 784 .sin6_addr = IN6ADDR_ANY_INIT,
777 .sin6_port = htons(port), 785 .sin6_port = htons(port),
778 }; 786 };
779 char *netid; 787 const char *netid;
788 int error;
780 789
781 switch (protocol) { 790 switch (protocol) {
782 case IPPROTO_UDP: 791 case IPPROTO_UDP:
@@ -786,12 +795,22 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
786 netid = RPCBIND_NETID_TCP6; 795 netid = RPCBIND_NETID_TCP6;
787 break; 796 break;
788 default: 797 default:
789 return -EPROTONOSUPPORT; 798 return -ENOPROTOOPT;
790 } 799 }
791 800
792 return rpcb_v4_register(program, version, 801 error = rpcb_v4_register(program, version,
793 (struct sockaddr *)&sin6, netid); 802 (const struct sockaddr *)&sin6, netid);
803
804 /*
805 * User space didn't support rpcbind version 4, so we won't
806 * use a PF_INET6 listener.
807 */
808 if (error == -EPROTONOSUPPORT)
809 error = -EAFNOSUPPORT;
810
811 return error;
794} 812}
813#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
795 814
796/* 815/*
797 * Register a kernel RPC service via rpcbind version 4. 816 * Register a kernel RPC service via rpcbind version 4.
@@ -799,69 +818,43 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
799 * Returns zero on success; a negative errno value is returned 818 * Returns zero on success; a negative errno value is returned
800 * if any error occurs. 819 * if any error occurs.
801 */ 820 */
802static int __svc_register(const u32 program, const u32 version, 821static int __svc_register(const char *progname,
803 const sa_family_t family, 822 const u32 program, const u32 version,
823 const int family,
804 const unsigned short protocol, 824 const unsigned short protocol,
805 const unsigned short port) 825 const unsigned short port)
806{ 826{
807 int error; 827 int error = -EAFNOSUPPORT;
808 828
809 switch (family) { 829 switch (family) {
810 case AF_INET: 830 case PF_INET:
811 return __svc_rpcb_register4(program, version, 831 error = __svc_rpcb_register4(program, version,
812 protocol, port); 832 protocol, port);
813 case AF_INET6: 833 break;
834#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
835 case PF_INET6:
814 error = __svc_rpcb_register6(program, version, 836 error = __svc_rpcb_register6(program, version,
815 protocol, port); 837 protocol, port);
816 if (error < 0) 838#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
817 return error;
818
819 /*
820 * Work around bug in some versions of Linux rpcbind
821 * which don't allow registration of both inet and
822 * inet6 netids.
823 *
824 * Error return ignored for now.
825 */
826 __svc_rpcb_register4(program, version,
827 protocol, port);
828 return 0;
829 } 839 }
830 840
831 return -EAFNOSUPPORT; 841 if (error < 0)
832} 842 printk(KERN_WARNING "svc: failed to register %sv%u RPC "
833 843 "service (errno %d).\n", progname, version, -error);
834#else /* CONFIG_SUNRPC_REGISTER_V4 */ 844 return error;
835
836/*
837 * Register a kernel RPC service via rpcbind version 2.
838 *
839 * Returns zero on success; a negative errno value is returned
840 * if any error occurs.
841 */
842static int __svc_register(const u32 program, const u32 version,
843 sa_family_t family,
844 const unsigned short protocol,
845 const unsigned short port)
846{
847 if (family != AF_INET)
848 return -EAFNOSUPPORT;
849
850 return rpcb_register(program, version, protocol, port);
851} 845}
852 846
853#endif /* CONFIG_SUNRPC_REGISTER_V4 */
854
855/** 847/**
856 * svc_register - register an RPC service with the local portmapper 848 * svc_register - register an RPC service with the local portmapper
857 * @serv: svc_serv struct for the service to register 849 * @serv: svc_serv struct for the service to register
850 * @family: protocol family of service's listener socket
858 * @proto: transport protocol number to advertise 851 * @proto: transport protocol number to advertise
859 * @port: port to advertise 852 * @port: port to advertise
860 * 853 *
861 * Service is registered for any address in serv's address family 854 * Service is registered for any address in the passed-in protocol family
862 */ 855 */
863int svc_register(const struct svc_serv *serv, const unsigned short proto, 856int svc_register(const struct svc_serv *serv, const int family,
864 const unsigned short port) 857 const unsigned short proto, const unsigned short port)
865{ 858{
866 struct svc_program *progp; 859 struct svc_program *progp;
867 unsigned int i; 860 unsigned int i;
@@ -879,15 +872,15 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
879 i, 872 i,
880 proto == IPPROTO_UDP? "udp" : "tcp", 873 proto == IPPROTO_UDP? "udp" : "tcp",
881 port, 874 port,
882 serv->sv_family, 875 family,
883 progp->pg_vers[i]->vs_hidden? 876 progp->pg_vers[i]->vs_hidden?
884 " (but not telling portmap)" : ""); 877 " (but not telling portmap)" : "");
885 878
886 if (progp->pg_vers[i]->vs_hidden) 879 if (progp->pg_vers[i]->vs_hidden)
887 continue; 880 continue;
888 881
889 error = __svc_register(progp->pg_prog, i, 882 error = __svc_register(progp->pg_name, progp->pg_prog,
890 serv->sv_family, proto, port); 883 i, family, proto, port);
891 if (error < 0) 884 if (error < 0)
892 break; 885 break;
893 } 886 }
@@ -896,38 +889,31 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto,
896 return error; 889 return error;
897} 890}
898 891
899#ifdef CONFIG_SUNRPC_REGISTER_V4 892/*
900 893 * If user space is running rpcbind, it should take the v4 UNSET
894 * and clear everything for this [program, version]. If user space
895 * is running portmap, it will reject the v4 UNSET, but won't have
896 * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient
897 * in this case to clear all existing entries for [program, version].
898 */
901static void __svc_unregister(const u32 program, const u32 version, 899static void __svc_unregister(const u32 program, const u32 version,
902 const char *progname) 900 const char *progname)
903{ 901{
904 struct sockaddr_in6 sin6 = {
905 .sin6_family = AF_INET6,
906 .sin6_addr = IN6ADDR_ANY_INIT,
907 .sin6_port = 0,
908 };
909 int error; 902 int error;
910 903
911 error = rpcb_v4_register(program, version, 904 error = rpcb_v4_register(program, version, NULL, "");
912 (struct sockaddr *)&sin6, "");
913 dprintk("svc: %s(%sv%u), error %d\n",
914 __func__, progname, version, error);
915}
916
917#else /* CONFIG_SUNRPC_REGISTER_V4 */
918 905
919static void __svc_unregister(const u32 program, const u32 version, 906 /*
920 const char *progname) 907 * User space didn't support rpcbind v4, so retry this
921{ 908 * request with the legacy rpcbind v2 protocol.
922 int error; 909 */
910 if (error == -EPROTONOSUPPORT)
911 error = rpcb_register(program, version, 0, 0);
923 912
924 error = rpcb_register(program, version, 0, 0);
925 dprintk("svc: %s(%sv%u), error %d\n", 913 dprintk("svc: %s(%sv%u), error %d\n",
926 __func__, progname, version, error); 914 __func__, progname, version, error);
927} 915}
928 916
929#endif /* CONFIG_SUNRPC_REGISTER_V4 */
930
931/* 917/*
932 * All netids, bind addresses and ports registered for [program, version] 918 * All netids, bind addresses and ports registered for [program, version]
933 * are removed from the local rpcbind database (if the service is not 919 * are removed from the local rpcbind database (if the service is not
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e588df5d6b34..2819ee093f36 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init);
161 161
162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, 162static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
163 struct svc_serv *serv, 163 struct svc_serv *serv,
164 unsigned short port, int flags) 164 const int family,
165 const unsigned short port,
166 int flags)
165{ 167{
166 struct sockaddr_in sin = { 168 struct sockaddr_in sin = {
167 .sin_family = AF_INET, 169 .sin_family = AF_INET,
@@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
176 struct sockaddr *sap; 178 struct sockaddr *sap;
177 size_t len; 179 size_t len;
178 180
179 switch (serv->sv_family) { 181 switch (family) {
180 case AF_INET: 182 case PF_INET:
181 sap = (struct sockaddr *)&sin; 183 sap = (struct sockaddr *)&sin;
182 len = sizeof(sin); 184 len = sizeof(sin);
183 break; 185 break;
184 case AF_INET6: 186 case PF_INET6:
185 sap = (struct sockaddr *)&sin6; 187 sap = (struct sockaddr *)&sin6;
186 len = sizeof(sin6); 188 len = sizeof(sin6);
187 break; 189 break;
@@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
192 return xcl->xcl_ops->xpo_create(serv, sap, len, flags); 194 return xcl->xcl_ops->xpo_create(serv, sap, len, flags);
193} 195}
194 196
195int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, 197int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
198 const int family, const unsigned short port,
196 int flags) 199 int flags)
197{ 200{
198 struct svc_xprt_class *xcl; 201 struct svc_xprt_class *xcl;
@@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
209 goto err; 212 goto err;
210 213
211 spin_unlock(&svc_xprt_class_lock); 214 spin_unlock(&svc_xprt_class_lock);
212 newxprt = __svc_xpo_create(xcl, serv, port, flags); 215 newxprt = __svc_xpo_create(xcl, serv, family, port, flags);
213 if (IS_ERR(newxprt)) { 216 if (IS_ERR(newxprt)) {
214 module_put(xcl->xcl_owner); 217 module_put(xcl->xcl_owner);
215 return PTR_ERR(newxprt); 218 return PTR_ERR(newxprt);
@@ -1033,7 +1036,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1033 return dr; 1036 return dr;
1034} 1037}
1035 1038
1036/* 1039/**
1040 * svc_find_xprt - find an RPC transport instance
1041 * @serv: pointer to svc_serv to search
1042 * @xcl_name: C string containing transport's class name
1043 * @af: Address family of transport's local address
1044 * @port: transport's IP port number
1045 *
1037 * Return the transport instance pointer for the endpoint accepting 1046 * Return the transport instance pointer for the endpoint accepting
1038 * connections/peer traffic from the specified transport class, 1047 * connections/peer traffic from the specified transport class,
1039 * address family and port. 1048 * address family and port.
@@ -1042,14 +1051,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
1042 * wild-card, and will result in matching the first transport in the 1051 * wild-card, and will result in matching the first transport in the
1043 * service's list that has a matching class name. 1052 * service's list that has a matching class name.
1044 */ 1053 */
1045struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, 1054struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
1046 int af, int port) 1055 const sa_family_t af, const unsigned short port)
1047{ 1056{
1048 struct svc_xprt *xprt; 1057 struct svc_xprt *xprt;
1049 struct svc_xprt *found = NULL; 1058 struct svc_xprt *found = NULL;
1050 1059
1051 /* Sanity check the args */ 1060 /* Sanity check the args */
1052 if (!serv || !xcl_name) 1061 if (serv == NULL || xcl_name == NULL)
1053 return found; 1062 return found;
1054 1063
1055 spin_lock_bh(&serv->sv_lock); 1064 spin_lock_bh(&serv->sv_lock);
@@ -1058,7 +1067,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
1058 continue; 1067 continue;
1059 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) 1068 if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
1060 continue; 1069 continue;
1061 if (port && port != svc_xprt_local_port(xprt)) 1070 if (port != 0 && port != svc_xprt_local_port(xprt))
1062 continue; 1071 continue;
1063 found = xprt; 1072 found = xprt;
1064 svc_xprt_get(xprt); 1073 svc_xprt_get(xprt);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 5763e6460fea..9d504234af4a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1110 struct svc_sock *svsk; 1110 struct svc_sock *svsk;
1111 struct sock *inet; 1111 struct sock *inet;
1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); 1112 int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
1113 int val;
1114 1113
1115 dprintk("svc: svc_setup_socket %p\n", sock); 1114 dprintk("svc: svc_setup_socket %p\n", sock);
1116 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { 1115 if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) {
@@ -1122,7 +1121,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1122 1121
1123 /* Register socket with portmapper */ 1122 /* Register socket with portmapper */
1124 if (*errp >= 0 && pmap_register) 1123 if (*errp >= 0 && pmap_register)
1125 *errp = svc_register(serv, inet->sk_protocol, 1124 *errp = svc_register(serv, inet->sk_family, inet->sk_protocol,
1126 ntohs(inet_sk(inet)->sport)); 1125 ntohs(inet_sk(inet)->sport));
1127 1126
1128 if (*errp < 0) { 1127 if (*errp < 0) {
@@ -1143,18 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1143 else 1142 else
1144 svc_tcp_init(svsk, serv); 1143 svc_tcp_init(svsk, serv);
1145 1144
1146 /*
1147 * We start one listener per sv_serv. We want AF_INET
1148 * requests to be automatically shunted to our AF_INET6
1149 * listener using a mapped IPv4 address. Make sure
1150 * no-one starts an equivalent IPv4 listener, which
1151 * would steal our incoming connections.
1152 */
1153 val = 0;
1154 if (serv->sv_family == AF_INET6)
1155 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1156 (char *)&val, sizeof(val));
1157
1158 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1159 svsk, svsk->sk_sk); 1146 svsk, svsk->sk_sk);
1160 1147
@@ -1222,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1222 struct sockaddr_storage addr; 1209 struct sockaddr_storage addr;
1223 struct sockaddr *newsin = (struct sockaddr *)&addr; 1210 struct sockaddr *newsin = (struct sockaddr *)&addr;
1224 int newlen; 1211 int newlen;
1212 int family;
1213 int val;
1225 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); 1214 RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
1226 1215
1227 dprintk("svc: svc_create_socket(%s, %d, %s)\n", 1216 dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1233,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
1233 "sockets supported\n"); 1222 "sockets supported\n");
1234 return ERR_PTR(-EINVAL); 1223 return ERR_PTR(-EINVAL);
1235 } 1224 }
1225
1236 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; 1226 type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM;
1227 switch (sin->sa_family) {
1228 case AF_INET6:
1229 family = PF_INET6;
1230 break;
1231 case AF_INET:
1232 family = PF_INET;
1233 break;
1234 default:
1235 return ERR_PTR(-EINVAL);
1236 }
1237 1237
1238 error = sock_create_kern(sin->sa_family, type, protocol, &sock); 1238 error = sock_create_kern(family, type, protocol, &sock);
1239 if (error < 0) 1239 if (error < 0)
1240 return ERR_PTR(error); 1240 return ERR_PTR(error);
1241 1241
1242 svc_reclassify_socket(sock); 1242 svc_reclassify_socket(sock);
1243 1243
1244 /*
1245 * If this is an PF_INET6 listener, we want to avoid
1246 * getting requests from IPv4 remotes. Those should
1247 * be shunted to a PF_INET listener via rpcbind.
1248 */
1249 val = 1;
1250 if (family == PF_INET6)
1251 kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
1252 (char *)&val, sizeof(val));
1253
1244 if (type == SOCK_STREAM) 1254 if (type == SOCK_STREAM)
1245 sock->sk->sk_reuse = 1; /* allow address reuse */ 1255 sock->sk->sk_reuse = 1; /* allow address reuse */
1246 error = kernel_bind(sock, sin, len); 1256 error = kernel_bind(sock, sin, len);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 62098d101a1f..a0bfe53f1621 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -152,6 +152,37 @@ out:
152EXPORT_SYMBOL_GPL(xprt_unregister_transport); 152EXPORT_SYMBOL_GPL(xprt_unregister_transport);
153 153
154/** 154/**
155 * xprt_load_transport - load a transport implementation
156 * @transport_name: transport to load
157 *
158 * Returns:
159 * 0: transport successfully loaded
160 * -ENOENT: transport module not available
161 */
162int xprt_load_transport(const char *transport_name)
163{
164 struct xprt_class *t;
165 char module_name[sizeof t->name + 5];
166 int result;
167
168 result = 0;
169 spin_lock(&xprt_list_lock);
170 list_for_each_entry(t, &xprt_list, list) {
171 if (strcmp(t->name, transport_name) == 0) {
172 spin_unlock(&xprt_list_lock);
173 goto out;
174 }
175 }
176 spin_unlock(&xprt_list_lock);
177 strcpy(module_name, "xprt");
178 strncat(module_name, transport_name, sizeof t->name);
179 result = request_module(module_name);
180out:
181 return result;
182}
183EXPORT_SYMBOL_GPL(xprt_load_transport);
184
185/**
155 * xprt_reserve_xprt - serialize write access to transports 186 * xprt_reserve_xprt - serialize write access to transports
156 * @task: task that is requesting access to the transport 187 * @task: task that is requesting access to the transport
157 * 188 *
@@ -580,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
580 dprintk("RPC: disconnected transport %p\n", xprt); 611 dprintk("RPC: disconnected transport %p\n", xprt);
581 spin_lock_bh(&xprt->transport_lock); 612 spin_lock_bh(&xprt->transport_lock);
582 xprt_clear_connected(xprt); 613 xprt_clear_connected(xprt);
583 xprt_wake_pending_tasks(xprt, -ENOTCONN); 614 xprt_wake_pending_tasks(xprt, -EAGAIN);
584 spin_unlock_bh(&xprt->transport_lock); 615 spin_unlock_bh(&xprt->transport_lock);
585} 616}
586EXPORT_SYMBOL_GPL(xprt_disconnect_done); 617EXPORT_SYMBOL_GPL(xprt_disconnect_done);
@@ -598,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
598 /* Try to schedule an autoclose RPC call */ 629 /* Try to schedule an autoclose RPC call */
599 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 630 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
600 queue_work(rpciod_workqueue, &xprt->task_cleanup); 631 queue_work(rpciod_workqueue, &xprt->task_cleanup);
601 xprt_wake_pending_tasks(xprt, -ENOTCONN); 632 xprt_wake_pending_tasks(xprt, -EAGAIN);
602 spin_unlock_bh(&xprt->transport_lock); 633 spin_unlock_bh(&xprt->transport_lock);
603} 634}
604 635
@@ -625,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
625 /* Try to schedule an autoclose RPC call */ 656 /* Try to schedule an autoclose RPC call */
626 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) 657 if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
627 queue_work(rpciod_workqueue, &xprt->task_cleanup); 658 queue_work(rpciod_workqueue, &xprt->task_cleanup);
628 xprt_wake_pending_tasks(xprt, -ENOTCONN); 659 xprt_wake_pending_tasks(xprt, -EAGAIN);
629out: 660out:
630 spin_unlock_bh(&xprt->transport_lock); 661 spin_unlock_bh(&xprt->transport_lock);
631} 662}
@@ -695,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task)
695 } 726 }
696 727
697 switch (task->tk_status) { 728 switch (task->tk_status) {
698 case -ENOTCONN: 729 case -EAGAIN:
699 dprintk("RPC: %5u xprt_connect_status: connection broken\n", 730 dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid);
700 task->tk_pid);
701 break; 731 break;
702 case -ETIMEDOUT: 732 case -ETIMEDOUT:
703 dprintk("RPC: %5u xprt_connect_status: connect attempt timed " 733 dprintk("RPC: %5u xprt_connect_status: connect attempt timed "
@@ -818,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task)
818 err = req->rq_received; 848 err = req->rq_received;
819 goto out_unlock; 849 goto out_unlock;
820 } 850 }
821 if (!xprt->ops->reserve_xprt(task)) { 851 if (!xprt->ops->reserve_xprt(task))
822 err = -EAGAIN; 852 err = -EAGAIN;
823 goto out_unlock;
824 }
825
826 if (!xprt_connected(xprt)) {
827 err = -ENOTCONN;
828 goto out_unlock;
829 }
830out_unlock: 853out_unlock:
831 spin_unlock_bh(&xprt->transport_lock); 854 spin_unlock_bh(&xprt->transport_lock);
832 return err; 855 return err;
@@ -870,32 +893,26 @@ void xprt_transmit(struct rpc_task *task)
870 req->rq_connect_cookie = xprt->connect_cookie; 893 req->rq_connect_cookie = xprt->connect_cookie;
871 req->rq_xtime = jiffies; 894 req->rq_xtime = jiffies;
872 status = xprt->ops->send_request(task); 895 status = xprt->ops->send_request(task);
873 if (status == 0) { 896 if (status != 0) {
874 dprintk("RPC: %5u xmit complete\n", task->tk_pid); 897 task->tk_status = status;
875 spin_lock_bh(&xprt->transport_lock); 898 return;
899 }
876 900
877 xprt->ops->set_retrans_timeout(task); 901 dprintk("RPC: %5u xmit complete\n", task->tk_pid);
902 spin_lock_bh(&xprt->transport_lock);
878 903
879 xprt->stat.sends++; 904 xprt->ops->set_retrans_timeout(task);
880 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
881 xprt->stat.bklog_u += xprt->backlog.qlen;
882 905
883 /* Don't race with disconnect */ 906 xprt->stat.sends++;
884 if (!xprt_connected(xprt)) 907 xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
885 task->tk_status = -ENOTCONN; 908 xprt->stat.bklog_u += xprt->backlog.qlen;
886 else if (!req->rq_received)
887 rpc_sleep_on(&xprt->pending, task, xprt_timer);
888 spin_unlock_bh(&xprt->transport_lock);
889 return;
890 }
891 909
892 /* Note: at this point, task->tk_sleeping has not yet been set, 910 /* Don't race with disconnect */
893 * hence there is no danger of the waking up task being put on 911 if (!xprt_connected(xprt))
894 * schedq, and being picked up by a parallel run of rpciod(). 912 task->tk_status = -ENOTCONN;
895 */ 913 else if (!req->rq_received)
896 task->tk_status = status; 914 rpc_sleep_on(&xprt->pending, task, xprt_timer);
897 if (status == -ECONNREFUSED) 915 spin_unlock_bh(&xprt->transport_lock);
898 rpc_sleep_on(&xprt->sending, task, NULL);
899} 916}
900 917
901static inline void do_xprt_reserve(struct rpc_task *task) 918static inline void do_xprt_reserve(struct rpc_task *task)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 14106d26bb95..e5e28d1946a4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
310 __func__, pad, destp, rqst->rq_slen, curlen); 310 __func__, pad, destp, rqst->rq_slen, curlen);
311 311
312 copy_len = rqst->rq_snd_buf.page_len; 312 copy_len = rqst->rq_snd_buf.page_len;
313
314 if (rqst->rq_snd_buf.tail[0].iov_len) {
315 curlen = rqst->rq_snd_buf.tail[0].iov_len;
316 if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) {
317 memmove(destp + copy_len,
318 rqst->rq_snd_buf.tail[0].iov_base, curlen);
319 r_xprt->rx_stats.pullup_copy_count += curlen;
320 }
321 dprintk("RPC: %s: tail destp 0x%p len %d\n",
322 __func__, destp + copy_len, curlen);
323 rqst->rq_svec[0].iov_len += curlen;
324 }
325
313 r_xprt->rx_stats.pullup_copy_count += copy_len; 326 r_xprt->rx_stats.pullup_copy_count += copy_len;
314 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; 327 npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT;
315 for (i = 0; copy_len && i < npages; i++) { 328 for (i = 0; copy_len && i < npages; i++) {
@@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
332 destp += curlen; 345 destp += curlen;
333 copy_len -= curlen; 346 copy_len -= curlen;
334 } 347 }
335 if (rqst->rq_snd_buf.tail[0].iov_len) {
336 curlen = rqst->rq_snd_buf.tail[0].iov_len;
337 if (destp != rqst->rq_snd_buf.tail[0].iov_base) {
338 memcpy(destp,
339 rqst->rq_snd_buf.tail[0].iov_base, curlen);
340 r_xprt->rx_stats.pullup_copy_count += curlen;
341 }
342 dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n",
343 __func__, destp, copy_len, curlen);
344 rqst->rq_svec[0].iov_len += curlen;
345 }
346 /* header now contains entire send message */ 348 /* header now contains entire send message */
347 return pad; 349 return pad;
348} 350}
@@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
656 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) 658 if (curlen > rqst->rq_rcv_buf.tail[0].iov_len)
657 curlen = rqst->rq_rcv_buf.tail[0].iov_len; 659 curlen = rqst->rq_rcv_buf.tail[0].iov_len;
658 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) 660 if (rqst->rq_rcv_buf.tail[0].iov_base != srcp)
659 memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); 661 memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen);
660 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", 662 dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n",
661 __func__, srcp, copy_len, curlen); 663 __func__, srcp, copy_len, curlen);
662 rqst->rq_rcv_buf.tail[0].iov_len = curlen; 664 rqst->rq_rcv_buf.tail[0].iov_len = curlen;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index a3334e3b73cc..6c26a675435a 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
191 struct xdr_buf *xdr, 191 struct xdr_buf *xdr,
192 struct svc_rdma_req_map *vec) 192 struct svc_rdma_req_map *vec)
193{ 193{
194 int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3;
195 int sge_no; 194 int sge_no;
196 u32 sge_bytes; 195 u32 sge_bytes;
197 u32 page_bytes; 196 u32 page_bytes;
@@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt,
235 sge_no++; 234 sge_no++;
236 } 235 }
237 236
238 BUG_ON(sge_no > sge_max); 237 dprintk("svcrdma: map_xdr: sge_no %d page_no %d "
238 "page_base %u page_len %u head_len %zu tail_len %zu\n",
239 sge_no, page_no, xdr->page_base, xdr->page_len,
240 xdr->head[0].iov_len, xdr->tail[0].iov_len);
241
239 vec->count = sge_no; 242 vec->count = sge_no;
240 return 0; 243 return 0;
241} 244}
@@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
579 ctxt->sge[page_no+1].length = 0; 582 ctxt->sge[page_no+1].length = 0;
580 } 583 }
581 BUG_ON(sge_no > rdma->sc_max_sge); 584 BUG_ON(sge_no > rdma->sc_max_sge);
582 BUG_ON(sge_no > ctxt->count);
583 memset(&send_wr, 0, sizeof send_wr); 585 memset(&send_wr, 0, sizeof send_wr);
584 ctxt->wr_op = IB_WR_SEND; 586 ctxt->wr_op = IB_WR_SEND;
585 send_wr.wr_id = (unsigned long)ctxt; 587 send_wr.wr_id = (unsigned long)ctxt;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 568330eebbfe..d40ff50887aa 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -49,6 +49,9 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 49unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 50unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
51 51
52#define XS_TCP_LINGER_TO (15U * HZ)
53static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
54
52/* 55/*
53 * We can register our own files under /proc/sys/sunrpc by 56 * We can register our own files under /proc/sys/sunrpc by
54 * calling register_sysctl_table() again. The files in that 57 * calling register_sysctl_table() again. The files in that
@@ -117,6 +120,14 @@ static ctl_table xs_tunables_table[] = {
117 .extra2 = &xprt_max_resvport_limit 120 .extra2 = &xprt_max_resvport_limit
118 }, 121 },
119 { 122 {
123 .procname = "tcp_fin_timeout",
124 .data = &xs_tcp_fin_timeout,
125 .maxlen = sizeof(xs_tcp_fin_timeout),
126 .mode = 0644,
127 .proc_handler = &proc_dointvec_jiffies,
128 .strategy = sysctl_jiffies
129 },
130 {
120 .ctl_name = 0, 131 .ctl_name = 0,
121 }, 132 },
122}; 133};
@@ -521,11 +532,12 @@ static void xs_nospace_callback(struct rpc_task *task)
521 * @task: task to put to sleep 532 * @task: task to put to sleep
522 * 533 *
523 */ 534 */
524static void xs_nospace(struct rpc_task *task) 535static int xs_nospace(struct rpc_task *task)
525{ 536{
526 struct rpc_rqst *req = task->tk_rqstp; 537 struct rpc_rqst *req = task->tk_rqstp;
527 struct rpc_xprt *xprt = req->rq_xprt; 538 struct rpc_xprt *xprt = req->rq_xprt;
528 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 539 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
540 int ret = 0;
529 541
530 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", 542 dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
531 task->tk_pid, req->rq_slen - req->rq_bytes_sent, 543 task->tk_pid, req->rq_slen - req->rq_bytes_sent,
@@ -537,6 +549,7 @@ static void xs_nospace(struct rpc_task *task)
537 /* Don't race with disconnect */ 549 /* Don't race with disconnect */
538 if (xprt_connected(xprt)) { 550 if (xprt_connected(xprt)) {
539 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { 551 if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
552 ret = -EAGAIN;
540 /* 553 /*
541 * Notify TCP that we're limited by the application 554 * Notify TCP that we're limited by the application
542 * window size 555 * window size
@@ -548,10 +561,11 @@ static void xs_nospace(struct rpc_task *task)
548 } 561 }
549 } else { 562 } else {
550 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 563 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
551 task->tk_status = -ENOTCONN; 564 ret = -ENOTCONN;
552 } 565 }
553 566
554 spin_unlock_bh(&xprt->transport_lock); 567 spin_unlock_bh(&xprt->transport_lock);
568 return ret;
555} 569}
556 570
557/** 571/**
@@ -594,6 +608,8 @@ static int xs_udp_send_request(struct rpc_task *task)
594 /* Still some bytes left; set up for a retry later. */ 608 /* Still some bytes left; set up for a retry later. */
595 status = -EAGAIN; 609 status = -EAGAIN;
596 } 610 }
611 if (!transport->sock)
612 goto out;
597 613
598 switch (status) { 614 switch (status) {
599 case -ENOTSOCK: 615 case -ENOTSOCK:
@@ -601,21 +617,19 @@ static int xs_udp_send_request(struct rpc_task *task)
601 /* Should we call xs_close() here? */ 617 /* Should we call xs_close() here? */
602 break; 618 break;
603 case -EAGAIN: 619 case -EAGAIN:
604 xs_nospace(task); 620 status = xs_nospace(task);
605 break; 621 break;
622 default:
623 dprintk("RPC: sendmsg returned unrecognized error %d\n",
624 -status);
606 case -ENETUNREACH: 625 case -ENETUNREACH:
607 case -EPIPE: 626 case -EPIPE:
608 case -ECONNREFUSED: 627 case -ECONNREFUSED:
609 /* When the server has died, an ICMP port unreachable message 628 /* When the server has died, an ICMP port unreachable message
610 * prompts ECONNREFUSED. */ 629 * prompts ECONNREFUSED. */
611 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 630 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
612 break;
613 default:
614 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
615 dprintk("RPC: sendmsg returned unrecognized error %d\n",
616 -status);
617 } 631 }
618 632out:
619 return status; 633 return status;
620} 634}
621 635
@@ -697,6 +711,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
697 status = -EAGAIN; 711 status = -EAGAIN;
698 break; 712 break;
699 } 713 }
714 if (!transport->sock)
715 goto out;
700 716
701 switch (status) { 717 switch (status) {
702 case -ENOTSOCK: 718 case -ENOTSOCK:
@@ -704,23 +720,19 @@ static int xs_tcp_send_request(struct rpc_task *task)
704 /* Should we call xs_close() here? */ 720 /* Should we call xs_close() here? */
705 break; 721 break;
706 case -EAGAIN: 722 case -EAGAIN:
707 xs_nospace(task); 723 status = xs_nospace(task);
708 break; 724 break;
725 default:
726 dprintk("RPC: sendmsg returned unrecognized error %d\n",
727 -status);
709 case -ECONNRESET: 728 case -ECONNRESET:
729 case -EPIPE:
710 xs_tcp_shutdown(xprt); 730 xs_tcp_shutdown(xprt);
711 case -ECONNREFUSED: 731 case -ECONNREFUSED:
712 case -ENOTCONN: 732 case -ENOTCONN:
713 case -EPIPE:
714 status = -ENOTCONN;
715 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
716 break;
717 default:
718 dprintk("RPC: sendmsg returned unrecognized error %d\n",
719 -status);
720 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); 733 clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags);
721 xs_tcp_shutdown(xprt);
722 } 734 }
723 735out:
724 return status; 736 return status;
725} 737}
726 738
@@ -767,23 +779,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s
767 sk->sk_error_report = transport->old_error_report; 779 sk->sk_error_report = transport->old_error_report;
768} 780}
769 781
770/** 782static void xs_reset_transport(struct sock_xprt *transport)
771 * xs_close - close a socket
772 * @xprt: transport
773 *
774 * This is used when all requests are complete; ie, no DRC state remains
775 * on the server we want to save.
776 */
777static void xs_close(struct rpc_xprt *xprt)
778{ 783{
779 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
780 struct socket *sock = transport->sock; 784 struct socket *sock = transport->sock;
781 struct sock *sk = transport->inet; 785 struct sock *sk = transport->inet;
782 786
783 if (!sk) 787 if (sk == NULL)
784 goto clear_close_wait; 788 return;
785
786 dprintk("RPC: xs_close xprt %p\n", xprt);
787 789
788 write_lock_bh(&sk->sk_callback_lock); 790 write_lock_bh(&sk->sk_callback_lock);
789 transport->inet = NULL; 791 transport->inet = NULL;
@@ -797,8 +799,25 @@ static void xs_close(struct rpc_xprt *xprt)
797 sk->sk_no_check = 0; 799 sk->sk_no_check = 0;
798 800
799 sock_release(sock); 801 sock_release(sock);
800clear_close_wait: 802}
803
804/**
805 * xs_close - close a socket
806 * @xprt: transport
807 *
808 * This is used when all requests are complete; ie, no DRC state remains
809 * on the server we want to save.
810 */
811static void xs_close(struct rpc_xprt *xprt)
812{
813 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
814
815 dprintk("RPC: xs_close xprt %p\n", xprt);
816
817 xs_reset_transport(transport);
818
801 smp_mb__before_clear_bit(); 819 smp_mb__before_clear_bit();
820 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
802 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 821 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
803 clear_bit(XPRT_CLOSING, &xprt->state); 822 clear_bit(XPRT_CLOSING, &xprt->state);
804 smp_mb__after_clear_bit(); 823 smp_mb__after_clear_bit();
@@ -1126,6 +1145,47 @@ out:
1126 read_unlock(&sk->sk_callback_lock); 1145 read_unlock(&sk->sk_callback_lock);
1127} 1146}
1128 1147
1148/*
1149 * Do the equivalent of linger/linger2 handling for dealing with
1150 * broken servers that don't close the socket in a timely
1151 * fashion
1152 */
1153static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt,
1154 unsigned long timeout)
1155{
1156 struct sock_xprt *transport;
1157
1158 if (xprt_test_and_set_connecting(xprt))
1159 return;
1160 set_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1161 transport = container_of(xprt, struct sock_xprt, xprt);
1162 queue_delayed_work(rpciod_workqueue, &transport->connect_worker,
1163 timeout);
1164}
1165
1166static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1167{
1168 struct sock_xprt *transport;
1169
1170 transport = container_of(xprt, struct sock_xprt, xprt);
1171
1172 if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) ||
1173 !cancel_delayed_work(&transport->connect_worker))
1174 return;
1175 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1176 xprt_clear_connecting(xprt);
1177}
1178
1179static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1180{
1181 smp_mb__before_clear_bit();
1182 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1183 clear_bit(XPRT_CLOSING, &xprt->state);
1184 smp_mb__after_clear_bit();
1185 /* Mark transport as closed and wake up all pending tasks */
1186 xprt_disconnect_done(xprt);
1187}
1188
1129/** 1189/**
1130 * xs_tcp_state_change - callback to handle TCP socket state changes 1190 * xs_tcp_state_change - callback to handle TCP socket state changes
1131 * @sk: socket whose state has changed 1191 * @sk: socket whose state has changed
@@ -1158,7 +1218,7 @@ static void xs_tcp_state_change(struct sock *sk)
1158 transport->tcp_flags = 1218 transport->tcp_flags =
1159 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; 1219 TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
1160 1220
1161 xprt_wake_pending_tasks(xprt, 0); 1221 xprt_wake_pending_tasks(xprt, -EAGAIN);
1162 } 1222 }
1163 spin_unlock_bh(&xprt->transport_lock); 1223 spin_unlock_bh(&xprt->transport_lock);
1164 break; 1224 break;
@@ -1171,10 +1231,10 @@ static void xs_tcp_state_change(struct sock *sk)
1171 clear_bit(XPRT_CONNECTED, &xprt->state); 1231 clear_bit(XPRT_CONNECTED, &xprt->state);
1172 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1232 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1173 smp_mb__after_clear_bit(); 1233 smp_mb__after_clear_bit();
1234 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1174 break; 1235 break;
1175 case TCP_CLOSE_WAIT: 1236 case TCP_CLOSE_WAIT:
1176 /* The server initiated a shutdown of the socket */ 1237 /* The server initiated a shutdown of the socket */
1177 set_bit(XPRT_CLOSING, &xprt->state);
1178 xprt_force_disconnect(xprt); 1238 xprt_force_disconnect(xprt);
1179 case TCP_SYN_SENT: 1239 case TCP_SYN_SENT:
1180 xprt->connect_cookie++; 1240 xprt->connect_cookie++;
@@ -1187,40 +1247,35 @@ static void xs_tcp_state_change(struct sock *sk)
1187 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; 1247 xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
1188 break; 1248 break;
1189 case TCP_LAST_ACK: 1249 case TCP_LAST_ACK:
1250 set_bit(XPRT_CLOSING, &xprt->state);
1251 xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout);
1190 smp_mb__before_clear_bit(); 1252 smp_mb__before_clear_bit();
1191 clear_bit(XPRT_CONNECTED, &xprt->state); 1253 clear_bit(XPRT_CONNECTED, &xprt->state);
1192 smp_mb__after_clear_bit(); 1254 smp_mb__after_clear_bit();
1193 break; 1255 break;
1194 case TCP_CLOSE: 1256 case TCP_CLOSE:
1195 smp_mb__before_clear_bit(); 1257 xs_tcp_cancel_linger_timeout(xprt);
1196 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1258 xs_sock_mark_closed(xprt);
1197 clear_bit(XPRT_CLOSING, &xprt->state);
1198 smp_mb__after_clear_bit();
1199 /* Mark transport as closed and wake up all pending tasks */
1200 xprt_disconnect_done(xprt);
1201 } 1259 }
1202 out: 1260 out:
1203 read_unlock(&sk->sk_callback_lock); 1261 read_unlock(&sk->sk_callback_lock);
1204} 1262}
1205 1263
1206/** 1264/**
1207 * xs_tcp_error_report - callback mainly for catching RST events 1265 * xs_error_report - callback mainly for catching socket errors
1208 * @sk: socket 1266 * @sk: socket
1209 */ 1267 */
1210static void xs_tcp_error_report(struct sock *sk) 1268static void xs_error_report(struct sock *sk)
1211{ 1269{
1212 struct rpc_xprt *xprt; 1270 struct rpc_xprt *xprt;
1213 1271
1214 read_lock(&sk->sk_callback_lock); 1272 read_lock(&sk->sk_callback_lock);
1215 if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED)
1216 goto out;
1217 if (!(xprt = xprt_from_sock(sk))) 1273 if (!(xprt = xprt_from_sock(sk)))
1218 goto out; 1274 goto out;
1219 dprintk("RPC: %s client %p...\n" 1275 dprintk("RPC: %s client %p...\n"
1220 "RPC: error %d\n", 1276 "RPC: error %d\n",
1221 __func__, xprt, sk->sk_err); 1277 __func__, xprt, sk->sk_err);
1222 1278 xprt_wake_pending_tasks(xprt, -EAGAIN);
1223 xprt_force_disconnect(xprt);
1224out: 1279out:
1225 read_unlock(&sk->sk_callback_lock); 1280 read_unlock(&sk->sk_callback_lock);
1226} 1281}
@@ -1494,6 +1549,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1494 sk->sk_user_data = xprt; 1549 sk->sk_user_data = xprt;
1495 sk->sk_data_ready = xs_udp_data_ready; 1550 sk->sk_data_ready = xs_udp_data_ready;
1496 sk->sk_write_space = xs_udp_write_space; 1551 sk->sk_write_space = xs_udp_write_space;
1552 sk->sk_error_report = xs_error_report;
1497 sk->sk_no_check = UDP_CSUM_NORCV; 1553 sk->sk_no_check = UDP_CSUM_NORCV;
1498 sk->sk_allocation = GFP_ATOMIC; 1554 sk->sk_allocation = GFP_ATOMIC;
1499 1555
@@ -1526,9 +1582,10 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1526 goto out; 1582 goto out;
1527 1583
1528 /* Start by resetting any existing state */ 1584 /* Start by resetting any existing state */
1529 xs_close(xprt); 1585 xs_reset_transport(transport);
1530 1586
1531 if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1587 err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
1588 if (err < 0) {
1532 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1589 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1533 goto out; 1590 goto out;
1534 } 1591 }
@@ -1545,8 +1602,8 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1545 xs_udp_finish_connecting(xprt, sock); 1602 xs_udp_finish_connecting(xprt, sock);
1546 status = 0; 1603 status = 0;
1547out: 1604out:
1548 xprt_wake_pending_tasks(xprt, status);
1549 xprt_clear_connecting(xprt); 1605 xprt_clear_connecting(xprt);
1606 xprt_wake_pending_tasks(xprt, status);
1550} 1607}
1551 1608
1552/** 1609/**
@@ -1567,9 +1624,10 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1567 goto out; 1624 goto out;
1568 1625
1569 /* Start by resetting any existing state */ 1626 /* Start by resetting any existing state */
1570 xs_close(xprt); 1627 xs_reset_transport(transport);
1571 1628
1572 if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { 1629 err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
1630 if (err < 0) {
1573 dprintk("RPC: can't create UDP transport socket (%d).\n", -err); 1631 dprintk("RPC: can't create UDP transport socket (%d).\n", -err);
1574 goto out; 1632 goto out;
1575 } 1633 }
@@ -1586,18 +1644,17 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1586 xs_udp_finish_connecting(xprt, sock); 1644 xs_udp_finish_connecting(xprt, sock);
1587 status = 0; 1645 status = 0;
1588out: 1646out:
1589 xprt_wake_pending_tasks(xprt, status);
1590 xprt_clear_connecting(xprt); 1647 xprt_clear_connecting(xprt);
1648 xprt_wake_pending_tasks(xprt, status);
1591} 1649}
1592 1650
1593/* 1651/*
1594 * We need to preserve the port number so the reply cache on the server can 1652 * We need to preserve the port number so the reply cache on the server can
1595 * find our cached RPC replies when we get around to reconnecting. 1653 * find our cached RPC replies when we get around to reconnecting.
1596 */ 1654 */
1597static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) 1655static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1598{ 1656{
1599 int result; 1657 int result;
1600 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
1601 struct sockaddr any; 1658 struct sockaddr any;
1602 1659
1603 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); 1660 dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt);
@@ -1609,11 +1666,24 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
1609 memset(&any, 0, sizeof(any)); 1666 memset(&any, 0, sizeof(any));
1610 any.sa_family = AF_UNSPEC; 1667 any.sa_family = AF_UNSPEC;
1611 result = kernel_connect(transport->sock, &any, sizeof(any), 0); 1668 result = kernel_connect(transport->sock, &any, sizeof(any), 0);
1612 if (result) 1669 if (!result)
1670 xs_sock_mark_closed(xprt);
1671 else
1613 dprintk("RPC: AF_UNSPEC connect return code %d\n", 1672 dprintk("RPC: AF_UNSPEC connect return code %d\n",
1614 result); 1673 result);
1615} 1674}
1616 1675
1676static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport)
1677{
1678 unsigned int state = transport->inet->sk_state;
1679
1680 if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED)
1681 return;
1682 if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT))
1683 return;
1684 xs_abort_connection(xprt, transport);
1685}
1686
1617static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) 1687static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1618{ 1688{
1619 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1689 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1629,7 +1699,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1629 sk->sk_data_ready = xs_tcp_data_ready; 1699 sk->sk_data_ready = xs_tcp_data_ready;
1630 sk->sk_state_change = xs_tcp_state_change; 1700 sk->sk_state_change = xs_tcp_state_change;
1631 sk->sk_write_space = xs_tcp_write_space; 1701 sk->sk_write_space = xs_tcp_write_space;
1632 sk->sk_error_report = xs_tcp_error_report; 1702 sk->sk_error_report = xs_error_report;
1633 sk->sk_allocation = GFP_ATOMIC; 1703 sk->sk_allocation = GFP_ATOMIC;
1634 1704
1635 /* socket options */ 1705 /* socket options */
@@ -1657,37 +1727,42 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1657} 1727}
1658 1728
1659/** 1729/**
1660 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint 1730 * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint
1661 * @work: RPC transport to connect 1731 * @xprt: RPC transport to connect
1732 * @transport: socket transport to connect
1733 * @create_sock: function to create a socket of the correct type
1662 * 1734 *
1663 * Invoked by a work queue tasklet. 1735 * Invoked by a work queue tasklet.
1664 */ 1736 */
1665static void xs_tcp_connect_worker4(struct work_struct *work) 1737static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
1738 struct sock_xprt *transport,
1739 struct socket *(*create_sock)(struct rpc_xprt *,
1740 struct sock_xprt *))
1666{ 1741{
1667 struct sock_xprt *transport =
1668 container_of(work, struct sock_xprt, connect_worker.work);
1669 struct rpc_xprt *xprt = &transport->xprt;
1670 struct socket *sock = transport->sock; 1742 struct socket *sock = transport->sock;
1671 int err, status = -EIO; 1743 int status = -EIO;
1672 1744
1673 if (xprt->shutdown) 1745 if (xprt->shutdown)
1674 goto out; 1746 goto out;
1675 1747
1676 if (!sock) { 1748 if (!sock) {
1677 /* start from scratch */ 1749 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1678 if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1750 sock = create_sock(xprt, transport);
1679 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1751 if (IS_ERR(sock)) {
1752 status = PTR_ERR(sock);
1680 goto out; 1753 goto out;
1681 } 1754 }
1682 xs_reclassify_socket4(sock); 1755 } else {
1756 int abort_and_exit;
1683 1757
1684 if (xs_bind4(transport, sock) < 0) { 1758 abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT,
1685 sock_release(sock); 1759 &xprt->state);
1686 goto out;
1687 }
1688 } else
1689 /* "close" the socket, preserving the local port */ 1760 /* "close" the socket, preserving the local port */
1690 xs_tcp_reuse_connection(xprt); 1761 xs_tcp_reuse_connection(xprt, transport);
1762
1763 if (abort_and_exit)
1764 goto out_eagain;
1765 }
1691 1766
1692 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1767 dprintk("RPC: worker connecting xprt %p to address: %s\n",
1693 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1768 xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
@@ -1696,83 +1771,104 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1696 dprintk("RPC: %p connect status %d connected %d sock state %d\n", 1771 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1697 xprt, -status, xprt_connected(xprt), 1772 xprt, -status, xprt_connected(xprt),
1698 sock->sk->sk_state); 1773 sock->sk->sk_state);
1699 if (status < 0) { 1774 switch (status) {
1700 switch (status) { 1775 case -ECONNREFUSED:
1701 case -EINPROGRESS: 1776 case -ECONNRESET:
1702 case -EALREADY: 1777 case -ENETUNREACH:
1703 goto out_clear; 1778 /* retry with existing socket, after a delay */
1704 case -ECONNREFUSED: 1779 case 0:
1705 case -ECONNRESET: 1780 case -EINPROGRESS:
1706 /* retry with existing socket, after a delay */ 1781 case -EALREADY:
1707 break; 1782 xprt_clear_connecting(xprt);
1708 default: 1783 return;
1709 /* get rid of existing socket, and retry */
1710 xs_tcp_shutdown(xprt);
1711 }
1712 } 1784 }
1785 /* get rid of existing socket, and retry */
1786 xs_tcp_shutdown(xprt);
1787 printk("%s: connect returned unhandled error %d\n",
1788 __func__, status);
1789out_eagain:
1790 status = -EAGAIN;
1713out: 1791out:
1714 xprt_wake_pending_tasks(xprt, status);
1715out_clear:
1716 xprt_clear_connecting(xprt); 1792 xprt_clear_connecting(xprt);
1793 xprt_wake_pending_tasks(xprt, status);
1794}
1795
1796static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt,
1797 struct sock_xprt *transport)
1798{
1799 struct socket *sock;
1800 int err;
1801
1802 /* start from scratch */
1803 err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
1804 if (err < 0) {
1805 dprintk("RPC: can't create TCP transport socket (%d).\n",
1806 -err);
1807 goto out_err;
1808 }
1809 xs_reclassify_socket4(sock);
1810
1811 if (xs_bind4(transport, sock) < 0) {
1812 sock_release(sock);
1813 goto out_err;
1814 }
1815 return sock;
1816out_err:
1817 return ERR_PTR(-EIO);
1717} 1818}
1718 1819
1719/** 1820/**
1720 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint 1821 * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint
1721 * @work: RPC transport to connect 1822 * @work: RPC transport to connect
1722 * 1823 *
1723 * Invoked by a work queue tasklet. 1824 * Invoked by a work queue tasklet.
1724 */ 1825 */
1725static void xs_tcp_connect_worker6(struct work_struct *work) 1826static void xs_tcp_connect_worker4(struct work_struct *work)
1726{ 1827{
1727 struct sock_xprt *transport = 1828 struct sock_xprt *transport =
1728 container_of(work, struct sock_xprt, connect_worker.work); 1829 container_of(work, struct sock_xprt, connect_worker.work);
1729 struct rpc_xprt *xprt = &transport->xprt; 1830 struct rpc_xprt *xprt = &transport->xprt;
1730 struct socket *sock = transport->sock;
1731 int err, status = -EIO;
1732 1831
1733 if (xprt->shutdown) 1832 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4);
1734 goto out; 1833}
1735 1834
1736 if (!sock) { 1835static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt,
1737 /* start from scratch */ 1836 struct sock_xprt *transport)
1738 if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { 1837{
1739 dprintk("RPC: can't create TCP transport socket (%d).\n", -err); 1838 struct socket *sock;
1740 goto out; 1839 int err;
1741 } 1840
1742 xs_reclassify_socket6(sock); 1841 /* start from scratch */
1842 err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock);
1843 if (err < 0) {
1844 dprintk("RPC: can't create TCP transport socket (%d).\n",
1845 -err);
1846 goto out_err;
1847 }
1848 xs_reclassify_socket6(sock);
1743 1849
1744 if (xs_bind6(transport, sock) < 0) { 1850 if (xs_bind6(transport, sock) < 0) {
1745 sock_release(sock); 1851 sock_release(sock);
1746 goto out; 1852 goto out_err;
1747 } 1853 }
1748 } else 1854 return sock;
1749 /* "close" the socket, preserving the local port */ 1855out_err:
1750 xs_tcp_reuse_connection(xprt); 1856 return ERR_PTR(-EIO);
1857}
1751 1858
1752 dprintk("RPC: worker connecting xprt %p to address: %s\n", 1859/**
1753 xprt, xprt->address_strings[RPC_DISPLAY_ALL]); 1860 * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint
1861 * @work: RPC transport to connect
1862 *
1863 * Invoked by a work queue tasklet.
1864 */
1865static void xs_tcp_connect_worker6(struct work_struct *work)
1866{
1867 struct sock_xprt *transport =
1868 container_of(work, struct sock_xprt, connect_worker.work);
1869 struct rpc_xprt *xprt = &transport->xprt;
1754 1870
1755 status = xs_tcp_finish_connecting(xprt, sock); 1871 xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6);
1756 dprintk("RPC: %p connect status %d connected %d sock state %d\n",
1757 xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
1758 if (status < 0) {
1759 switch (status) {
1760 case -EINPROGRESS:
1761 case -EALREADY:
1762 goto out_clear;
1763 case -ECONNREFUSED:
1764 case -ECONNRESET:
1765 /* retry with existing socket, after a delay */
1766 break;
1767 default:
1768 /* get rid of existing socket, and retry */
1769 xs_tcp_shutdown(xprt);
1770 }
1771 }
1772out:
1773 xprt_wake_pending_tasks(xprt, status);
1774out_clear:
1775 xprt_clear_connecting(xprt);
1776} 1872}
1777 1873
1778/** 1874/**
@@ -1817,9 +1913,6 @@ static void xs_tcp_connect(struct rpc_task *task)
1817{ 1913{
1818 struct rpc_xprt *xprt = task->tk_xprt; 1914 struct rpc_xprt *xprt = task->tk_xprt;
1819 1915
1820 /* Initiate graceful shutdown of the socket if not already done */
1821 if (test_bit(XPRT_CONNECTED, &xprt->state))
1822 xs_tcp_shutdown(xprt);
1823 /* Exit if we need to wait for socket shutdown to complete */ 1916 /* Exit if we need to wait for socket shutdown to complete */
1824 if (test_bit(XPRT_CLOSING, &xprt->state)) 1917 if (test_bit(XPRT_CLOSING, &xprt->state))
1825 return; 1918 return;