aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/802/fddi.c2
-rw-r--r--net/Kconfig2
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/ethtool.c3
-rw-r--r--net/ipv4/netfilter/arp_tables.c4
-rw-r--r--net/ipv4/netfilter/ip_tables.c4
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_output.c73
-rw-r--r--net/ipv6/Kconfig18
-rw-r--r--net/ipv6/netfilter/ip6_tables.c4
-rw-r--r--net/ipv6/xfrm6_output.c1
-rw-r--r--net/mac80211/Kconfig2
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/phonet/Kconfig2
-rw-r--r--net/rds/ib.c5
-rw-r--r--net/rds/ib.h28
-rw-r--r--net/rds/ib_cm.c43
-rw-r--r--net/rds/ib_rdma.c43
-rw-r--r--net/rds/ib_recv.c37
-rw-r--r--net/rds/iw.c5
-rw-r--r--net/rds/iw.h28
-rw-r--r--net/rds/iw_cm.c44
-rw-r--r--net/rds/iw_rdma.c44
-rw-r--r--net/rds/iw_recv.c37
-rw-r--r--net/rds/rds.h4
-rw-r--r--net/rds/send.c6
-rw-r--r--net/socket.c6
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/sunrpc/svc.c6
-rw-r--r--net/sunrpc/svc_xprt.c127
-rw-r--r--net/sunrpc/svcsock.c35
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/wimax/Kconfig2
33 files changed, 413 insertions, 218 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index f1611a1e06a7..539e6064e6d4 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -215,3 +215,5 @@ struct net_device *alloc_fddidev(int sizeof_priv)
215 return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup); 215 return alloc_netdev(sizeof_priv, "fddi%d", fddi_setup);
216} 216}
217EXPORT_SYMBOL(alloc_fddidev); 217EXPORT_SYMBOL(alloc_fddidev);
218
219MODULE_LICENSE("GPL");
diff --git a/net/Kconfig b/net/Kconfig
index ec93e7e38b38..ce77db4fcec8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -140,7 +140,7 @@ config NETFILTER_ADVANCED
140 default y 140 default y
141 help 141 help
142 If you say Y here you can select between all the netfilter modules. 142 If you say Y here you can select between all the netfilter modules.
143 If you say N the more ununsual ones will not be shown and the 143 If you say N the more unusual ones will not be shown and the
144 basic ones needed by most people will default to 'M'. 144 basic ones needed by most people will default to 'M'.
145 145
146 If unsure, say Y. 146 If unsure, say Y.
diff --git a/net/core/dev.c b/net/core/dev.c
index 52fea5b28ca6..91d792d17e09 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2472,8 +2472,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2472 return GRO_NORMAL; 2472 return GRO_NORMAL;
2473 2473
2474 for (p = napi->gro_list; p; p = p->next) { 2474 for (p = napi->gro_list; p; p = p->next) {
2475 NAPI_GRO_CB(p)->same_flow = !compare_ether_header( 2475 NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
2476 skb_mac_header(p), skb_gro_mac_header(skb)); 2476 && !compare_ether_header(skb_mac_header(p),
2477 skb_gro_mac_header(skb));
2477 NAPI_GRO_CB(p)->flush = 0; 2478 NAPI_GRO_CB(p)->flush = 0;
2478 } 2479 }
2479 2480
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 244ca56dffac..d9d5160610d5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
261 ret = 0; 261 ret = 0;
262 262
263err_out: 263err_out:
264 if (rule_buf) 264 kfree(rule_buf);
265 kfree(rule_buf);
266 265
267 return ret; 266 return ret;
268} 267}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 35c5f6a5cb7c..5ba533d234db 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,7 +253,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
253 indev = in ? in->name : nulldevname; 253 indev = in ? in->name : nulldevname;
254 outdev = out ? out->name : nulldevname; 254 outdev = out ? out->name : nulldevname;
255 255
256 rcu_read_lock(); 256 rcu_read_lock_bh();
257 private = rcu_dereference(table->private); 257 private = rcu_dereference(table->private);
258 table_base = rcu_dereference(private->entries[smp_processor_id()]); 258 table_base = rcu_dereference(private->entries[smp_processor_id()]);
259 259
@@ -329,7 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
329 } 329 }
330 } while (!hotdrop); 330 } while (!hotdrop);
331 331
332 rcu_read_unlock(); 332 rcu_read_unlock_bh();
333 333
334 if (hotdrop) 334 if (hotdrop)
335 return NF_DROP; 335 return NF_DROP;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 82ee7c9049ff..810c0b62c7d4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -339,7 +339,7 @@ ipt_do_table(struct sk_buff *skb,
339 339
340 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 340 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
341 341
342 rcu_read_lock(); 342 rcu_read_lock_bh();
343 private = rcu_dereference(table->private); 343 private = rcu_dereference(table->private);
344 table_base = rcu_dereference(private->entries[smp_processor_id()]); 344 table_base = rcu_dereference(private->entries[smp_processor_id()]);
345 345
@@ -437,7 +437,7 @@ ipt_do_table(struct sk_buff *skb,
437 } 437 }
438 } while (!hotdrop); 438 } while (!hotdrop);
439 439
440 rcu_read_unlock(); 440 rcu_read_unlock_bh();
441 441
442#ifdef DEBUG_ALLOW_ALL 442#ifdef DEBUG_ALLOW_ALL
443 return NF_ACCEPT; 443 return NF_ACCEPT;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 2451aeb5ac23..fafbec8b073e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1081,8 +1081,7 @@ out_err:
1081 * this, no blocking and very strange errors 8) 1081 * this, no blocking and very strange errors 8)
1082 */ 1082 */
1083 1083
1084static int tcp_recv_urg(struct sock *sk, long timeo, 1084static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags)
1085 struct msghdr *msg, int len, int flags)
1086{ 1085{
1087 struct tcp_sock *tp = tcp_sk(sk); 1086 struct tcp_sock *tp = tcp_sk(sk);
1088 1087
@@ -1697,7 +1696,7 @@ out:
1697 return err; 1696 return err;
1698 1697
1699recv_urg: 1698recv_urg:
1700 err = tcp_recv_urg(sk, timeo, msg, len, flags); 1699 err = tcp_recv_urg(sk, msg, len, flags);
1701 goto out; 1700 goto out;
1702} 1701}
1703 1702
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c1f259d2d33b..53300fa2359f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb,
754 tp->fackets_out -= decr; 754 tp->fackets_out -= decr;
755} 755}
756 756
757/* Pcount in the middle of the write queue got changed, we need to do various
758 * tweaks to fix counters
759 */
760static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
761{
762 struct tcp_sock *tp = tcp_sk(sk);
763
764 tp->packets_out -= decr;
765
766 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
767 tp->sacked_out -= decr;
768 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
769 tp->retrans_out -= decr;
770 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
771 tp->lost_out -= decr;
772
773 /* Reno case is special. Sigh... */
774 if (tcp_is_reno(tp) && decr > 0)
775 tp->sacked_out -= min_t(u32, tp->sacked_out, decr);
776
777 tcp_adjust_fackets_out(sk, skb, decr);
778
779 if (tp->lost_skb_hint &&
780 before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
781 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
782 tp->lost_cnt_hint -= decr;
783
784 tcp_verify_left_out(tp);
785}
786
757/* Function to create two new TCP segments. Shrinks the given segment 787/* Function to create two new TCP segments. Shrinks the given segment
758 * to the specified size and appends a new segment with the rest of the 788 * to the specified size and appends a new segment with the rest of the
759 * packet to the list. This won't be called frequently, I hope. 789 * packet to the list. This won't be called frequently, I hope.
@@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
836 int diff = old_factor - tcp_skb_pcount(skb) - 866 int diff = old_factor - tcp_skb_pcount(skb) -
837 tcp_skb_pcount(buff); 867 tcp_skb_pcount(buff);
838 868
839 tp->packets_out -= diff; 869 if (diff)
840 870 tcp_adjust_pcount(sk, skb, diff);
841 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
842 tp->sacked_out -= diff;
843 if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
844 tp->retrans_out -= diff;
845
846 if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST)
847 tp->lost_out -= diff;
848
849 /* Adjust Reno SACK estimate. */
850 if (tcp_is_reno(tp) && diff > 0) {
851 tcp_dec_pcount_approx_int(&tp->sacked_out, diff);
852 tcp_verify_left_out(tp);
853 }
854 tcp_adjust_fackets_out(sk, skb, diff);
855
856 if (tp->lost_skb_hint &&
857 before(TCP_SKB_CB(skb)->seq,
858 TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
859 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
860 tp->lost_cnt_hint -= diff;
861 } 871 }
862 872
863 /* Link BUFF into the send queue. */ 873 /* Link BUFF into the send queue. */
@@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1768 * packet counting does not break. 1778 * packet counting does not break.
1769 */ 1779 */
1770 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; 1780 TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS;
1771 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS)
1772 tp->retrans_out -= tcp_skb_pcount(next_skb);
1773 if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST)
1774 tp->lost_out -= tcp_skb_pcount(next_skb);
1775 /* Reno case is special. Sigh... */
1776 if (tcp_is_reno(tp) && tp->sacked_out)
1777 tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
1778
1779 tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb));
1780 tp->packets_out -= tcp_skb_pcount(next_skb);
1781 1781
1782 /* changed transmit queue under us so clear hints */ 1782 /* changed transmit queue under us so clear hints */
1783 tcp_clear_retrans_hints_partial(tp); 1783 tcp_clear_retrans_hints_partial(tp);
1784 if (next_skb == tp->retransmit_skb_hint) 1784 if (next_skb == tp->retransmit_skb_hint)
1785 tp->retransmit_skb_hint = skb; 1785 tp->retransmit_skb_hint = skb;
1786 1786
1787 tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb));
1788
1787 sk_wmem_free_skb(sk, next_skb); 1789 sk_wmem_free_skb(sk, next_skb);
1788} 1790}
1789 1791
@@ -1891,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1891 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 1893 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1892 return -ENOMEM; /* We'll try again later. */ 1894 return -ENOMEM; /* We'll try again later. */
1893 } else { 1895 } else {
1894 tcp_init_tso_segs(sk, skb, cur_mss); 1896 int oldpcount = tcp_skb_pcount(skb);
1897
1898 if (unlikely(oldpcount > 1)) {
1899 tcp_init_tso_segs(sk, skb, cur_mss);
1900 tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
1901 }
1895 } 1902 }
1896 1903
1897 tcp_retrans_try_collapse(sk, skb, cur_mss); 1904 tcp_retrans_try_collapse(sk, skb, cur_mss);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ec992159b5f8..ca8cb326d1d2 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -22,17 +22,17 @@ menuconfig IPV6
22if IPV6 22if IPV6
23 23
24config IPV6_PRIVACY 24config IPV6_PRIVACY
25 bool "IPv6: Privacy Extensions support" 25 bool "IPv6: Privacy Extensions (RFC 3041) support"
26 ---help--- 26 ---help---
27 Privacy Extensions for Stateless Address Autoconfiguration in IPv6 27 Privacy Extensions for Stateless Address Autoconfiguration in IPv6
28 support. With this option, additional periodically-alter 28 support. With this option, additional periodically-altered
29 pseudo-random global-scope unicast address(es) will assigned to 29 pseudo-random global-scope unicast address(es) will be assigned to
30 your interface(s). 30 your interface(s).
31 31
32 We use our standard pseudo random algorithm to generate randomized 32 We use our standard pseudo-random algorithm to generate the
33 interface identifier, instead of one described in RFC 3041. 33 randomized interface identifier, instead of one described in RFC 3041.
34 34
35 By default, kernel do not generate temporary addresses. 35 By default the kernel does not generate temporary addresses.
36 To use temporary addresses, do 36 To use temporary addresses, do
37 37
38 echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 38 echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr
@@ -43,9 +43,9 @@ config IPV6_ROUTER_PREF
43 bool "IPv6: Router Preference (RFC 4191) support" 43 bool "IPv6: Router Preference (RFC 4191) support"
44 ---help--- 44 ---help---
45 Router Preference is an optional extension to the Router 45 Router Preference is an optional extension to the Router
46 Advertisement message to improve the ability of hosts 46 Advertisement message which improves the ability of hosts
47 to pick more appropriate router, especially when the hosts 47 to pick an appropriate router, especially when the hosts
48 is placed in a multi-homed network. 48 are placed in a multi-homed network.
49 49
50 If unsure, say N. 50 If unsure, say N.
51 51
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e89cfa3a8f25..dfed176aed37 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,7 +365,7 @@ ip6t_do_table(struct sk_buff *skb,
365 365
366 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 366 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
367 367
368 rcu_read_lock(); 368 rcu_read_lock_bh();
369 private = rcu_dereference(table->private); 369 private = rcu_dereference(table->private);
370 table_base = rcu_dereference(private->entries[smp_processor_id()]); 370 table_base = rcu_dereference(private->entries[smp_processor_id()]);
371 371
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
466#ifdef CONFIG_NETFILTER_DEBUG 466#ifdef CONFIG_NETFILTER_DEBUG
467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
468#endif 468#endif
469 rcu_read_unlock(); 469 rcu_read_unlock_bh();
470 470
471#ifdef DEBUG_ALLOW_ALL 471#ifdef DEBUG_ALLOW_ALL
472 return NF_ACCEPT; 472 return NF_ACCEPT;
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 0af823cf7f1f..5ee5a031bc93 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -72,6 +72,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
72#endif 72#endif
73 73
74 skb->protocol = htons(ETH_P_IPV6); 74 skb->protocol = htons(ETH_P_IPV6);
75 skb->local_df = 1;
75 76
76 return x->outer_mode->output2(x, skb); 77 return x->outer_mode->output2(x, skb);
77} 78}
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 60c16162474c..f3d9ae350fb6 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -33,7 +33,7 @@ choice
33 ---help--- 33 ---help---
34 This option selects the default rate control algorithm 34 This option selects the default rate control algorithm
35 mac80211 will use. Note that this default can still be 35 mac80211 will use. Note that this default can still be
36 overriden through the ieee80211_default_rc_algo module 36 overridden through the ieee80211_default_rc_algo module
37 parameter if different algorithms are available. 37 parameter if different algorithms are available.
38 38
39config MAC80211_RC_DEFAULT_PID 39config MAC80211_RC_DEFAULT_PID
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2c967e4f706c..bb279bf59a1b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -52,7 +52,7 @@ config NF_CT_ACCT
52 52
53 Please note that currently this option only sets a default state. 53 Please note that currently this option only sets a default state.
54 You may change it at boot time with nf_conntrack.acct=0/1 kernel 54 You may change it at boot time with nf_conntrack.acct=0/1 kernel
55 paramater or by loading the nf_conntrack module with acct=0/1. 55 parameter or by loading the nf_conntrack module with acct=0/1.
56 56
57 You may also disable/enable it on a running system with: 57 You may also disable/enable it on a running system with:
58 sysctl net.netfilter.nf_conntrack_acct=0/1 58 sysctl net.netfilter.nf_conntrack_acct=0/1
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 51a5669573f2..6ec7d55b1769 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -6,7 +6,7 @@ config PHONET
6 tristate "Phonet protocols family" 6 tristate "Phonet protocols family"
7 help 7 help
8 The Phone Network protocol (PhoNet) is a packet-oriented 8 The Phone Network protocol (PhoNet) is a packet-oriented
9 communication protocol developped by Nokia for use with its modems. 9 communication protocol developed by Nokia for use with its modems.
10 10
11 This is required for Maemo to use cellular data connectivity (if 11 This is required for Maemo to use cellular data connectivity (if
12 supported). It can also be used to control Nokia phones 12 supported). It can also be used to control Nokia phones
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 06a7b798d9a7..4933b380985e 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51 51
52struct list_head rds_ib_devices; 52struct list_head rds_ib_devices;
53 53
54/* NOTE: if also grabbing ibdev lock, grab this first */
54DEFINE_SPINLOCK(ib_nodev_conns_lock); 55DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns); 56LIST_HEAD(ib_nodev_conns);
56 57
@@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device)
137 kfree(i_ipaddr); 138 kfree(i_ipaddr);
138 } 139 }
139 140
140 rds_ib_remove_conns(rds_ibdev); 141 rds_ib_destroy_conns(rds_ibdev);
141 142
142 if (rds_ibdev->mr_pool) 143 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); 144 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
@@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr)
249void rds_ib_exit(void) 250void rds_ib_exit(void)
250{ 251{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 252 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns(); 253 rds_ib_destroy_nodev_conns();
253 ib_unregister_client(&rds_ib_client); 254 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit(); 255 rds_ib_sysctl_exit();
255 rds_ib_recv_exit(); 256 rds_ib_recv_exit();
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 8be563a1363a..069206cae733 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -108,7 +108,12 @@ struct rds_ib_connection {
108 108
109 /* sending acks */ 109 /* sending acks */
110 unsigned long i_ack_flags; 110 unsigned long i_ack_flags;
111#ifdef KERNEL_HAS_ATOMIC64
112 atomic64_t i_ack_next; /* next ACK to send */
113#else
114 spinlock_t i_ack_lock; /* protect i_ack_next */
111 u64 i_ack_next; /* next ACK to send */ 115 u64 i_ack_next; /* next ACK to send */
116#endif
112 struct rds_header *i_ack; 117 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr; 118 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge; 119 struct ib_sge i_ack_sge;
@@ -267,9 +272,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
267 272
268/* ib_rdma.c */ 273/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 274int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 275void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void); 276void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); 277void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock);
278static inline void rds_ib_destroy_nodev_conns(void)
279{
280 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
281}
282static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
283{
284 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
285}
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 286struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 287void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 288void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -355,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
355 return &sge[1]; 368 return &sge[1];
356} 369}
357 370
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
366
367#endif 371#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0532237bd128..f8e40e1a6038 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); 126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err) 127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn); 129 rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132 130
133 /* If the peer gave us the last packet it saw, process this as if 131 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */ 132 * we had received a regular ACK. */
@@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
616 /* 614 /*
617 * Move connection back to the nodev list. 615 * Move connection back to the nodev list.
618 */ 616 */
619 if (ic->rds_ibdev) { 617 if (ic->rds_ibdev)
620 618 rds_ib_remove_conn(ic->rds_ibdev, conn);
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631 619
632 ic->i_cm_id = NULL; 620 ic->i_cm_id = NULL;
633 ic->i_pd = NULL; 621 ic->i_pd = NULL;
@@ -648,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
648 636
649 /* Clear the ACK state */ 637 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 638 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0); 639#ifdef KERNEL_HAS_ATOMIC64
640 atomic64_set(&ic->i_ack_next, 0);
641#else
642 ic->i_ack_next = 0;
643#endif
652 ic->i_ack_recv = 0; 644 ic->i_ack_recv = 0;
653 645
654 /* Clear flow control state */ 646 /* Clear flow control state */
@@ -681,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
681 673
682 INIT_LIST_HEAD(&ic->ib_node); 674 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex); 675 mutex_init(&ic->i_recv_mutex);
676#ifndef KERNEL_HAS_ATOMIC64
677 spin_lock_init(&ic->i_ack_lock);
678#endif
684 679
685 /* 680 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they 681 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -701,11 +696,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
701 return 0; 696 return 0;
702} 697}
703 698
699/*
700 * Free a connection. Connection must be shut down and not set for reconnect.
701 */
704void rds_ib_conn_free(void *arg) 702void rds_ib_conn_free(void *arg)
705{ 703{
706 struct rds_ib_connection *ic = arg; 704 struct rds_ib_connection *ic = arg;
705 spinlock_t *lock_ptr;
706
707 rdsdebug("ic %p\n", ic); 707 rdsdebug("ic %p\n", ic);
708
709 /*
710 * Conn is either on a dev's list or on the nodev list.
711 * A race with shutdown() or connect() would cause problems
712 * (since rds_ibdev would change) but that should never happen.
713 */
714 lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock;
715
716 spin_lock_irq(lock_ptr);
708 list_del(&ic->ib_node); 717 list_del(&ic->ib_node);
718 spin_unlock_irq(lock_ptr);
719
709 kfree(ic); 720 kfree(ic);
710} 721}
711 722
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 69a6289ed672..81033af93020 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140} 140}
141 141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 142void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{ 143{
144 struct rds_ib_connection *ic = conn->c_transport_data; 144 struct rds_ib_connection *ic = conn->c_transport_data;
145 145
@@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn
148 BUG_ON(list_empty(&ib_nodev_conns)); 148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node)); 149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node); 150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152 151
153 spin_lock_irq(&rds_ibdev->spinlock); 152 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 153 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 154 spin_unlock_irq(&rds_ibdev->spinlock);
155 spin_unlock_irq(&ib_nodev_conns_lock);
156 156
157 ic->rds_ibdev = rds_ibdev; 157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160} 158}
161 159
162void rds_ib_remove_nodev_conns(void) 160void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
163{ 161{
164 struct rds_ib_connection *ic, *_ic; 162 struct rds_ib_connection *ic = conn->c_transport_data;
165 LIST_HEAD(tmp_list);
166 163
167 /* avoid calling conn_destroy with irqs off */ 164 /* place conn on nodev_conns_list */
168 spin_lock_irq(&ib_nodev_conns_lock); 165 spin_lock(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172 166
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 167 spin_lock_irq(&rds_ibdev->spinlock);
174 if (ic->conn->c_passive) 168 BUG_ON(list_empty(&ic->ib_node));
175 rds_conn_destroy(ic->conn->c_passive); 169 list_del(&ic->ib_node);
176 rds_conn_destroy(ic->conn); 170 spin_unlock_irq(&rds_ibdev->spinlock);
177 } 171
172 list_add_tail(&ic->ib_node, &ib_nodev_conns);
173
174 spin_unlock(&ib_nodev_conns_lock);
175
176 ic->rds_ibdev = NULL;
178} 177}
179 178
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) 179void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock)
181{ 180{
182 struct rds_ib_connection *ic, *_ic; 181 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 182 LIST_HEAD(tmp_list);
184 183
185 /* avoid calling conn_destroy with irqs off */ 184 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock); 185 spin_lock_irq(list_lock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list); 186 list_splice(list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list); 187 INIT_LIST_HEAD(list);
189 spin_unlock_irq(&rds_ibdev->spinlock); 188 spin_unlock_irq(list_lock);
190 189
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { 190 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive) 191 if (ic->conn->c_passive)
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 5061b5502162..36d931573ff4 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, 399static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_ib_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 445static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 1b56905c4c08..b732efb5b634 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR
51 51
52struct list_head rds_iw_devices; 52struct list_head rds_iw_devices;
53 53
54/* NOTE: if also grabbing iwdev lock, grab this first */
54DEFINE_SPINLOCK(iw_nodev_conns_lock); 55DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns); 56LIST_HEAD(iw_nodev_conns);
56 57
@@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device)
145 } 146 }
146 spin_unlock_irq(&rds_iwdev->spinlock); 147 spin_unlock_irq(&rds_iwdev->spinlock);
147 148
148 rds_iw_remove_conns(rds_iwdev); 149 rds_iw_destroy_conns(rds_iwdev);
149 150
150 if (rds_iwdev->mr_pool) 151 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); 152 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
@@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr)
258void rds_iw_exit(void) 259void rds_iw_exit(void)
259{ 260{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); 261 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns(); 262 rds_iw_destroy_nodev_conns();
262 ib_unregister_client(&rds_iw_client); 263 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit(); 264 rds_iw_sysctl_exit();
264 rds_iw_recv_exit(); 265 rds_iw_recv_exit();
diff --git a/net/rds/iw.h b/net/rds/iw.h
index 0ddda34f2a1c..b4fb27252895 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -131,7 +131,12 @@ struct rds_iw_connection {
131 131
132 /* sending acks */ 132 /* sending acks */
133 unsigned long i_ack_flags; 133 unsigned long i_ack_flags;
134#ifdef KERNEL_HAS_ATOMIC64
135 atomic64_t i_ack_next; /* next ACK to send */
136#else
137 spinlock_t i_ack_lock; /* protect i_ack_next */
134 u64 i_ack_next; /* next ACK to send */ 138 u64 i_ack_next; /* next ACK to send */
139#endif
135 struct rds_header *i_ack; 140 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr; 141 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge; 142 struct ib_sge i_ack_sge;
@@ -294,9 +299,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn,
294 299
295/* ib_rdma.c */ 300/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); 301int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); 302void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void); 303void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); 304void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock);
305static inline void rds_iw_destroy_nodev_conns(void)
306{
307 __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock);
308}
309static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev)
310{
311 __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock);
312}
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); 313struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); 314void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); 315void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
@@ -383,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
383 return &sge[1]; 396 return &sge[1];
384} 397}
385 398
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif 399#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 57ecb3d4b8a5..a416b0d492b1 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); 86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err) 87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); 88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn); 89 rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92 90
93 /* If the peer gave us the last packet it saw, process this as if 91 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */ 92 * we had received a regular ACK. */
@@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
637 * Move connection back to the nodev list. 635 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list. 636 * Remove cm_id from the device cm_id list.
639 */ 637 */
640 if (ic->rds_iwdev) { 638 if (ic->rds_iwdev)
641 639 rds_iw_remove_conn(ic->rds_iwdev, conn);
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653 640
654 rdma_destroy_id(ic->i_cm_id); 641 rdma_destroy_id(ic->i_cm_id);
655 642
@@ -672,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn)
672 659
673 /* Clear the ACK state */ 660 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 661 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0); 662#ifdef KERNEL_HAS_ATOMIC64
663 atomic64_set(&ic->i_ack_next, 0);
664#else
665 ic->i_ack_next = 0;
666#endif
676 ic->i_ack_recv = 0; 667 ic->i_ack_recv = 0;
677 668
678 /* Clear flow control state */ 669 /* Clear flow control state */
@@ -706,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
706 697
707 INIT_LIST_HEAD(&ic->iw_node); 698 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex); 699 mutex_init(&ic->i_recv_mutex);
700#ifndef KERNEL_HAS_ATOMIC64
701 spin_lock_init(&ic->i_ack_lock);
702#endif
709 703
710 /* 704 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they 705 * rds_iw_conn_shutdown() waits for these to be emptied so they
@@ -726,11 +720,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
726 return 0; 720 return 0;
727} 721}
728 722
723/*
724 * Free a connection. Connection must be shut down and not set for reconnect.
725 */
729void rds_iw_conn_free(void *arg) 726void rds_iw_conn_free(void *arg)
730{ 727{
731 struct rds_iw_connection *ic = arg; 728 struct rds_iw_connection *ic = arg;
729 spinlock_t *lock_ptr;
730
732 rdsdebug("ic %p\n", ic); 731 rdsdebug("ic %p\n", ic);
732
733 /*
734 * Conn is either on a dev's list or on the nodev list.
735 * A race with shutdown() or connect() would cause problems
736 * (since rds_iwdev would change) but that should never happen.
737 */
738 lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock;
739
740 spin_lock_irq(lock_ptr);
733 list_del(&ic->iw_node); 741 list_del(&ic->iw_node);
742 spin_unlock_irq(lock_ptr);
743
734 kfree(ic); 744 kfree(ic);
735} 745}
736 746
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 1c02a8f952d0..dcdb37da80f2 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i
196 return rds_iw_add_cm_id(rds_iwdev, cm_id); 196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197} 197}
198 198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) 199void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{ 200{
201 struct rds_iw_connection *ic = conn->c_transport_data; 201 struct rds_iw_connection *ic = conn->c_transport_data;
202 202
@@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn
205 BUG_ON(list_empty(&iw_nodev_conns)); 205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node)); 206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node); 207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209 208
210 spin_lock_irq(&rds_iwdev->spinlock); 209 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 210 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 211 spin_unlock_irq(&rds_iwdev->spinlock);
212 spin_unlock_irq(&iw_nodev_conns_lock);
213 213
214 ic->rds_iwdev = rds_iwdev; 214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217} 215}
218 216
219void rds_iw_remove_nodev_conns(void) 217void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
220{ 218{
221 struct rds_iw_connection *ic, *_ic; 219 struct rds_iw_connection *ic = conn->c_transport_data;
222 LIST_HEAD(tmp_list);
223 220
224 /* avoid calling conn_destroy with irqs off */ 221 /* place conn on nodev_conns_list */
225 spin_lock_irq(&iw_nodev_conns_lock); 222 spin_lock(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229 223
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 224 spin_lock_irq(&rds_iwdev->spinlock);
231 if (ic->conn->c_passive) 225 BUG_ON(list_empty(&ic->iw_node));
232 rds_conn_destroy(ic->conn->c_passive); 226 list_del(&ic->iw_node);
233 rds_conn_destroy(ic->conn); 227 spin_unlock_irq(&rds_iwdev->spinlock);
234 } 228
229 list_add_tail(&ic->iw_node, &iw_nodev_conns);
230
231 spin_unlock(&iw_nodev_conns_lock);
232
233 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
234 ic->rds_iwdev = NULL;
235} 235}
236 236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) 237void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock)
238{ 238{
239 struct rds_iw_connection *ic, *_ic; 239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list); 240 LIST_HEAD(tmp_list);
241 241
242 /* avoid calling conn_destroy with irqs off */ 242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock); 243 spin_lock_irq(list_lock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list); 244 list_splice(list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list); 245 INIT_LIST_HEAD(list);
246 spin_unlock_irq(&rds_iwdev->spinlock); 246 spin_unlock_irq(list_lock);
247 247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { 248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive) 249 if (ic->conn->c_passive)
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index a1931f0027a2..fde470fa50d5 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
395 * room for it beyond the ring size. Send completion notices its special 395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case. 396 * wr_id and avoids working with the ring in that case.
397 */ 397 */
398#ifndef KERNEL_HAS_ATOMIC64
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, 399static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required) 400 int ack_required)
400{ 401{
401 rds_iw_set_64bit(&ic->i_ack_next, seq); 402 unsigned long flags;
403
404 spin_lock_irqsave(&ic->i_ack_lock, flags);
405 ic->i_ack_next = seq;
406 if (ack_required)
407 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
408 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
409}
410
411static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
412{
413 unsigned long flags;
414 u64 seq;
415
416 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
417
418 spin_lock_irqsave(&ic->i_ack_lock, flags);
419 seq = ic->i_ack_next;
420 spin_unlock_irqrestore(&ic->i_ack_lock, flags);
421
422 return seq;
423}
424#else
425static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
426 int ack_required)
427{
428 atomic64_set(&ic->i_ack_next, seq);
402 if (ack_required) { 429 if (ack_required) {
403 smp_mb__before_clear_bit(); 430 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 431 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
@@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 437 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit(); 438 smp_mb__after_clear_bit();
412 439
413 return ic->i_ack_next; 440 return atomic64_read(&ic->i_ack_next);
414} 441}
442#endif
443
415 444
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) 445static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{ 446{
@@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
464 * - i_ack_next, which is the last sequence number we received 493 * - i_ack_next, which is the last sequence number we received
465 * 494 *
466 * Potentially, send queue and receive queue handlers can run concurrently. 495 * Potentially, send queue and receive queue handlers can run concurrently.
496 * It would be nice to not have to use a spinlock to synchronize things,
497 * but the one problem that rules this out is that 64bit updates are
498 * not atomic on all platforms. Things would be a lot simpler if
499 * we had atomic64 or maybe cmpxchg64 everywhere.
467 * 500 *
468 * Reconnecting complicates this picture just slightly. When we 501 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer 502 * reconnect, we may be seeing duplicate packets. The peer
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 060400704979..619f0a30a4e5 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -28,6 +28,10 @@
28 */ 28 */
29#define RDS_PORT 18634 29#define RDS_PORT 18634
30 30
31#ifdef ATOMIC64_INIT
32#define KERNEL_HAS_ATOMIC64
33#endif
34
31#ifdef DEBUG 35#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) 36#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else 37#else
diff --git a/net/rds/send.c b/net/rds/send.c
index 1b37364656f0..104fe033203d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{ 615{
616 struct rds_message *rm, *tmp; 616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn; 617 struct rds_connection *conn;
618 unsigned long flags; 618 unsigned long flags, flags2;
619 LIST_HEAD(list); 619 LIST_HEAD(list);
620 int wake = 0; 620 int wake = 0;
621 621
@@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
651 list_for_each_entry(rm, &list, m_sock_item) { 651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that 652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */ 653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock); 654 spin_lock_irqsave(&rm->m_rs_lock, flags2);
655 rm->m_rs = NULL; 655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock); 656 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
657 657
658 /* 658 /*
659 * If we see this flag cleared then we're *sure* that someone 659 * If we see this flag cleared then we're *sure* that someone
diff --git a/net/socket.c b/net/socket.c
index 91d0c0254ffe..791d71a36a93 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -493,8 +493,7 @@ static struct socket *sock_alloc(void)
493 inode->i_uid = current_fsuid(); 493 inode->i_uid = current_fsuid();
494 inode->i_gid = current_fsgid(); 494 inode->i_gid = current_fsgid();
495 495
496 get_cpu_var(sockets_in_use)++; 496 percpu_add(sockets_in_use, 1);
497 put_cpu_var(sockets_in_use);
498 return sock; 497 return sock;
499} 498}
500 499
@@ -536,8 +535,7 @@ void sock_release(struct socket *sock)
536 if (sock->fasync_list) 535 if (sock->fasync_list)
537 printk(KERN_ERR "sock_release: fasync list not empty!\n"); 536 printk(KERN_ERR "sock_release: fasync list not empty!\n");
538 537
539 get_cpu_var(sockets_in_use)--; 538 percpu_sub(sockets_in_use, 1);
540 put_cpu_var(sockets_in_use);
541 if (!sock->file) { 539 if (!sock->file) {
542 iput(SOCK_INODE(sock)); 540 iput(SOCK_INODE(sock));
543 return; 541 return;
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index afd91c78ce8e..443c161eb8bd 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -47,7 +47,7 @@ config RPCSEC_GSS_SPKM3
47 select CRYPTO_CBC 47 select CRYPTO_CBC
48 help 48 help
49 Choose Y here to enable Secure RPC using the SPKM3 public key 49 Choose Y here to enable Secure RPC using the SPKM3 public key
50 GSS-API mechansim (RFC 2025). 50 GSS-API mechanism (RFC 2025).
51 51
52 Secure RPC calls with SPKM3 require an auxiliary userspace 52 Secure RPC calls with SPKM3 require an auxiliary userspace
53 daemon which may be found in the Linux nfs-utils package 53 daemon which may be found in the Linux nfs-utils package
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 9f2f2412a2f3..8847add6ca16 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -317,8 +317,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx)
317 } 317 }
318 case SVC_POOL_PERNODE: 318 case SVC_POOL_PERNODE:
319 { 319 {
320 node_to_cpumask_ptr(nodecpumask, node); 320 set_cpus_allowed_ptr(task, cpumask_of_node(node));
321 set_cpus_allowed_ptr(task, nodecpumask);
322 break; 321 break;
323 } 322 }
324 } 323 }
@@ -1009,6 +1008,8 @@ svc_process(struct svc_rqst *rqstp)
1009 rqstp->rq_res.tail[0].iov_len = 0; 1008 rqstp->rq_res.tail[0].iov_len = 0;
1010 /* Will be turned off only in gss privacy case: */ 1009 /* Will be turned off only in gss privacy case: */
1011 rqstp->rq_splice_ok = 1; 1010 rqstp->rq_splice_ok = 1;
1011 /* Will be turned off only when NFSv4 Sessions are used */
1012 rqstp->rq_usedeferral = 1;
1012 1013
1013 /* Setup reply header */ 1014 /* Setup reply header */
1014 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); 1015 rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
@@ -1079,7 +1080,6 @@ svc_process(struct svc_rqst *rqstp)
1079 procp = versp->vs_proc + proc; 1080 procp = versp->vs_proc + proc;
1080 if (proc >= versp->vs_nproc || !procp->pc_func) 1081 if (proc >= versp->vs_nproc || !procp->pc_func)
1081 goto err_bad_proc; 1082 goto err_bad_proc;
1082 rqstp->rq_server = serv;
1083 rqstp->rq_procinfo = procp; 1083 rqstp->rq_procinfo = procp;
1084 1084
1085 /* Syntactic check complete */ 1085 /* Syntactic check complete */
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2819ee093f36..c200d92e57e4 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -14,6 +14,8 @@
14 14
15#define RPCDBG_FACILITY RPCDBG_SVCXPRT 15#define RPCDBG_FACILITY RPCDBG_SVCXPRT
16 16
17#define SVC_MAX_WAKING 5
18
17static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); 19static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
18static int svc_deferred_recv(struct svc_rqst *rqstp); 20static int svc_deferred_recv(struct svc_rqst *rqstp);
19static struct cache_deferred_req *svc_defer(struct cache_req *req); 21static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -301,6 +303,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
301 struct svc_pool *pool; 303 struct svc_pool *pool;
302 struct svc_rqst *rqstp; 304 struct svc_rqst *rqstp;
303 int cpu; 305 int cpu;
306 int thread_avail;
304 307
305 if (!(xprt->xpt_flags & 308 if (!(xprt->xpt_flags &
306 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED)))) 309 ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
@@ -312,18 +315,14 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
312 315
313 spin_lock_bh(&pool->sp_lock); 316 spin_lock_bh(&pool->sp_lock);
314 317
315 if (!list_empty(&pool->sp_threads) &&
316 !list_empty(&pool->sp_sockets))
317 printk(KERN_ERR
318 "svc_xprt_enqueue: "
319 "threads and transports both waiting??\n");
320
321 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) { 318 if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
322 /* Don't enqueue dead transports */ 319 /* Don't enqueue dead transports */
323 dprintk("svc: transport %p is dead, not enqueued\n", xprt); 320 dprintk("svc: transport %p is dead, not enqueued\n", xprt);
324 goto out_unlock; 321 goto out_unlock;
325 } 322 }
326 323
324 pool->sp_stats.packets++;
325
327 /* Mark transport as busy. It will remain in this state until 326 /* Mark transport as busy. It will remain in this state until
328 * the provider calls svc_xprt_received. We update XPT_BUSY 327 * the provider calls svc_xprt_received. We update XPT_BUSY
329 * atomically because it also guards against trying to enqueue 328 * atomically because it also guards against trying to enqueue
@@ -356,7 +355,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
356 } 355 }
357 356
358 process: 357 process:
359 if (!list_empty(&pool->sp_threads)) { 358 /* Work out whether threads are available */
359 thread_avail = !list_empty(&pool->sp_threads); /* threads are asleep */
360 if (pool->sp_nwaking >= SVC_MAX_WAKING) {
361 /* too many threads are runnable and trying to wake up */
362 thread_avail = 0;
363 pool->sp_stats.overloads_avoided++;
364 }
365
366 if (thread_avail) {
360 rqstp = list_entry(pool->sp_threads.next, 367 rqstp = list_entry(pool->sp_threads.next,
361 struct svc_rqst, 368 struct svc_rqst,
362 rq_list); 369 rq_list);
@@ -371,11 +378,15 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
371 svc_xprt_get(xprt); 378 svc_xprt_get(xprt);
372 rqstp->rq_reserved = serv->sv_max_mesg; 379 rqstp->rq_reserved = serv->sv_max_mesg;
373 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); 380 atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
381 rqstp->rq_waking = 1;
382 pool->sp_nwaking++;
383 pool->sp_stats.threads_woken++;
374 BUG_ON(xprt->xpt_pool != pool); 384 BUG_ON(xprt->xpt_pool != pool);
375 wake_up(&rqstp->rq_wait); 385 wake_up(&rqstp->rq_wait);
376 } else { 386 } else {
377 dprintk("svc: transport %p put into queue\n", xprt); 387 dprintk("svc: transport %p put into queue\n", xprt);
378 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); 388 list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
389 pool->sp_stats.sockets_queued++;
379 BUG_ON(xprt->xpt_pool != pool); 390 BUG_ON(xprt->xpt_pool != pool);
380 } 391 }
381 392
@@ -588,6 +599,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
588 int pages; 599 int pages;
589 struct xdr_buf *arg; 600 struct xdr_buf *arg;
590 DECLARE_WAITQUEUE(wait, current); 601 DECLARE_WAITQUEUE(wait, current);
602 long time_left;
591 603
592 dprintk("svc: server %p waiting for data (to = %ld)\n", 604 dprintk("svc: server %p waiting for data (to = %ld)\n",
593 rqstp, timeout); 605 rqstp, timeout);
@@ -636,6 +648,11 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
636 return -EINTR; 648 return -EINTR;
637 649
638 spin_lock_bh(&pool->sp_lock); 650 spin_lock_bh(&pool->sp_lock);
651 if (rqstp->rq_waking) {
652 rqstp->rq_waking = 0;
653 pool->sp_nwaking--;
654 BUG_ON(pool->sp_nwaking < 0);
655 }
639 xprt = svc_xprt_dequeue(pool); 656 xprt = svc_xprt_dequeue(pool);
640 if (xprt) { 657 if (xprt) {
641 rqstp->rq_xprt = xprt; 658 rqstp->rq_xprt = xprt;
@@ -668,12 +685,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
668 add_wait_queue(&rqstp->rq_wait, &wait); 685 add_wait_queue(&rqstp->rq_wait, &wait);
669 spin_unlock_bh(&pool->sp_lock); 686 spin_unlock_bh(&pool->sp_lock);
670 687
671 schedule_timeout(timeout); 688 time_left = schedule_timeout(timeout);
672 689
673 try_to_freeze(); 690 try_to_freeze();
674 691
675 spin_lock_bh(&pool->sp_lock); 692 spin_lock_bh(&pool->sp_lock);
676 remove_wait_queue(&rqstp->rq_wait, &wait); 693 remove_wait_queue(&rqstp->rq_wait, &wait);
694 if (!time_left)
695 pool->sp_stats.threads_timedout++;
677 696
678 xprt = rqstp->rq_xprt; 697 xprt = rqstp->rq_xprt;
679 if (!xprt) { 698 if (!xprt) {
@@ -958,7 +977,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
958 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle); 977 struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
959 struct svc_deferred_req *dr; 978 struct svc_deferred_req *dr;
960 979
961 if (rqstp->rq_arg.page_len) 980 if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
962 return NULL; /* if more than a page, give up FIXME */ 981 return NULL; /* if more than a page, give up FIXME */
963 if (rqstp->rq_deferred) { 982 if (rqstp->rq_deferred) {
964 dr = rqstp->rq_deferred; 983 dr = rqstp->rq_deferred;
@@ -1112,3 +1131,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
1112 return totlen; 1131 return totlen;
1113} 1132}
1114EXPORT_SYMBOL_GPL(svc_xprt_names); 1133EXPORT_SYMBOL_GPL(svc_xprt_names);
1134
1135
1136/*----------------------------------------------------------------------------*/
1137
1138static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
1139{
1140 unsigned int pidx = (unsigned int)*pos;
1141 struct svc_serv *serv = m->private;
1142
1143 dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
1144
1145 lock_kernel();
1146 /* bump up the pseudo refcount while traversing */
1147 svc_get(serv);
1148 unlock_kernel();
1149
1150 if (!pidx)
1151 return SEQ_START_TOKEN;
1152 return (pidx > serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]);
1153}
1154
1155static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
1156{
1157 struct svc_pool *pool = p;
1158 struct svc_serv *serv = m->private;
1159
1160 dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
1161
1162 if (p == SEQ_START_TOKEN) {
1163 pool = &serv->sv_pools[0];
1164 } else {
1165 unsigned int pidx = (pool - &serv->sv_pools[0]);
1166 if (pidx < serv->sv_nrpools-1)
1167 pool = &serv->sv_pools[pidx+1];
1168 else
1169 pool = NULL;
1170 }
1171 ++*pos;
1172 return pool;
1173}
1174
1175static void svc_pool_stats_stop(struct seq_file *m, void *p)
1176{
1177 struct svc_serv *serv = m->private;
1178
1179 lock_kernel();
1180 /* this function really, really should have been called svc_put() */
1181 svc_destroy(serv);
1182 unlock_kernel();
1183}
1184
1185static int svc_pool_stats_show(struct seq_file *m, void *p)
1186{
1187 struct svc_pool *pool = p;
1188
1189 if (p == SEQ_START_TOKEN) {
1190 seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n");
1191 return 0;
1192 }
1193
1194 seq_printf(m, "%u %lu %lu %lu %lu %lu\n",
1195 pool->sp_id,
1196 pool->sp_stats.packets,
1197 pool->sp_stats.sockets_queued,
1198 pool->sp_stats.threads_woken,
1199 pool->sp_stats.overloads_avoided,
1200 pool->sp_stats.threads_timedout);
1201
1202 return 0;
1203}
1204
1205static const struct seq_operations svc_pool_stats_seq_ops = {
1206 .start = svc_pool_stats_start,
1207 .next = svc_pool_stats_next,
1208 .stop = svc_pool_stats_stop,
1209 .show = svc_pool_stats_show,
1210};
1211
1212int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
1213{
1214 int err;
1215
1216 err = seq_open(file, &svc_pool_stats_seq_ops);
1217 if (!err)
1218 ((struct seq_file *) file->private_data)->private = serv;
1219 return err;
1220}
1221EXPORT_SYMBOL(svc_pool_stats_open);
1222
1223/*----------------------------------------------------------------------------*/
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 9d504234af4a..af3198814c15 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -345,7 +345,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
345 lock_sock(sock->sk); 345 lock_sock(sock->sk);
346 sock->sk->sk_sndbuf = snd * 2; 346 sock->sk->sk_sndbuf = snd * 2;
347 sock->sk->sk_rcvbuf = rcv * 2; 347 sock->sk->sk_rcvbuf = rcv * 2;
348 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
349 release_sock(sock->sk); 348 release_sock(sock->sk);
350#endif 349#endif
351} 350}
@@ -797,23 +796,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
797 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), 796 test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags),
798 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); 797 test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
799 798
800 if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
801 /* sndbuf needs to have room for one request
802 * per thread, otherwise we can stall even when the
803 * network isn't a bottleneck.
804 *
805 * We count all threads rather than threads in a
806 * particular pool, which provides an upper bound
807 * on the number of threads which will access the socket.
808 *
809 * rcvbuf just needs to be able to hold a few requests.
810 * Normally they will be removed from the queue
811 * as soon a a complete request arrives.
812 */
813 svc_sock_setbufsize(svsk->sk_sock,
814 (serv->sv_nrthreads+3) * serv->sv_max_mesg,
815 3 * serv->sv_max_mesg);
816
817 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 799 clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
818 800
819 /* Receive data. If we haven't got the record length yet, get 801 /* Receive data. If we haven't got the record length yet, get
@@ -1061,15 +1043,6 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
1061 1043
1062 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; 1044 tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
1063 1045
1064 /* initialise setting must have enough space to
1065 * receive and respond to one request.
1066 * svc_tcp_recvfrom will re-adjust if necessary
1067 */
1068 svc_sock_setbufsize(svsk->sk_sock,
1069 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
1070 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
1071
1072 set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
1073 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); 1046 set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
1074 if (sk->sk_state != TCP_ESTABLISHED) 1047 if (sk->sk_state != TCP_ESTABLISHED)
1075 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); 1048 set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1139,8 +1112,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
1139 /* Initialize the socket */ 1112 /* Initialize the socket */
1140 if (sock->type == SOCK_DGRAM) 1113 if (sock->type == SOCK_DGRAM)
1141 svc_udp_init(svsk, serv); 1114 svc_udp_init(svsk, serv);
1142 else 1115 else {
1116 /* initialise setting must have enough space to
1117 * receive and respond to one request.
1118 */
1119 svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
1120 4 * serv->sv_max_mesg);
1143 svc_tcp_init(svsk, serv); 1121 svc_tcp_init(svsk, serv);
1122 }
1144 1123
1145 dprintk("svc: svc_setup_socket created %p (inet %p)\n", 1124 dprintk("svc: svc_setup_socket created %p (inet %p)\n",
1146 svsk, svsk->sk_sk); 1125 svsk, svsk->sk_sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index baac91049b0e..9dcc6e7f96ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
832 * All right, let's create it. 832 * All right, let's create it.
833 */ 833 */
834 mode = S_IFSOCK | 834 mode = S_IFSOCK |
835 (SOCK_INODE(sock)->i_mode & ~current->fs->umask); 835 (SOCK_INODE(sock)->i_mode & ~current_umask());
836 err = mnt_want_write(nd.path.mnt); 836 err = mnt_want_write(nd.path.mnt);
837 if (err) 837 if (err)
838 goto out_mknod_dput; 838 goto out_mknod_dput;
diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig
index 18495cdcd10d..1b46747a5f5a 100644
--- a/net/wimax/Kconfig
+++ b/net/wimax/Kconfig
@@ -8,7 +8,7 @@
8# 8#
9# As well, enablement of the RFKILL code means we need the INPUT layer 9# As well, enablement of the RFKILL code means we need the INPUT layer
10# support to inject events coming from hw rfkill switches. That 10# support to inject events coming from hw rfkill switches. That
11# dependency could be killed if input.h provided appropiate means to 11# dependency could be killed if input.h provided appropriate means to
12# work when input is disabled. 12# work when input is disabled.
13 13
14comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" 14comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled"