author     Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>  2008-05-22 23:48:37 -0400
committer  Lachlan McIlroy <lachlan@redback.melbourne.sgi.com>  2008-05-22 23:48:37 -0400
commit     c1e554aeea12d2dab5183e011c27dee6142dc927 (patch)
tree       a44908fa5c2f1f24b6f7e7313b60e49cd3235cdd /net
parent     c203e45f069af47ca7623e4dcd8c00bfba2722e4 (diff)
parent     78b58e549a3098a8c1408d0214bd25e5d5e7a3a3 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus
Diffstat (limited to 'net')
-rw-r--r--  net/8021q/vlan.c                             34
-rw-r--r--  net/8021q/vlan.h                              2
-rw-r--r--  net/8021q/vlan_dev.c                          5
-rw-r--r--  net/core/dev.c                                8
-rw-r--r--  net/core/pktgen.c                             4
-rw-r--r--  net/ipv4/route.c                              2
-rw-r--r--  net/ipv6/addrconf.c                          75
-rw-r--r--  net/ipv6/ndisc.c                              8
-rw-r--r--  net/ipv6/route.c                             12
-rw-r--r--  net/mac80211/mlme.c                          14
-rw-r--r--  net/mac80211/util.c                          37
-rw-r--r--  net/sched/cls_api.c                           2
-rw-r--r--  net/sunrpc/auth_generic.c                     8
-rw-r--r--  net/sunrpc/svc_xprt.c                        23
-rw-r--r--  net/sunrpc/svcauth_unix.c                     4
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c     102
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c        11
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c    290
18 files changed, 375 insertions, 266 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 2a739adaa92b..51961300b586 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -382,6 +382,24 @@ static void vlan_sync_address(struct net_device *dev,
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
 
+static void vlan_transfer_features(struct net_device *dev,
+				   struct net_device *vlandev)
+{
+	unsigned long old_features = vlandev->features;
+
+	if (dev->features & NETIF_F_VLAN_TSO) {
+		vlandev->features &= ~VLAN_TSO_FEATURES;
+		vlandev->features |= dev->features & VLAN_TSO_FEATURES;
+	}
+	if (dev->features & NETIF_F_VLAN_CSUM) {
+		vlandev->features &= ~NETIF_F_ALL_CSUM;
+		vlandev->features |= dev->features & NETIF_F_ALL_CSUM;
+	}
+
+	if (old_features != vlandev->features)
+		netdev_features_change(vlandev);
+}
+
 static void __vlan_device_event(struct net_device *dev, unsigned long event)
 {
 	switch (event) {
@@ -410,10 +428,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	int i, flgs;
 	struct net_device *vlandev;
 
-	if (is_vlan_dev(dev)) {
+	if (is_vlan_dev(dev))
 		__vlan_device_event(dev, event);
-		goto out;
-	}
 
 	grp = __vlan_find_group(dev);
 	if (!grp)
@@ -450,6 +466,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		break;
 
+	case NETDEV_FEAT_CHANGE:
+		/* Propagate device features to underlying device */
+		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+			vlandev = vlan_group_get_device(grp, i);
+			if (!vlandev)
+				continue;
+
+			vlan_transfer_features(dev, vlandev);
+		}
+
+		break;
+
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too. */
 		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 5229a72c7ea1..79625696e86a 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -7,6 +7,8 @@
 #define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
 #define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
 
+#define VLAN_TSO_FEATURES	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_SG)
+
 /* Find a VLAN device by the MAC address of its Ethernet device, and
  * it's VLAN ID.  The default configuration is to have VLAN's scope
  * to be box-wide, so the MAC will be ignored.  The mac will only be
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index c961f0826005..b1cfbaa88db2 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -663,6 +663,11 @@ static int vlan_dev_init(struct net_device *dev)
 					  (1<<__LINK_STATE_DORMANT))) |
 		      (1<<__LINK_STATE_PRESENT);
 
+	if (real_dev->features & NETIF_F_VLAN_TSO)
+		dev->features |= real_dev->features & VLAN_TSO_FEATURES;
+	if (real_dev->features & NETIF_F_VLAN_CSUM)
+		dev->features |= real_dev->features & NETIF_F_ALL_CSUM;
+
 	/* ipv6 shared card related stuff */
 	dev->dev_id = real_dev->dev_id;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index a1607bc0cd4c..582963077877 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -903,7 +903,11 @@ int dev_change_name(struct net_device *dev, char *newname)
 	strlcpy(dev->name, newname, IFNAMSIZ);
 
 rollback:
-	device_rename(&dev->dev, dev->name);
+	err = device_rename(&dev->dev, dev->name);
+	if (err) {
+		memcpy(dev->name, oldname, IFNAMSIZ);
+		return err;
+	}
 
 	write_lock_bh(&dev_base_lock);
 	hlist_del(&dev->name_hlist);
@@ -3137,7 +3141,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
 	 *	Load in the correct multicast list now the flags have changed.
 	 */
 
-	if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
+	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
 		dev->change_rx_flags(dev, IFF_MULTICAST);
 
 	dev_set_rx_mode(dev);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 8dca21110493..fdf537707e51 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -390,6 +390,7 @@ struct pktgen_thread {
 	int cpu;
 
 	wait_queue_head_t queue;
+	struct completion start_done;
 };
 
 #define REMOVE 1
@@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg)
 	BUG_ON(smp_processor_id() != cpu);
 
 	init_waitqueue_head(&t->queue);
+	complete(&t->start_done);
 
 	pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current));
 
@@ -3615,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu)
 	INIT_LIST_HEAD(&t->if_list);
 
 	list_add_tail(&t->th_list, &pktgen_threads);
+	init_completion(&t->start_done);
 
 	p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu);
 	if (IS_ERR(p)) {
@@ -3639,6 +3642,7 @@ static int __init pktgen_create_thread(int cpu)
 	}
 
 	wake_up_process(p);
+	wait_for_completion(&t->start_done);
 
 	return 0;
 }
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 92f90ae46f4a..df41026b60db 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = {
 	.negative_advice =	ipv4_negative_advice,
 	.link_failure =		ipv4_link_failure,
 	.update_pmtu =		ip_rt_update_pmtu,
-	.local_out =		ip_local_out,
+	.local_out =		__ip_local_out,
 	.entry_size =		sizeof(struct rtable),
 	.entries =		ATOMIC_INIT(0),
 };
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e591e09e5e4e..3a835578fd1c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1764,14 +1764,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	/* Avoid arithmetic overflow. Really, we could
-	   save rt_expires in seconds, likely valid_lft,
-	   but it would require division in fib gc, that it
-	   not good.
-	 */
-	if (valid_lft >= 0x7FFFFFFF/HZ)
+	if (valid_lft == INFINITY_LIFE_TIME)
+		rt_expires = ~0UL;
+	else if (valid_lft >= 0x7FFFFFFF/HZ) {
+		/* Avoid arithmetic overflow. Really, we could
+		 * save rt_expires in seconds, likely valid_lft,
+		 * but it would require division in fib gc, that it
+		 * not good.
+		 */
 		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-	else
+	} else
 		rt_expires = valid_lft * HZ;
 
 	/*
@@ -1779,7 +1781,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 * Avoid arithmetic overflow there as well.
 	 * Overflow can happen only if HZ < USER_HZ.
 	 */
-	if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ)
+	if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
 		rt_expires = 0x7FFFFFFF / USER_HZ;
 
 	if (pinfo->onlink) {
@@ -1788,17 +1790,28 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 				dev->ifindex, 1);
 
 	if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
-		if (rt->rt6i_flags&RTF_EXPIRES) {
-			if (valid_lft == 0) {
-				ip6_del_rt(rt);
-				rt = NULL;
-			} else {
-				rt->rt6i_expires = jiffies + rt_expires;
-			}
+		/* Autoconf prefix route */
+		if (valid_lft == 0) {
+			ip6_del_rt(rt);
+			rt = NULL;
+		} else if (~rt_expires) {
+			/* not infinity */
+			rt->rt6i_expires = jiffies + rt_expires;
+			rt->rt6i_flags |= RTF_EXPIRES;
+		} else {
+			rt->rt6i_flags &= ~RTF_EXPIRES;
+			rt->rt6i_expires = 0;
 		}
 	} else if (valid_lft) {
+		int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+		clock_t expires = 0;
+		if (~rt_expires) {
+			/* not infinity */
+			flags |= RTF_EXPIRES;
+			expires = jiffies_to_clock_t(rt_expires);
+		}
 		addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-				      dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+				      dev, expires, flags);
 	}
 	if (rt)
 		dst_release(&rt->u.dst);
@@ -2021,7 +2034,8 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	struct inet6_dev *idev;
 	struct net_device *dev;
 	int scope;
-	u32 flags = RTF_EXPIRES;
+	u32 flags;
+	clock_t expires;
 
 	ASSERT_RTNL();
 
@@ -2041,8 +2055,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	if (valid_lft == INFINITY_LIFE_TIME) {
 		ifa_flags |= IFA_F_PERMANENT;
 		flags = 0;
-	} else if (valid_lft >= 0x7FFFFFFF/HZ)
-		valid_lft = 0x7FFFFFFF/HZ;
+		expires = 0;
+	} else {
+		if (valid_lft >= 0x7FFFFFFF/HZ)
+			valid_lft = 0x7FFFFFFF/HZ;
+		flags = RTF_EXPIRES;
+		expires = jiffies_to_clock_t(valid_lft * HZ);
+	}
 
 	if (prefered_lft == 0)
 		ifa_flags |= IFA_F_DEPRECATED;
@@ -2060,7 +2079,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	spin_unlock_bh(&ifp->lock);
 
 	addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-			      jiffies_to_clock_t(valid_lft * HZ), flags);
+			      expires, flags);
 	/*
 	 * Note that section 3.1 of RFC 4429 indicates
 	 * that the Optimistic flag should not be set for
@@ -3148,7 +3167,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 			     u32 prefered_lft, u32 valid_lft)
 {
-	u32 flags = RTF_EXPIRES;
+	u32 flags;
+	clock_t expires;
 
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
@@ -3156,8 +3176,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 	if (valid_lft == INFINITY_LIFE_TIME) {
 		ifa_flags |= IFA_F_PERMANENT;
 		flags = 0;
-	} else if (valid_lft >= 0x7FFFFFFF/HZ)
-		valid_lft = 0x7FFFFFFF/HZ;
+		expires = 0;
+	} else {
+		if (valid_lft >= 0x7FFFFFFF/HZ)
+			valid_lft = 0x7FFFFFFF/HZ;
+		flags = RTF_EXPIRES;
+		expires = jiffies_to_clock_t(valid_lft * HZ);
+	}
 
 	if (prefered_lft == 0)
 		ifa_flags |= IFA_F_DEPRECATED;
@@ -3176,7 +3201,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 	ipv6_ifa_notify(0, ifp);
 
 	addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
-			      jiffies_to_clock_t(valid_lft * HZ), flags);
+			      expires, flags);
 	addrconf_verify(0);
 
 	return 0;
@@ -4242,7 +4267,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
 	neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
 			      NET_IPV6_NEIGH, "ipv6",
 			      &ndisc_ifinfo_sysctl_change,
-			      NULL);
+			      ndisc_ifinfo_sysctl_strategy);
 	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
 				   idev->dev->ifindex, idev, &idev->cnf);
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a55fc05b8125..282fdb31f8ed 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 	return ret;
 }
 
-static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-					int nlen, void __user *oldval,
-					size_t __user *oldlenp,
-					void __user *newval, size_t newlen)
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+				 int nlen, void __user *oldval,
+				 size_t __user *oldlenp,
+				 void __user *newval, size_t newlen)
 {
 	struct net_device *dev = ctl->extra1;
 	struct inet6_dev *idev;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 12bba0880345..48534c6c0735 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = {
 	.negative_advice	=	ip6_negative_advice,
 	.link_failure		=	ip6_link_failure,
 	.update_pmtu		=	ip6_rt_update_pmtu,
-	.local_out		=	ip6_local_out,
+	.local_out		=	__ip6_local_out,
 	.entry_size		=	sizeof(struct rt6_info),
 	.entries		=	ATOMIC_INIT(0),
 };
@@ -475,7 +475,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	lifetime = ntohl(rinfo->lifetime);
 	if (lifetime == 0xffffffff) {
 		/* infinity */
-	} else if (lifetime > 0x7fffffff/HZ) {
+	} else if (lifetime > 0x7fffffff/HZ - 1) {
 		/* Avoid arithmetic overflow */
 		lifetime = 0x7fffffff/HZ - 1;
 	}
@@ -1106,7 +1106,9 @@ int ip6_route_add(struct fib6_config *cfg)
 	}
 
 	rt->u.dst.obsolete = -1;
-	rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
+	rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ?
+				jiffies + clock_t_to_jiffies(cfg->fc_expires) :
+				0;
 
 	if (cfg->fc_protocol == RTPROT_UNSPEC)
 		cfg->fc_protocol = RTPROT_BOOT;
@@ -2200,7 +2202,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 
 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
-	expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
+	expires = (rt->rt6i_flags & RTF_EXPIRES) ?
+			rt->rt6i_expires - jiffies : 0;
+
 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
 			       expires, rt->u.dst.error) < 0)
 		goto nla_put_failure;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4adba09e80ca..e470bf12b765 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3446,21 +3446,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev,
 	struct ieee80211_sta_bss *bss, *selected = NULL;
 	int top_rssi = 0, freq;
 
-	if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
-	    IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) {
-		ifsta->state = IEEE80211_AUTHENTICATE;
-		ieee80211_sta_reset_auth(dev, ifsta);
-		return 0;
-	}
-
 	spin_lock_bh(&local->sta_bss_lock);
 	freq = local->oper_channel->center_freq;
 	list_for_each_entry(bss, &local->sta_bss_list, list) {
 		if (!(bss->capability & WLAN_CAPABILITY_ESS))
 			continue;
 
-		if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
-		    !!sdata->default_key)
+		if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
+		     IEEE80211_STA_AUTO_BSSID_SEL |
+		     IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
+		    (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
+		     !!sdata->default_key))
 			continue;
 
 		if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24a465c4df09..131e9e6c8a32 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces(
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 
+	rtnl_lock();
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		switch (sdata->vif.type) {
+		case IEEE80211_IF_TYPE_INVALID:
+		case IEEE80211_IF_TYPE_MNTR:
+		case IEEE80211_IF_TYPE_VLAN:
+			continue;
+		case IEEE80211_IF_TYPE_AP:
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+		case IEEE80211_IF_TYPE_WDS:
+		case IEEE80211_IF_TYPE_MESH_POINT:
+			break;
+		}
+		if (sdata->dev == local->mdev)
+			continue;
+		if (netif_running(sdata->dev))
+			iterator(data, sdata->dev->dev_addr,
+				 &sdata->vif);
+	}
+
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+
+void ieee80211_iterate_active_interfaces_atomic(
+	struct ieee80211_hw *hw,
+	void (*iterator)(void *data, u8 *mac,
+			 struct ieee80211_vif *vif),
+	void *data)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces(
 
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 1086df7478bc..9360fc81e8c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -220,7 +220,7 @@ replay:
 		tp = kzalloc(sizeof(*tp), GFP_KERNEL);
 		if (tp == NULL)
 			goto errout;
-		err = -EINVAL;
+		err = -ENOENT;
 		tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]);
 		if (tp_ops == NULL) {
 #ifdef CONFIG_KMOD
diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c
index d927d9f57412..744b79fdcb19 100644
--- a/net/sunrpc/auth_generic.c
+++ b/net/sunrpc/auth_generic.c
@@ -17,8 +17,8 @@
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
-#define RPC_ANONYMOUS_USERID	((uid_t)-2)
-#define RPC_ANONYMOUS_GROUPID	((gid_t)-2)
+#define RPC_MACHINE_CRED_USERID	((uid_t)0)
+#define RPC_MACHINE_CRED_GROUPID	((gid_t)0)
 
 struct generic_cred {
 	struct rpc_cred gc_base;
@@ -44,8 +44,8 @@ EXPORT_SYMBOL_GPL(rpc_lookup_cred);
 struct rpc_cred *rpc_lookup_machine_cred(void)
 {
 	struct auth_cred acred = {
-		.uid = RPC_ANONYMOUS_USERID,
-		.gid = RPC_ANONYMOUS_GROUPID,
+		.uid = RPC_MACHINE_CRED_USERID,
+		.gid = RPC_MACHINE_CRED_GROUPID,
 		.machine_cred = 1,
 	};
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index d8e8d79a8451..e46c825f4954 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,30 +6,9 @@
 
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 
@@ -296,8 +275,6 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 	if (!(xprt->xpt_flags &
 	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
 		return;
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
-		return;
 
 	cpu = get_cpu();
 	pool = svc_pool_for_cpu(xprt->xpt_server, cpu);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 3f30ee6006ae..f24800f2c098 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -278,7 +278,7 @@ static int ip_map_show(struct seq_file *m,
 		dom = im->m_client->h.name;
 
 	if (ipv6_addr_v4mapped(&addr)) {
-		seq_printf(m, "%s" NIPQUAD_FMT "%s\n",
+		seq_printf(m, "%s " NIPQUAD_FMT " %s\n",
 			   im->m_class,
 			   ntohl(addr.s6_addr32[3]) >> 24 & 0xff,
 			   ntohl(addr.s6_addr32[3]) >> 16 & 0xff,
@@ -286,7 +286,7 @@ static int ip_map_show(struct seq_file *m,
 			   ntohl(addr.s6_addr32[3]) >> 0 & 0xff,
 			   dom);
 	} else {
-		seq_printf(m, "%s" NIP6_FMT "%s\n",
+		seq_printf(m, "%s " NIP6_FMT " %s\n",
 			   im->m_class, NIP6(addr), dom);
 	}
 	return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c22d6b6f2db4..06ab4841537b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -260,11 +260,16 @@ static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
  * On our side, we need to read into a pagelist. The first page immediately
  * follows the RPC header.
  *
- * This function returns 1 to indicate success. The data is not yet in
+ * This function returns:
+ * 0 - No error and no read-list found.
+ *
+ * 1 - Successful read-list processing. The data is not yet in
  * the pagelist and therefore the RPC request must be deferred. The
  * I/O completion will enqueue the transport again and
  * svc_rdma_recvfrom will complete the request.
  *
+ * <0 - Error processing/posting read-list.
+ *
  * NOTE: The ctxt must not be touched after the last WR has been posted
  * because the I/O completion processing may occur on another
  * processor and free / modify the context. Ne touche pas!
@@ -284,7 +289,6 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	u64 sgl_offset;
 	struct rpcrdma_read_chunk *ch;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
-	struct svc_rdma_op_ctxt *head;
 	struct svc_rdma_op_ctxt *tmp_sge_ctxt;
 	struct svc_rdma_op_ctxt *tmp_ch_ctxt;
 	struct chunk_sge *ch_sge_ary;
@@ -302,25 +306,19 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
 	ch_sge_ary = (struct chunk_sge *)tmp_ch_ctxt->sge;
 
 	svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count);
+	if (ch_count > RPCSVC_MAXPAGES)
+		return -EINVAL;
 	sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp,
 				    sge, ch_sge_ary,
 				    ch_count, byte_count);
-	head = svc_rdma_get_context(xprt);
 	sgl_offset = 0;
 	ch_no = 0;
 
 	for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
 	     ch->rc_discrim != 0; ch++, ch_no++) {
 next_sge:
-		if (!ctxt)
-			ctxt = head;
-		else {
-			ctxt->next = svc_rdma_get_context(xprt);
-			ctxt = ctxt->next;
-		}
-		ctxt->next = NULL;
+		ctxt = svc_rdma_get_context(xprt);
 		ctxt->direction = DMA_FROM_DEVICE;
-		clear_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
 		clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
 
 		/* Prepare READ WR */
@@ -347,20 +345,15 @@ next_sge:
 		 * the client and the RPC needs to be enqueued.
 		 */
 		set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
-		ctxt->next = hdr_ctxt;
-		hdr_ctxt->next = head;
+		ctxt->read_hdr = hdr_ctxt;
 		}
 		/* Post the read */
 		err = svc_rdma_send(xprt, &read_wr);
 		if (err) {
-			printk(KERN_ERR "svcrdma: Error posting send = %d\n",
+			printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
 			       err);
-			/*
-			 * Break the circular list so free knows when
-			 * to stop if the error happened to occur on
-			 * the last read
-			 */
-			ctxt->next = NULL;
+			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+			svc_rdma_put_context(ctxt, 0);
 			goto out;
 		}
 		atomic_inc(&rdma_stat_read);
@@ -371,7 +364,7 @@ next_sge:
 			goto next_sge;
 		}
 		sgl_offset = 0;
-		err = 0;
+		err = 1;
 	}
 
  out:
@@ -389,25 +382,12 @@ next_sge:
 	while (rqstp->rq_resused)
 		rqstp->rq_respages[--rqstp->rq_resused] = NULL;
 
-	if (err) {
-		printk(KERN_ERR "svcrdma : RDMA_READ error = %d\n", err);
-		set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
-		/* Free the linked list of read contexts */
-		while (head != NULL) {
-			ctxt = head->next;
-			svc_rdma_put_context(head, 1);
-			head = ctxt;
-		}
-		return 0;
-	}
-
-	return 1;
+	return err;
 }
 
 static int rdma_read_complete(struct svc_rqst *rqstp,
-			      struct svc_rdma_op_ctxt *data)
+			      struct svc_rdma_op_ctxt *head)
 {
-	struct svc_rdma_op_ctxt *head = data->next;
 	int page_no;
 	int ret;
 
@@ -433,21 +413,12 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	rqstp->rq_arg.len = head->arg.len;
 	rqstp->rq_arg.buflen = head->arg.buflen;
 
+	/* Free the context */
+	svc_rdma_put_context(head, 0);
+
 	/* XXX: What should this be? */
 	rqstp->rq_prot = IPPROTO_MAX;
-
-	/*
-	 * Free the contexts we used to build the RDMA_READ. We have
-	 * to be careful here because the context list uses the same
-	 * next pointer used to chain the contexts associated with the
-	 * RDMA_READ
-	 */
-	data->next = NULL; /* terminate circular list */
-	do {
-		data = head->next;
-		svc_rdma_put_context(head, 0);
-		head = data;
-	} while (head != NULL);
+	svc_xprt_copy_addrs(rqstp, rqstp->rq_xprt);
 
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
@@ -457,8 +428,6 @@ static int rdma_read_complete(struct svc_rqst *rqstp,
 	       ret, rqstp->rq_arg.len, rqstp->rq_arg.head[0].iov_base,
 	       rqstp->rq_arg.head[0].iov_len);
 
-	/* Indicate that we've consumed an RQ credit */
-	rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
 	svc_xprt_received(rqstp->rq_xprt);
 	return ret;
 }
@@ -480,13 +449,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 
 	dprintk("svcrdma: rqstp=%p\n", rqstp);
 
-	/*
-	 * The rq_xprt_ctxt indicates if we've consumed an RQ credit
-	 * or not. It is used in the rdma xpo_release_rqst function to
-	 * determine whether or not to return an RQ WQE to the RQ.
-	 */
-	rqstp->rq_xprt_ctxt = NULL;
-
 	spin_lock_bh(&rdma_xprt->sc_read_complete_lock);
 	if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
 		ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
@@ -537,21 +499,22 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	/* If the request is invalid, reply with an error */
 	if (len < 0) {
 		if (len == -ENOSYS)
-			(void)svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
+			svc_rdma_send_error(rdma_xprt, rmsgp, ERR_VERS);
 		goto close_out;
 	}
 
-	/* Read read-list data. If we would need to wait, defer
-	 * it. Not that in this case, we don't return the RQ credit
-	 * until after the read completes.
-	 */
-	if (rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt)) {
+	/* Read read-list data. */
+	ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+	if (ret > 0) {
+		/* read-list posted, defer until data received from client. */
 		svc_xprt_received(xprt);
 		return 0;
 	}
-
-	/* Indicate we've consumed an RQ credit */
-	rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
+	if (ret < 0) {
+		/* Post of read-list failed, free context. */
+		svc_rdma_put_context(ctxt, 1);
+		return 0;
+	}
 
 	ret = rqstp->rq_arg.head[0].iov_len
 		+ rqstp->rq_arg.page_len
@@ -569,11 +532,8 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
 	return ret;
 
  close_out:
-	if (ctxt) {
+	if (ctxt)
 		svc_rdma_put_context(ctxt, 1);
-		/* Indicate we've consumed an RQ credit */
-		rqstp->rq_xprt_ctxt = rqstp->rq_xprt;
-	}
 	dprintk("svcrdma: transport %p is closing\n", xprt);
 	/*
 	 * Set the close bit and enqueue it. svc_recv will see the
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 981f190c1b39..fb82b1b683f8 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -389,6 +389,17 @@ static int send_reply(struct svcxprt_rdma *rdma,
 	int page_no;
 	int ret;
 
+	/* Post a recv buffer to handle another request. */
+	ret = svc_rdma_post_recv(rdma);
+	if (ret) {
+		printk(KERN_INFO
+		       "svcrdma: could not post a receive buffer, err=%d."
+		       "Closing transport %p.\n", ret, rdma);
+		set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
+		svc_rdma_put_context(ctxt, 0);
+		return -ENOTCONN;
+	}
+
 	/* Prepare the context */
 	ctxt->pages[0] = page;
 	ctxt->count = 1;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index af408fc12634..e132509d1db0 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -103,8 +103,8 @@ static int rdma_bump_context_cache(struct svcxprt_rdma *xprt)
 		spin_lock_bh(&xprt->sc_ctxt_lock);
 		if (ctxt) {
 			at_least_one = 1;
-			ctxt->next = xprt->sc_ctxt_head;
-			xprt->sc_ctxt_head = ctxt;
+			INIT_LIST_HEAD(&ctxt->free_list);
+			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 		} else {
 			/* kmalloc failed...give up for now */
 			xprt->sc_ctxt_cnt--;
@@ -123,7 +123,7 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 
 	while (1) {
 		spin_lock_bh(&xprt->sc_ctxt_lock);
-		if (unlikely(xprt->sc_ctxt_head == NULL)) {
+		if (unlikely(list_empty(&xprt->sc_ctxt_free))) {
 			/* Try to bump my cache. */
 			spin_unlock_bh(&xprt->sc_ctxt_lock);
 
@@ -136,12 +136,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
 			schedule_timeout_uninterruptible(msecs_to_jiffies(500));
 			continue;
 		}
-		ctxt = xprt->sc_ctxt_head;
-		xprt->sc_ctxt_head = ctxt->next;
+		ctxt = list_entry(xprt->sc_ctxt_free.next,
+				  struct svc_rdma_op_ctxt,
+				  free_list);
+		list_del_init(&ctxt->free_list);
 		spin_unlock_bh(&xprt->sc_ctxt_lock);
 		ctxt->xprt = xprt;
 		INIT_LIST_HEAD(&ctxt->dto_q);
 		ctxt->count = 0;
+		atomic_inc(&xprt->sc_ctxt_used);
 		break;
 	}
 	return ctxt;
@@ -159,14 +162,15 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
 			put_page(ctxt->pages[i]);
 
 	for (i = 0; i < ctxt->count; i++)
-		dma_unmap_single(xprt->sc_cm_id->device->dma_device,
-				 ctxt->sge[i].addr,
-				 ctxt->sge[i].length,
-				 ctxt->direction);
+		ib_dma_unmap_single(xprt->sc_cm_id->device,
+				    ctxt->sge[i].addr,
+				    ctxt->sge[i].length,
+				    ctxt->direction);
+
 	spin_lock_bh(&xprt->sc_ctxt_lock);
-	ctxt->next = xprt->sc_ctxt_head;
-	xprt->sc_ctxt_head = ctxt;
+	list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 	spin_unlock_bh(&xprt->sc_ctxt_lock);
+	atomic_dec(&xprt->sc_ctxt_used);
 }
 
 /* ib_cq event handler */
@@ -228,23 +232,8 @@ static void dto_tasklet_func(unsigned long data)
 		list_del_init(&xprt->sc_dto_q);
 		spin_unlock_irqrestore(&dto_lock, flags);
 
-		if (test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags)) {
-			ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
-			rq_cq_reap(xprt);
-			set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-			/*
-			 * If data arrived before established event,
-			 * don't enqueue. This defers RPC I/O until the
-			 * RDMA connection is complete.
-			 */
-			if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
-				svc_xprt_enqueue(&xprt->sc_xprt);
-		}
-
-		if (test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) {
-			ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
-			sq_cq_reap(xprt);
-		}
+		rq_cq_reap(xprt);
+		sq_cq_reap(xprt);
 
 		svc_xprt_put(&xprt->sc_xprt);
 		spin_lock_irqsave(&dto_lock, flags);
@@ -263,11 +252,15 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
 	struct svcxprt_rdma *xprt = cq_context;
 	unsigned long flags;
 
+	/* Guard against unconditional flush call for destroyed QP */
+	if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+		return;
+
 	/*
 	 * Set the bit regardless of whether or not it's on the list
 	 * because it may be on the list already due to an SQ
 	 * completion.
 	 */
 	set_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags);
 
 	/*
@@ -290,6 +283,8 @@ static void rq_comp_handler(struct ib_cq *cq, void *cq_context)
  *
  * Take all completing WC off the CQE and enqueue the associated DTO
  * context on the dto_q for the transport.
+ *
+ * Note that caller must hold a transport reference.
  */
 static void rq_cq_reap(struct svcxprt_rdma *xprt)
 {
@@ -297,29 +292,47 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt)
 	struct ib_wc wc;
 	struct svc_rdma_op_ctxt *ctxt = NULL;
 
+	if (!test_and_clear_bit(RDMAXPRT_RQ_PENDING, &xprt->sc_flags))
+		return;
+
+	ib_req_notify_cq(xprt->sc_rq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_rq_poll);
 
-	spin_lock_bh(&xprt->sc_rq_dto_lock);
 	while ((ret = ib_poll_cq(xprt->sc_rq_cq, 1, &wc)) > 0) {
 		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
 		ctxt->wc_status = wc.status;
 		ctxt->byte_len = wc.byte_len;
 		if (wc.status != IB_WC_SUCCESS) {
 			/* Close the transport */
+			dprintk("svcrdma: transport closing putting ctxt %p\n", ctxt);
 			set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
 			svc_rdma_put_context(ctxt, 1);
+			svc_xprt_put(&xprt->sc_xprt);
 			continue;
 		}
+		spin_lock_bh(&xprt->sc_rq_dto_lock);
 		list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+		spin_unlock_bh(&xprt->sc_rq_dto_lock);
+		svc_xprt_put(&xprt->sc_xprt);
 	}
-	spin_unlock_bh(&xprt->sc_rq_dto_lock);
 
 	if (ctxt)
 		atomic_inc(&rdma_stat_rq_prod);
+
+	set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
+	/*
+	 * If data arrived before established event,
+	 * don't enqueue. This defers RPC I/O until the
+	 * RDMA connection is complete.
+	 */
+	if (!test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags))
+		svc_xprt_enqueue(&xprt->sc_xprt);
 }
 
 /*
  * Send Queue Completion Handler - potentially called on interrupt context.
+ *
+ * Note that caller must hold a transport reference.
  */
 static void sq_cq_reap(struct svcxprt_rdma *xprt)
 {
@@ -328,6 +341,11 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 	struct ib_cq *cq = xprt->sc_sq_cq;
 	int ret;
 
+
+	if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
+		return;
+
+	ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
 	atomic_inc(&rdma_stat_sq_poll);
 	while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
 		ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
@@ -349,14 +367,16 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 
 		case IB_WR_RDMA_READ:
 			if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
+				struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
+				BUG_ON(!read_hdr);
 				set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
-				set_bit(RDMACTXT_F_READ_DONE, &ctxt->flags);
 				spin_lock_bh(&xprt->sc_read_complete_lock);
-				list_add_tail(&ctxt->dto_q,
+				list_add_tail(&read_hdr->dto_q,
 					      &xprt->sc_read_complete_q);
 				spin_unlock_bh(&xprt->sc_read_complete_lock);
 				svc_xprt_enqueue(&xprt->sc_xprt);
 			}
+			svc_rdma_put_context(ctxt, 0);
 			break;
 
 		default:
@@ -365,6 +385,7 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt)
 			       wc.opcode, wc.status);
 			break;
 		}
+		svc_xprt_put(&xprt->sc_xprt);
 	}
 
 	if (ctxt)
@@ -376,11 +397,15 @@ static void sq_comp_handler(struct ib_cq *cq, void *cq_context)
 	struct svcxprt_rdma *xprt = cq_context;
 	unsigned long flags;
 
+	/* Guard against unconditional flush call for destroyed QP */
+	if (atomic_read(&xprt->sc_xprt.xpt_ref.refcount)==0)
+		return;
+
 	/*
 	 * Set the bit regardless of whether or not it's on the list
 	 * because it may be on the list already due to an RQ
 	 * completion.
 	 */
 	set_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags);
 
 	/*
@@ -407,28 +432,29 @@ static void create_context_cache(struct svcxprt_rdma *xprt,
 	xprt->sc_ctxt_max = ctxt_max;
 	xprt->sc_ctxt_bump = ctxt_bump;
 	xprt->sc_ctxt_cnt = 0;
-	xprt->sc_ctxt_head = NULL;
+	atomic_set(&xprt->sc_ctxt_used, 0);
+
+	INIT_LIST_HEAD(&xprt->sc_ctxt_free);
 	for (i = 0; i < ctxt_count; i++) {
 		ctxt = kmalloc(sizeof(*ctxt), GFP_KERNEL);
 		if (ctxt) {
-			ctxt->next = xprt->sc_ctxt_head;
-			xprt->sc_ctxt_head = ctxt;
+			INIT_LIST_HEAD(&ctxt->free_list);
+			list_add(&ctxt->free_list, &xprt->sc_ctxt_free);
 			xprt->sc_ctxt_cnt++;
 		}
 	}
 }
 
-static void destroy_context_cache(struct svc_rdma_op_ctxt *ctxt)
+static void destroy_context_cache(struct svcxprt_rdma *xprt)
 {
-	struct svc_rdma_op_ctxt *next;
-	if (!ctxt)
-		return;
-
-	do {
-		next = ctxt->next;
+	while (!list_empty(&xprt->sc_ctxt_free)) {
+		struct svc_rdma_op_ctxt *ctxt;
+		ctxt = list_entry(xprt->sc_ctxt_free.next,
+				  struct svc_rdma_op_ctxt,
+				  free_list);
+		list_del_init(&ctxt->free_list);
 		kfree(ctxt);
-		ctxt = next;
-	} while (next);
+	}
 }
 
 static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
@@ -465,7 +491,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 			     reqs +
 			     cma_xprt->sc_sq_depth +
 			     RPCRDMA_MAX_THREADS + 1); /* max */
-	if (!cma_xprt->sc_ctxt_head) {
+	if (list_empty(&cma_xprt->sc_ctxt_free)) {
 		kfree(cma_xprt);
 		return NULL;
 	}
@@ -520,7 +546,12 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
 	recv_wr.num_sge = ctxt->count;
 	recv_wr.wr_id = (u64)(unsigned long)ctxt;
 
+	svc_xprt_get(&xprt->sc_xprt);
 	ret = ib_post_recv(xprt->sc_qp, &recv_wr, &bad_recv_wr);
+	if (ret) {
+		svc_xprt_put(&xprt->sc_xprt);
+		svc_rdma_put_context(ctxt, 1);
+	}
 	return ret;
 }
 
@@ -539,6 +570,7 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 {
 	struct svcxprt_rdma *listen_xprt = new_cma_id->context;
 	struct svcxprt_rdma *newxprt;
+	struct sockaddr *sa;
 
 	/* Create a new transport */
 	newxprt = rdma_create_xprt(listen_xprt->sc_xprt.xpt_server, 0);
@@ -551,6 +583,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id)
 	dprintk("svcrdma: Creating newxprt=%p, cm_id=%p, listenxprt=%p\n",
 		newxprt, newxprt->sc_cm_id, listen_xprt);
 
+	/* Set the local and remote addresses in the transport */
+	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
+	svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+	sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
+	svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
+
 	/*
 	 * Enqueue the new transport on the accept queue of the listening
 	 * transport
@@ -627,6 +665,7 @@ static int rdma_cma_handler(struct rdma_cm_id *cma_id,
 		if (xprt) {
 			set_bit(XPT_CLOSE, &xprt->xpt_flags);
 			svc_xprt_enqueue(xprt);
+			svc_xprt_put(xprt);
 		}
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
@@ -661,31 +700,27 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 
 	cma_xprt = rdma_create_xprt(serv, 1);
 	if (!cma_xprt)
-		return ERR_PTR(ENOMEM);
+		return ERR_PTR(-ENOMEM);
 	xprt = &cma_xprt->sc_xprt;
 
 	listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
 	if (IS_ERR(listen_id)) {
-		svc_xprt_put(&cma_xprt->sc_xprt);
-		dprintk("svcrdma: rdma_create_id failed = %ld\n",
-			PTR_ERR(listen_id));
-		return (void *)listen_id;
+		ret = PTR_ERR(listen_id);
+		dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
+		goto err0;
 	}
+
 	ret = rdma_bind_addr(listen_id, sa);
 	if (ret) {
-		rdma_destroy_id(listen_id);
-		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret);
-		return ERR_PTR(ret);
+		goto err1;
 	}
 	cma_xprt->sc_cm_id = listen_id;
 
 	ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG);
 	if (ret) {
-		rdma_destroy_id(listen_id);
-		svc_xprt_put(&cma_xprt->sc_xprt);
 		dprintk("svcrdma: rdma_listen failed = %d\n", ret);
-		return ERR_PTR(ret);
+		goto err1;
 	}
 
 	/*
@@ -696,6 +731,12 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
 	svc_xprt_set_local(&cma_xprt->sc_xprt, sa, salen);
 
 	return &cma_xprt->sc_xprt;
+
+ err1:
+	rdma_destroy_id(listen_id);
+ err0:
+	kfree(cma_xprt);
+	return ERR_PTR(ret);
 }
 
 /*
@@ -716,7 +757,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	struct rdma_conn_param conn_param;
 	struct ib_qp_init_attr qp_attr;
 	struct ib_device_attr devattr;
-	struct sockaddr *sa;
 	int ret;
 	int i;
 
@@ -826,7 +866,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 		newxprt->sc_sq_depth = qp_attr.cap.max_send_wr;
 		newxprt->sc_max_requests = qp_attr.cap.max_recv_wr;
 	}
-	svc_xprt_get(&newxprt->sc_xprt);
 	newxprt->sc_qp = newxprt->sc_cm_id->qp;
 
 	/* Register all of physical memory */
@@ -850,6 +889,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
 	/* Swap out the handler */
 	newxprt->sc_cm_id->event_handler = rdma_cma_handler;
 
+	/*
+	 * Arm the CQs for the SQ and RQ before accepting so we can't
+	 * miss the first message
+	 */
+	ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
+	ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
+
 	/* Accept Connection */
 	set_bit(RDMAXPRT_CONN_PENDING, &newxprt->sc_flags);
 	memset(&conn_param, 0, sizeof conn_param);
@@ -886,58 +932,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
886 newxprt->sc_max_requests, 932 newxprt->sc_max_requests,
887 newxprt->sc_ord); 933 newxprt->sc_ord);
888 934
889 /* Set the local and remote addresses in the transport */
890 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr;
891 svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa));
892 sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.src_addr;
893 svc_xprt_set_local(&newxprt->sc_xprt, sa, svc_addr_len(sa));
894
895 ib_req_notify_cq(newxprt->sc_sq_cq, IB_CQ_NEXT_COMP);
896 ib_req_notify_cq(newxprt->sc_rq_cq, IB_CQ_NEXT_COMP);
897 return &newxprt->sc_xprt; 935 return &newxprt->sc_xprt;
898 936
899 errout: 937 errout:
900 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); 938 dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret);
901 /* Take a reference in case the DTO handler runs */ 939 /* Take a reference in case the DTO handler runs */
902 svc_xprt_get(&newxprt->sc_xprt); 940 svc_xprt_get(&newxprt->sc_xprt);
903 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) { 941 if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp))
904 ib_destroy_qp(newxprt->sc_qp); 942 ib_destroy_qp(newxprt->sc_qp);
905 svc_xprt_put(&newxprt->sc_xprt);
906 }
907 rdma_destroy_id(newxprt->sc_cm_id); 943 rdma_destroy_id(newxprt->sc_cm_id);
908 /* This call to put will destroy the transport */ 944 /* This call to put will destroy the transport */
909 svc_xprt_put(&newxprt->sc_xprt); 945 svc_xprt_put(&newxprt->sc_xprt);
910 return NULL; 946 return NULL;
911} 947}
912 948
913/*
914 * Post an RQ WQE to the RQ when the rqst is being released. This
915 * effectively returns an RQ credit to the client. The rq_xprt_ctxt
916 * will be null if the request is deferred due to an RDMA_READ or the
917 * transport had no data ready (EAGAIN). Note that an RPC deferred in
918 * svc_process will still return the credit; this is because the data
919 * is copied and no longer consumes a WQE/WC.
920 */
921static void svc_rdma_release_rqst(struct svc_rqst *rqstp) 949static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
922{ 950{
923 int err;
924 struct svcxprt_rdma *rdma =
925 container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
926 if (rqstp->rq_xprt_ctxt) {
927 BUG_ON(rqstp->rq_xprt_ctxt != rdma);
928 err = svc_rdma_post_recv(rdma);
929 if (err)
930 dprintk("svcrdma: failed to post an RQ WQE error=%d\n",
931 err);
932 }
933 rqstp->rq_xprt_ctxt = NULL;
934} 951}
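The deleted comment and body above describe how releasing a request used to return an RQ credit by posting a fresh receive WQE; after this change svc_rdma_release_rqst only clears rq_xprt_ctxt, and the receive queue is presumably replenished earlier in the receive/reply path. The building block is ib_post_recv(); a minimal sketch, treating the buffer, lkey and context arguments as hypothetical inputs rather than fields from this file:

/* Sketch: post one receive buffer, i.e. hand one RQ credit back. */
static int repost_one_recv(struct ib_qp *qp, void *ctxt,
                           u64 dma_addr, u32 length, u32 lkey)
{
        struct ib_sge sge = {
                .addr   = dma_addr,     /* buffer is already DMA-mapped */
                .length = length,
                .lkey   = lkey,         /* key of the registered memory */
        };
        struct ib_recv_wr wr = {
                .wr_id          = (u64)(unsigned long)ctxt,
                .sg_list        = &sge,
                .num_sge        = 1,
        };
        struct ib_recv_wr *bad_wr;

        /* The WR is copied by the provider during the post, so stack
         * allocation of wr/sge is fine; only the data buffer must live on. */
        return ib_post_recv(qp, &wr, &bad_wr);
}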
935 952
936/* 953/*
937 * When connected, an svc_xprt has at least three references: 954 * When connected, an svc_xprt has at least two references:
938 *
939 * - A reference held by the QP. We still hold that here because this
940 * code deletes the QP and puts the reference.
941 * 955 *
942 * - A reference held by the cm_id between the ESTABLISHED and 956 * - A reference held by the cm_id between the ESTABLISHED and
943 * DISCONNECTED events. If the remote peer disconnected first, this 957 * DISCONNECTED events. If the remote peer disconnected first, this
@@ -946,7 +960,7 @@ static void svc_rdma_release_rqst(struct svc_rqst *rqstp)
946 * - A reference held by the svc_recv code that called this function 960 * - A reference held by the svc_recv code that called this function
947 * as part of close processing. 961 * as part of close processing.
948 * 962 *
949 * At a minimum two references should still be held.                      963 * At a minimum one reference should still be held.
950 */ 964 */
951static void svc_rdma_detach(struct svc_xprt *xprt) 965static void svc_rdma_detach(struct svc_xprt *xprt)
952{ 966{
@@ -956,23 +970,53 @@ static void svc_rdma_detach(struct svc_xprt *xprt)
956 970
957 /* Disconnect and flush posted WQE */ 971 /* Disconnect and flush posted WQE */
958 rdma_disconnect(rdma->sc_cm_id); 972 rdma_disconnect(rdma->sc_cm_id);
959
960 /* Destroy the QP if present (not a listener) */
961 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) {
962 ib_destroy_qp(rdma->sc_qp);
963 svc_xprt_put(xprt);
964 }
965
966 /* Destroy the CM ID */
967 rdma_destroy_id(rdma->sc_cm_id);
968} 973}
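With the QP reference gone, the transport now lives on the cm_id reference (taken at ESTABLISHED, dropped at DISCONNECTED or DEVICE_REMOVAL) plus whatever reference the caller of svc_rdma_detach() holds. A minimal sketch of that get/put discipline in a CM event handler follows; this is not the rdma_cma_handler from this file, and the context wiring is assumed:

/* Sketch only: the reference that brackets a connection's lifetime. */
static int example_cma_handler(struct rdma_cm_id *cma_id,
                               struct rdma_cm_event *event)
{
        struct svc_xprt *xprt = cma_id->context; /* assumed set at create time */

        switch (event->event) {
        case RDMA_CM_EVENT_ESTABLISHED:
                svc_xprt_get(xprt);     /* held until the connection ends */
                break;
        case RDMA_CM_EVENT_DISCONNECTED:
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                /* A real handler typically also marks the transport closed
                 * (XPT_CLOSE) so svc_recv() runs the detach path above. */
                svc_xprt_put(xprt);     /* drop the ESTABLISHED reference */
                break;
        default:
                break;
        }
        return 0;
}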
969 974
970static void svc_rdma_free(struct svc_xprt *xprt) 975static void __svc_rdma_free(struct work_struct *work)
971{ 976{
972 struct svcxprt_rdma *rdma = (struct svcxprt_rdma *)xprt; 977 struct svcxprt_rdma *rdma =
978 container_of(work, struct svcxprt_rdma, sc_work);
973 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma); 979 dprintk("svcrdma: svc_rdma_free(%p)\n", rdma);
980
974 /* We should only be called from kref_put */ 981 /* We should only be called from kref_put */
975 BUG_ON(atomic_read(&xprt->xpt_ref.refcount) != 0); 982 BUG_ON(atomic_read(&rdma->sc_xprt.xpt_ref.refcount) != 0);
983
984 /*
985 * Destroy queued, but not processed read completions. Note
986 * that this cleanup has to be done before destroying the
987 * cm_id because the device ptr is needed to unmap the dma in
988 * svc_rdma_put_context.
989 */
990 spin_lock_bh(&rdma->sc_read_complete_lock);
991 while (!list_empty(&rdma->sc_read_complete_q)) {
992 struct svc_rdma_op_ctxt *ctxt;
993 ctxt = list_entry(rdma->sc_read_complete_q.next,
994 struct svc_rdma_op_ctxt,
995 dto_q);
996 list_del_init(&ctxt->dto_q);
997 svc_rdma_put_context(ctxt, 1);
998 }
999 spin_unlock_bh(&rdma->sc_read_complete_lock);
1000
1001 /* Destroy queued, but not processed recv completions */
1002 spin_lock_bh(&rdma->sc_rq_dto_lock);
1003 while (!list_empty(&rdma->sc_rq_dto_q)) {
1004 struct svc_rdma_op_ctxt *ctxt;
1005 ctxt = list_entry(rdma->sc_rq_dto_q.next,
1006 struct svc_rdma_op_ctxt,
1007 dto_q);
1008 list_del_init(&ctxt->dto_q);
1009 svc_rdma_put_context(ctxt, 1);
1010 }
1011 spin_unlock_bh(&rdma->sc_rq_dto_lock);
1012
1013 /* Warn if we leaked a resource or under-referenced */
1014 WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0);
1015
1016 /* Destroy the QP if present (not a listener) */
1017 if (rdma->sc_qp && !IS_ERR(rdma->sc_qp))
1018 ib_destroy_qp(rdma->sc_qp);
1019
976 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq)) 1020 if (rdma->sc_sq_cq && !IS_ERR(rdma->sc_sq_cq))
977 ib_destroy_cq(rdma->sc_sq_cq); 1021 ib_destroy_cq(rdma->sc_sq_cq);
978 1022
@@ -985,10 +1029,21 @@ static void svc_rdma_free(struct svc_xprt *xprt)
985 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd)) 1029 if (rdma->sc_pd && !IS_ERR(rdma->sc_pd))
986 ib_dealloc_pd(rdma->sc_pd); 1030 ib_dealloc_pd(rdma->sc_pd);
987 1031
988 destroy_context_cache(rdma->sc_ctxt_head); 1032 /* Destroy the CM ID */
1033 rdma_destroy_id(rdma->sc_cm_id);
1034
1035 destroy_context_cache(rdma);
989 kfree(rdma); 1036 kfree(rdma);
990} 1037}
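Both drain loops above share one shape: under the queue's lock, detach the head entry and release its context until the list is empty, and do it before rdma_destroy_id() so the device pointer is still valid for DMA unmapping. The same drain is often written with list_for_each_entry_safe(); a sketch assuming the field names used above:

        struct svc_rdma_op_ctxt *ctxt, *next;

        spin_lock_bh(&rdma->sc_read_complete_lock);
        list_for_each_entry_safe(ctxt, next, &rdma->sc_read_complete_q, dto_q) {
                /* 'next' was fetched before ctxt is released, so this is safe. */
                list_del_init(&ctxt->dto_q);
                svc_rdma_put_context(ctxt, 1);
        }
        spin_unlock_bh(&rdma->sc_read_complete_lock);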
991 1038
1039static void svc_rdma_free(struct svc_xprt *xprt)
1040{
1041 struct svcxprt_rdma *rdma =
1042 container_of(xprt, struct svcxprt_rdma, sc_xprt);
1043 INIT_WORK(&rdma->sc_work, __svc_rdma_free);
1044 schedule_work(&rdma->sc_work);
1045}
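Moving the teardown into __svc_rdma_free() and running it from a work item means the final svc_xprt_put() may safely come from a context that cannot sleep (a completion tasklet, for instance), while ib_destroy_qp(), ib_destroy_cq() and rdma_destroy_id() run in process context where blocking is allowed. Stripped of the RDMA specifics, the pattern looks roughly like this (all names here are hypothetical):

#include <linux/workqueue.h>
#include <linux/slab.h>

struct deferred_obj {
        struct work_struct free_work;
        /* ... resources whose teardown may sleep ... */
};

static void deferred_obj_free_work(struct work_struct *work)
{
        struct deferred_obj *obj =
                container_of(work, struct deferred_obj, free_work);

        /* Process context: blocking destructor calls are fine here. */
        kfree(obj);
}

/* Called when the last reference is dropped, possibly in_softirq(). */
static void deferred_obj_release(struct deferred_obj *obj)
{
        INIT_WORK(&obj->free_work, deferred_obj_free_work);
        schedule_work(&obj->free_work);
}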
1046
992static int svc_rdma_has_wspace(struct svc_xprt *xprt) 1047static int svc_rdma_has_wspace(struct svc_xprt *xprt)
993{ 1048{
994 struct svcxprt_rdma *rdma = 1049 struct svcxprt_rdma *rdma =
@@ -1018,7 +1073,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1018 int ret; 1073 int ret;
1019 1074
1020 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) 1075 if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
1021 return 0; 1076 return -ENOTCONN;
1022 1077
1023 BUG_ON(wr->send_flags != IB_SEND_SIGNALED); 1078 BUG_ON(wr->send_flags != IB_SEND_SIGNALED);
1024 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != 1079 BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op !=
@@ -1029,7 +1084,8 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1029 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { 1084 if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) {
1030 spin_unlock_bh(&xprt->sc_lock); 1085 spin_unlock_bh(&xprt->sc_lock);
1031 atomic_inc(&rdma_stat_sq_starve); 1086 atomic_inc(&rdma_stat_sq_starve);
1032 /* See if we can reap some SQ WR */ 1087
1088 /* See if we can opportunistically reap SQ WR to make room */
1033 sq_cq_reap(xprt); 1089 sq_cq_reap(xprt);
1034 1090
1035 /* Wait until SQ WR available if SQ still full */ 1091 /* Wait until SQ WR available if SQ still full */
@@ -1041,22 +1097,25 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
1041 continue; 1097 continue;
1042 } 1098 }
1043 /* Bumped used SQ WR count and post */ 1099 /* Bumped used SQ WR count and post */
1100 svc_xprt_get(&xprt->sc_xprt);
1044 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); 1101 ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
1045 if (!ret) 1102 if (!ret)
1046 atomic_inc(&xprt->sc_sq_count); 1103 atomic_inc(&xprt->sc_sq_count);
1047 else 1104 else {
1105 svc_xprt_put(&xprt->sc_xprt);
1048 dprintk("svcrdma: failed to post SQ WR rc=%d, " 1106 dprintk("svcrdma: failed to post SQ WR rc=%d, "
1049 "sc_sq_count=%d, sc_sq_depth=%d\n", 1107 "sc_sq_count=%d, sc_sq_depth=%d\n",
1050 ret, atomic_read(&xprt->sc_sq_count), 1108 ret, atomic_read(&xprt->sc_sq_count),
1051 xprt->sc_sq_depth); 1109 xprt->sc_sq_depth);
1110 }
1052 spin_unlock_bh(&xprt->sc_lock); 1111 spin_unlock_bh(&xprt->sc_lock);
1053 break; 1112 break;
1054 } 1113 }
1055 return ret; 1114 return ret;
1056} 1115}
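Taking a transport reference just before ib_post_send() and dropping it only when the post fails ties the transport's lifetime to every outstanding signaled WR, so the completion handler can never touch a freed transport; the matching put then belongs in the SQ reap path. Condensed, and ignoring the flow-control wait loop above, the pattern is roughly:

/* Sketch: one reference per successfully posted, signaled send WR.
 * Not a drop-in replacement for svc_rdma_send(); it omits the SQ-full wait. */
static int post_signaled_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
{
        struct ib_send_wr *bad_wr;
        int ret;

        svc_xprt_get(&xprt->sc_xprt);   /* owned by the future completion */
        ret = ib_post_send(xprt->sc_qp, wr, &bad_wr);
        if (ret) {
                /* No completion will arrive for this WR: give the ref back. */
                svc_xprt_put(&xprt->sc_xprt);
                return ret;
        }
        atomic_inc(&xprt->sc_sq_count);
        return 0;
}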
1057 1116
1058int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, 1117void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1059 enum rpcrdma_errcode err) 1118 enum rpcrdma_errcode err)
1060{ 1119{
1061 struct ib_send_wr err_wr; 1120 struct ib_send_wr err_wr;
1062 struct ib_sge sge; 1121 struct ib_sge sge;
@@ -1094,9 +1153,8 @@ int svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
1094 /* Post It */ 1153 /* Post It */
1095 ret = svc_rdma_send(xprt, &err_wr); 1154 ret = svc_rdma_send(xprt, &err_wr);
1096 if (ret) { 1155 if (ret) {
1097 dprintk("svcrdma: Error posting send = %d\n", ret); 1156 dprintk("svcrdma: Error %d posting send for protocol error\n",
1157 ret);
1098 svc_rdma_put_context(ctxt, 1); 1158 svc_rdma_put_context(ctxt, 1);
1099 } 1159 }
1100
1101 return ret;
1102} 1160}