aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/infiniband
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
commit8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
treea8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /drivers/infiniband
parent406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'drivers/infiniband')
-rw-r--r--drivers/infiniband/Kconfig2
-rw-r--r--drivers/infiniband/Makefile2
-rw-r--r--drivers/infiniband/core/addr.c66
-rw-r--r--drivers/infiniband/core/cache.c43
-rw-r--r--drivers/infiniband/core/cm.c84
-rw-r--r--drivers/infiniband/core/cm_msgs.h45
-rw-r--r--drivers/infiniband/core/cma.c241
-rw-r--r--drivers/infiniband/core/device.c16
-rw-r--r--drivers/infiniband/core/fmr_pool.c1
-rw-r--r--drivers/infiniband/core/iwcm.c25
-rw-r--r--drivers/infiniband/core/mad.c47
-rw-r--r--drivers/infiniband/core/multicast.c1
-rw-r--r--drivers/infiniband/core/netlink.c32
-rw-r--r--drivers/infiniband/core/packer.c1
-rw-r--r--drivers/infiniband/core/sa_query.c133
-rw-r--r--drivers/infiniband/core/sysfs.c31
-rw-r--r--drivers/infiniband/core/ucm.c6
-rw-r--r--drivers/infiniband/core/ucma.c99
-rw-r--r--drivers/infiniband/core/ud_header.c1
-rw-r--r--drivers/infiniband/core/umem.c9
-rw-r--r--drivers/infiniband/core/user_mad.c7
-rw-r--r--drivers/infiniband/core/uverbs.h18
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c760
-rw-r--r--drivers/infiniband/core/uverbs_main.c43
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c1
-rw-r--r--drivers/infiniband/core/verbs.c392
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c18
-rw-r--r--drivers/infiniband/hw/amso1100/c2.h8
-rw-r--r--drivers/infiniband/hw/amso1100/c2_ae.c6
-rw-r--r--drivers/infiniband/hw/amso1100/c2_intr.c5
-rw-r--r--drivers/infiniband/hw/amso1100/c2_pd.c4
-rw-r--r--drivers/infiniband/hw/amso1100/c2_provider.c7
-rw-r--r--drivers/infiniband/hw/amso1100/c2_qp.c4
-rw-r--r--drivers/infiniband/hw/amso1100/c2_rnic.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/Makefile2
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cm.c34
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_ev.c6
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.h1
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_qp.c38
-rw-r--r--drivers/infiniband/hw/cxgb4/Makefile4
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c1292
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/device.c591
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c18
-rw-r--r--drivers/infiniband/hw/cxgb4/id_table.c112
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h190
-rw-r--r--drivers/infiniband/hw/cxgb4/mem.c23
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c21
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c209
-rw-r--r--drivers/infiniband/hw/cxgb4/resource.c180
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h24
-rw-r--r--drivers/infiniband/hw/cxgb4/user.h2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_classes.h4
-rw-r--r--drivers/infiniband/hw/ehca/ehca_cq.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_eq.c6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_hca.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c249
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.h6
-rw-r--r--drivers/infiniband/hw/ehca/ehca_main.c16
-rw-r--r--drivers/infiniband/hw/ehca/ehca_mrmw.c47
-rw-r--r--drivers/infiniband/hw/ehca/ehca_qp.c9
-rw-r--r--drivers/infiniband/hw/ehca/ehca_reqs.c3
-rw-r--r--drivers/infiniband/hw/ehca/ehca_tools.h1
-rw-r--r--drivers/infiniband/hw/ehca/ehca_uverbs.c4
-rw-r--r--drivers/infiniband/hw/ehca/hcp_if.c32
-rw-r--r--drivers/infiniband/hw/ehca/ipz_pt_fn.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_diag.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_driver.c13
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_fs.c8
-rw-r--r--drivers/infiniband/hw/ipath/ipath_iba6110.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_init_chip.c12
-rw-r--r--drivers/infiniband/hw/ipath/ipath_intr.c3
-rw-r--r--drivers/infiniband/hw/ipath/ipath_srq.c5
-rw-r--r--drivers/infiniband/hw/ipath/ipath_sysfs.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_user_pages.c6
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c1
-rw-r--r--drivers/infiniband/hw/mlx4/Kconfig3
-rw-r--r--drivers/infiniband/hw/mlx4/Makefile2
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c2
-rw-r--r--drivers/infiniband/hw/mlx4/alias_GUID.c688
-rw-r--r--drivers/infiniband/hw/mlx4/cm.c437
-rw-r--r--drivers/infiniband/hw/mlx4/cq.c90
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c1708
-rw-r--r--drivers/infiniband/hw/mlx4/main.c685
-rw-r--r--drivers/infiniband/hw/mlx4/mcg.c1256
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h385
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c2
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c891
-rw-r--r--drivers/infiniband/hw/mlx4/srq.c12
-rw-r--r--drivers/infiniband/hw/mlx4/sysfs.c794
-rw-r--r--drivers/infiniband/hw/mlx4/user.h12
-rw-r--r--drivers/infiniband/hw/mthca/mthca_catas.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cmd.c1
-rw-r--r--drivers/infiniband/hw/mthca/mthca_cq.c3
-rw-r--r--drivers/infiniband/hw/mthca/mthca_main.c9
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mr.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c4
-rw-r--r--drivers/infiniband/hw/mthca/mthca_reset.c8
-rw-r--r--drivers/infiniband/hw/nes/Makefile2
-rw-r--r--drivers/infiniband/hw/nes/nes.c23
-rw-r--r--drivers/infiniband/hw/nes/nes.h38
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c1204
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.h77
-rw-r--r--drivers/infiniband/hw/nes/nes_context.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c126
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.h37
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.c1160
-rw-r--r--drivers/infiniband/hw/nes/nes_mgt.h97
-rw-r--r--drivers/infiniband/hw/nes/nes_nic.c99
-rw-r--r--drivers/infiniband/hw/nes/nes_user.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_utils.c57
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c55
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.h14
-rw-r--r--drivers/infiniband/hw/ocrdma/Kconfig8
-rw-r--r--drivers/infiniband/hw/ocrdma/Makefile5
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h393
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_abi.h131
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c172
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.h42
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c2631
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h132
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c580
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h1675
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c2536
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.h93
-rw-r--r--drivers/infiniband/hw/qib/qib.h107
-rw-r--r--drivers/infiniband/hw/qib/qib_7220.h2
-rw-r--r--drivers/infiniband/hw/qib/qib_common.h14
-rw-r--r--drivers/infiniband/hw/qib/qib_diag.c14
-rw-r--r--drivers/infiniband/hw/qib/qib_driver.c47
-rw-r--r--drivers/infiniband/hw/qib/qib_eeprom.c41
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c68
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c31
-rw-r--r--drivers/infiniband/hw/qib/qib_iba6120.c96
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7220.c102
-rw-r--r--drivers/infiniband/hw/qib/qib_iba7322.c446
-rw-r--r--drivers/infiniband/hw/qib/qib_init.c262
-rw-r--r--drivers/infiniband/hw/qib/qib_intr.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_keys.c147
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c397
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.h198
-rw-r--r--drivers/infiniband/hw/qib/qib_mr.c247
-rw-r--r--drivers/infiniband/hw/qib/qib_pcie.c89
-rw-r--r--drivers/infiniband/hw/qib/qib_qp.c145
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.c35
-rw-r--r--drivers/infiniband/hw/qib/qib_qsfp.h5
-rw-r--r--drivers/infiniband/hw/qib/qib_rc.c73
-rw-r--r--drivers/infiniband/hw/qib/qib_ruc.c33
-rw-r--r--drivers/infiniband/hw/qib/qib_sd7220.c46
-rw-r--r--drivers/infiniband/hw/qib/qib_sdma.c12
-rw-r--r--drivers/infiniband/hw/qib/qib_srq.c5
-rw-r--r--drivers/infiniband/hw/qib/qib_sysfs.c258
-rw-r--r--drivers/infiniband/hw/qib/qib_twsi.c8
-rw-r--r--drivers/infiniband/hw/qib/qib_tx.c26
-rw-r--r--drivers/infiniband/hw/qib/qib_uc.c59
-rw-r--r--drivers/infiniband/hw/qib/qib_ud.c28
-rw-r--r--drivers/infiniband/hw/qib/qib_user_pages.c4
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c148
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h200
-rw-r--r--drivers/infiniband/hw/qib/qib_wc_x86_64.c14
-rw-r--r--drivers/infiniband/ulp/ipoib/Makefile3
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h85
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c66
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c55
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c756
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c77
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c172
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c124
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c115
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h22
-rw-r--r--drivers/infiniband/ulp/iser/iser_initiator.c57
-rw-r--r--drivers/infiniband/ulp/iser/iser_memory.c8
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c178
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c499
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.h11
-rw-r--r--drivers/infiniband/ulp/srpt/Kconfig12
-rw-r--r--drivers/infiniband/ulp/srpt/Makefile2
-rw-r--r--drivers/infiniband/ulp/srpt/ib_dm_mad.h139
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c4018
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h442
184 files changed, 3962 insertions, 30051 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index a0f29c1d03b..0f9a84c1046 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -51,12 +51,10 @@ source "drivers/infiniband/hw/cxgb3/Kconfig"
51source "drivers/infiniband/hw/cxgb4/Kconfig" 51source "drivers/infiniband/hw/cxgb4/Kconfig"
52source "drivers/infiniband/hw/mlx4/Kconfig" 52source "drivers/infiniband/hw/mlx4/Kconfig"
53source "drivers/infiniband/hw/nes/Kconfig" 53source "drivers/infiniband/hw/nes/Kconfig"
54source "drivers/infiniband/hw/ocrdma/Kconfig"
55 54
56source "drivers/infiniband/ulp/ipoib/Kconfig" 55source "drivers/infiniband/ulp/ipoib/Kconfig"
57 56
58source "drivers/infiniband/ulp/srp/Kconfig" 57source "drivers/infiniband/ulp/srp/Kconfig"
59source "drivers/infiniband/ulp/srpt/Kconfig"
60 58
61source "drivers/infiniband/ulp/iser/Kconfig" 59source "drivers/infiniband/ulp/iser/Kconfig"
62 60
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index bf846a14b9d..9cc7a47d3e6 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -8,8 +8,6 @@ obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
8obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/ 8obj-$(CONFIG_INFINIBAND_CXGB4) += hw/cxgb4/
9obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ 9obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/
10obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ 10obj-$(CONFIG_INFINIBAND_NES) += hw/nes/
11obj-$(CONFIG_INFINIBAND_OCRDMA) += hw/ocrdma/
12obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ 11obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/
13obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ 12obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/
14obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/
15obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ 13obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index eaec8d7a3b7..f2a84c6f854 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -37,7 +37,6 @@
37#include <linux/inetdevice.h> 37#include <linux/inetdevice.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/workqueue.h> 39#include <linux/workqueue.h>
40#include <linux/module.h>
41#include <net/arp.h> 40#include <net/arp.h>
42#include <net/neighbour.h> 41#include <net/neighbour.h>
43#include <net/route.h> 42#include <net/route.h>
@@ -129,7 +128,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
129 dev_put(dev); 128 dev_put(dev);
130 break; 129 break;
131 130
132#if IS_ENABLED(CONFIG_IPV6) 131#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
133 case AF_INET6: 132 case AF_INET6:
134 rcu_read_lock(); 133 rcu_read_lock();
135 for_each_netdev_rcu(&init_net, dev) { 134 for_each_netdev_rcu(&init_net, dev) {
@@ -152,11 +151,13 @@ static void set_timeout(unsigned long time)
152{ 151{
153 unsigned long delay; 152 unsigned long delay;
154 153
154 cancel_delayed_work(&work);
155
155 delay = time - jiffies; 156 delay = time - jiffies;
156 if ((long)delay <= 0) 157 if ((long)delay <= 0)
157 delay = 1; 158 delay = 1;
158 159
159 mod_delayed_work(addr_wq, &work, delay); 160 queue_delayed_work(addr_wq, &work, delay);
160} 161}
161 162
162static void queue_req(struct addr_req *req) 163static void queue_req(struct addr_req *req)
@@ -176,29 +177,6 @@ static void queue_req(struct addr_req *req)
176 mutex_unlock(&lock); 177 mutex_unlock(&lock);
177} 178}
178 179
179static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr)
180{
181 struct neighbour *n;
182 int ret;
183
184 n = dst_neigh_lookup(dst, daddr);
185
186 rcu_read_lock();
187 if (!n || !(n->nud_state & NUD_VALID)) {
188 if (n)
189 neigh_event_send(n, NULL);
190 ret = -ENODATA;
191 } else {
192 ret = rdma_copy_addr(dev_addr, dst->dev, n->ha);
193 }
194 rcu_read_unlock();
195
196 if (n)
197 neigh_release(n);
198
199 return ret;
200}
201
202static int addr4_resolve(struct sockaddr_in *src_in, 180static int addr4_resolve(struct sockaddr_in *src_in,
203 struct sockaddr_in *dst_in, 181 struct sockaddr_in *dst_in,
204 struct rdma_dev_addr *addr) 182 struct rdma_dev_addr *addr)
@@ -206,6 +184,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
206 __be32 src_ip = src_in->sin_addr.s_addr; 184 __be32 src_ip = src_in->sin_addr.s_addr;
207 __be32 dst_ip = dst_in->sin_addr.s_addr; 185 __be32 dst_ip = dst_in->sin_addr.s_addr;
208 struct rtable *rt; 186 struct rtable *rt;
187 struct neighbour *neigh;
209 struct flowi4 fl4; 188 struct flowi4 fl4;
210 int ret; 189 int ret;
211 190
@@ -234,25 +213,39 @@ static int addr4_resolve(struct sockaddr_in *src_in,
234 goto put; 213 goto put;
235 } 214 }
236 215
237 ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); 216 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev);
217 if (!neigh || !(neigh->nud_state & NUD_VALID)) {
218 rcu_read_lock();
219 neigh_event_send(dst_get_neighbour(&rt->dst), NULL);
220 rcu_read_unlock();
221 ret = -ENODATA;
222 if (neigh)
223 goto release;
224 goto put;
225 }
226
227 ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
228release:
229 neigh_release(neigh);
238put: 230put:
239 ip_rt_put(rt); 231 ip_rt_put(rt);
240out: 232out:
241 return ret; 233 return ret;
242} 234}
243 235
244#if IS_ENABLED(CONFIG_IPV6) 236#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
245static int addr6_resolve(struct sockaddr_in6 *src_in, 237static int addr6_resolve(struct sockaddr_in6 *src_in,
246 struct sockaddr_in6 *dst_in, 238 struct sockaddr_in6 *dst_in,
247 struct rdma_dev_addr *addr) 239 struct rdma_dev_addr *addr)
248{ 240{
249 struct flowi6 fl6; 241 struct flowi6 fl6;
242 struct neighbour *neigh;
250 struct dst_entry *dst; 243 struct dst_entry *dst;
251 int ret; 244 int ret;
252 245
253 memset(&fl6, 0, sizeof fl6); 246 memset(&fl6, 0, sizeof fl6);
254 fl6.daddr = dst_in->sin6_addr; 247 ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
255 fl6.saddr = src_in->sin6_addr; 248 ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
256 fl6.flowi6_oif = addr->bound_dev_if; 249 fl6.flowi6_oif = addr->bound_dev_if;
257 250
258 dst = ip6_route_output(&init_net, NULL, &fl6); 251 dst = ip6_route_output(&init_net, NULL, &fl6);
@@ -266,7 +259,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
266 goto put; 259 goto put;
267 260
268 src_in->sin6_family = AF_INET6; 261 src_in->sin6_family = AF_INET6;
269 src_in->sin6_addr = fl6.saddr; 262 ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
270 } 263 }
271 264
272 if (dst->dev->flags & IFF_LOOPBACK) { 265 if (dst->dev->flags & IFF_LOOPBACK) {
@@ -282,7 +275,16 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
282 goto put; 275 goto put;
283 } 276 }
284 277
285 ret = dst_fetch_ha(dst, addr, &fl6.daddr); 278 rcu_read_lock();
279 neigh = dst_get_neighbour(dst);
280 if (!neigh || !(neigh->nud_state & NUD_VALID)) {
281 if (neigh)
282 neigh_event_send(neigh, NULL);
283 ret = -ENODATA;
284 } else {
285 ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
286 }
287 rcu_read_unlock();
286put: 288put:
287 dst_release(dst); 289 dst_release(dst);
288 return ret; 290 return ret;
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 80f6cf2449f..9353992f9ee 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -167,7 +167,6 @@ int ib_find_cached_pkey(struct ib_device *device,
167 unsigned long flags; 167 unsigned long flags;
168 int i; 168 int i;
169 int ret = -ENOENT; 169 int ret = -ENOENT;
170 int partial_ix = -1;
171 170
172 if (port_num < start_port(device) || port_num > end_port(device)) 171 if (port_num < start_port(device) || port_num > end_port(device))
173 return -EINVAL; 172 return -EINVAL;
@@ -180,46 +179,6 @@ int ib_find_cached_pkey(struct ib_device *device,
180 179
181 for (i = 0; i < cache->table_len; ++i) 180 for (i = 0; i < cache->table_len; ++i)
182 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) { 181 if ((cache->table[i] & 0x7fff) == (pkey & 0x7fff)) {
183 if (cache->table[i] & 0x8000) {
184 *index = i;
185 ret = 0;
186 break;
187 } else
188 partial_ix = i;
189 }
190
191 if (ret && partial_ix >= 0) {
192 *index = partial_ix;
193 ret = 0;
194 }
195
196 read_unlock_irqrestore(&device->cache.lock, flags);
197
198 return ret;
199}
200EXPORT_SYMBOL(ib_find_cached_pkey);
201
202int ib_find_exact_cached_pkey(struct ib_device *device,
203 u8 port_num,
204 u16 pkey,
205 u16 *index)
206{
207 struct ib_pkey_cache *cache;
208 unsigned long flags;
209 int i;
210 int ret = -ENOENT;
211
212 if (port_num < start_port(device) || port_num > end_port(device))
213 return -EINVAL;
214
215 read_lock_irqsave(&device->cache.lock, flags);
216
217 cache = device->cache.pkey_cache[port_num - start_port(device)];
218
219 *index = -1;
220
221 for (i = 0; i < cache->table_len; ++i)
222 if (cache->table[i] == pkey) {
223 *index = i; 182 *index = i;
224 ret = 0; 183 ret = 0;
225 break; 184 break;
@@ -229,7 +188,7 @@ int ib_find_exact_cached_pkey(struct ib_device *device,
229 188
230 return ret; 189 return ret;
231} 190}
232EXPORT_SYMBOL(ib_find_exact_cached_pkey); 191EXPORT_SYMBOL(ib_find_cached_pkey);
233 192
234int ib_get_cached_lmc(struct ib_device *device, 193int ib_get_cached_lmc(struct ib_device *device,
235 u8 port_num, 194 u8 port_num,
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 394fea2ba1b..fc0f2bd9ca8 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -36,7 +36,6 @@
36#include <linux/completion.h> 36#include <linux/completion.h>
37#include <linux/dma-mapping.h> 37#include <linux/dma-mapping.h>
38#include <linux/device.h> 38#include <linux/device.h>
39#include <linux/module.h>
40#include <linux/err.h> 39#include <linux/err.h>
41#include <linux/idr.h> 40#include <linux/idr.h>
42#include <linux/interrupt.h> 41#include <linux/interrupt.h>
@@ -390,7 +389,7 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
390 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv, 389 ret = idr_get_new_above(&cm.local_id_table, cm_id_priv,
391 next_id, &id); 390 next_id, &id);
392 if (!ret) 391 if (!ret)
393 next_id = ((unsigned) id + 1) & MAX_IDR_MASK; 392 next_id = ((unsigned) id + 1) & MAX_ID_MASK;
394 spin_unlock_irqrestore(&cm.lock, flags); 393 spin_unlock_irqrestore(&cm.lock, flags);
395 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) ); 394 } while( (ret == -EAGAIN) && idr_pre_get(&cm.local_id_table, GFP_KERNEL) );
396 395
@@ -890,8 +889,6 @@ retest:
890 break; 889 break;
891 case IB_CM_ESTABLISHED: 890 case IB_CM_ESTABLISHED:
892 spin_unlock_irq(&cm_id_priv->lock); 891 spin_unlock_irq(&cm_id_priv->lock);
893 if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
894 break;
895 ib_send_cm_dreq(cm_id, NULL, 0); 892 ib_send_cm_dreq(cm_id, NULL, 0);
896 goto retest; 893 goto retest;
897 case IB_CM_DREQ_SENT: 894 case IB_CM_DREQ_SENT:
@@ -1011,6 +1008,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
1011 req_msg->service_id = param->service_id; 1008 req_msg->service_id = param->service_id;
1012 req_msg->local_ca_guid = cm_id_priv->id.device->node_guid; 1009 req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1013 cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num)); 1010 cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
1011 cm_req_set_resp_res(req_msg, param->responder_resources);
1014 cm_req_set_init_depth(req_msg, param->initiator_depth); 1012 cm_req_set_init_depth(req_msg, param->initiator_depth);
1015 cm_req_set_remote_resp_timeout(req_msg, 1013 cm_req_set_remote_resp_timeout(req_msg,
1016 param->remote_cm_response_timeout); 1014 param->remote_cm_response_timeout);
@@ -1019,16 +1017,12 @@ static void cm_format_req(struct cm_req_msg *req_msg,
1019 cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn)); 1017 cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
1020 cm_req_set_local_resp_timeout(req_msg, 1018 cm_req_set_local_resp_timeout(req_msg,
1021 param->local_cm_response_timeout); 1019 param->local_cm_response_timeout);
1020 cm_req_set_retry_count(req_msg, param->retry_count);
1022 req_msg->pkey = param->primary_path->pkey; 1021 req_msg->pkey = param->primary_path->pkey;
1023 cm_req_set_path_mtu(req_msg, param->primary_path->mtu); 1022 cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
1023 cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1024 cm_req_set_max_cm_retries(req_msg, param->max_cm_retries); 1024 cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);
1025 1025 cm_req_set_srq(req_msg, param->srq);
1026 if (param->qp_type != IB_QPT_XRC_INI) {
1027 cm_req_set_resp_res(req_msg, param->responder_resources);
1028 cm_req_set_retry_count(req_msg, param->retry_count);
1029 cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
1030 cm_req_set_srq(req_msg, param->srq);
1031 }
1032 1026
1033 if (pri_path->hop_limit <= 1) { 1027 if (pri_path->hop_limit <= 1) {
1034 req_msg->primary_local_lid = pri_path->slid; 1028 req_msg->primary_local_lid = pri_path->slid;
@@ -1086,8 +1080,7 @@ static int cm_validate_req_param(struct ib_cm_req_param *param)
1086 if (!param->primary_path) 1080 if (!param->primary_path)
1087 return -EINVAL; 1081 return -EINVAL;
1088 1082
1089 if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC && 1083 if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC)
1090 param->qp_type != IB_QPT_XRC_INI)
1091 return -EINVAL; 1084 return -EINVAL;
1092 1085
1093 if (param->private_data && 1086 if (param->private_data &&
@@ -1608,24 +1601,18 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
1608 cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid); 1601 cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1609 rep_msg->local_comm_id = cm_id_priv->id.local_id; 1602 rep_msg->local_comm_id = cm_id_priv->id.local_id;
1610 rep_msg->remote_comm_id = cm_id_priv->id.remote_id; 1603 rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1604 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1611 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn)); 1605 cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1612 rep_msg->resp_resources = param->responder_resources; 1606 rep_msg->resp_resources = param->responder_resources;
1607 rep_msg->initiator_depth = param->initiator_depth;
1613 cm_rep_set_target_ack_delay(rep_msg, 1608 cm_rep_set_target_ack_delay(rep_msg,
1614 cm_id_priv->av.port->cm_dev->ack_delay); 1609 cm_id_priv->av.port->cm_dev->ack_delay);
1615 cm_rep_set_failover(rep_msg, param->failover_accepted); 1610 cm_rep_set_failover(rep_msg, param->failover_accepted);
1611 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1616 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count); 1612 cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1613 cm_rep_set_srq(rep_msg, param->srq);
1617 rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid; 1614 rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1618 1615
1619 if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1620 rep_msg->initiator_depth = param->initiator_depth;
1621 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1622 cm_rep_set_srq(rep_msg, param->srq);
1623 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1624 } else {
1625 cm_rep_set_srq(rep_msg, 1);
1626 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1627 }
1628
1629 if (param->private_data && param->private_data_len) 1616 if (param->private_data && param->private_data_len)
1630 memcpy(rep_msg->private_data, param->private_data, 1617 memcpy(rep_msg->private_data, param->private_data,
1631 param->private_data_len); 1618 param->private_data_len);
@@ -1673,7 +1660,7 @@ int ib_send_cm_rep(struct ib_cm_id *cm_id,
1673 cm_id_priv->initiator_depth = param->initiator_depth; 1660 cm_id_priv->initiator_depth = param->initiator_depth;
1674 cm_id_priv->responder_resources = param->responder_resources; 1661 cm_id_priv->responder_resources = param->responder_resources;
1675 cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg); 1662 cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1676 cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF); 1663 cm_id_priv->local_qpn = cm_rep_get_local_qpn(rep_msg);
1677 1664
1678out: spin_unlock_irqrestore(&cm_id_priv->lock, flags); 1665out: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1679 return ret; 1666 return ret;
@@ -1744,7 +1731,7 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1744} 1731}
1745EXPORT_SYMBOL(ib_send_cm_rtu); 1732EXPORT_SYMBOL(ib_send_cm_rtu);
1746 1733
1747static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type) 1734static void cm_format_rep_event(struct cm_work *work)
1748{ 1735{
1749 struct cm_rep_msg *rep_msg; 1736 struct cm_rep_msg *rep_msg;
1750 struct ib_cm_rep_event_param *param; 1737 struct ib_cm_rep_event_param *param;
@@ -1753,7 +1740,7 @@ static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1753 param = &work->cm_event.param.rep_rcvd; 1740 param = &work->cm_event.param.rep_rcvd;
1754 param->remote_ca_guid = rep_msg->local_ca_guid; 1741 param->remote_ca_guid = rep_msg->local_ca_guid;
1755 param->remote_qkey = be32_to_cpu(rep_msg->local_qkey); 1742 param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1756 param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type)); 1743 param->remote_qpn = be32_to_cpu(cm_rep_get_local_qpn(rep_msg));
1757 param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg)); 1744 param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1758 param->responder_resources = rep_msg->initiator_depth; 1745 param->responder_resources = rep_msg->initiator_depth;
1759 param->initiator_depth = rep_msg->resp_resources; 1746 param->initiator_depth = rep_msg->resp_resources;
@@ -1821,7 +1808,7 @@ static int cm_rep_handler(struct cm_work *work)
1821 return -EINVAL; 1808 return -EINVAL;
1822 } 1809 }
1823 1810
1824 cm_format_rep_event(work, cm_id_priv->qp_type); 1811 cm_format_rep_event(work);
1825 1812
1826 spin_lock_irq(&cm_id_priv->lock); 1813 spin_lock_irq(&cm_id_priv->lock);
1827 switch (cm_id_priv->id.state) { 1814 switch (cm_id_priv->id.state) {
@@ -1836,7 +1823,7 @@ static int cm_rep_handler(struct cm_work *work)
1836 1823
1837 cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id; 1824 cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
1838 cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid; 1825 cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
1839 cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); 1826 cm_id_priv->timewait_info->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1840 1827
1841 spin_lock(&cm.lock); 1828 spin_lock(&cm.lock);
1842 /* Check for duplicate REP. */ 1829 /* Check for duplicate REP. */
@@ -1863,7 +1850,7 @@ static int cm_rep_handler(struct cm_work *work)
1863 1850
1864 cm_id_priv->id.state = IB_CM_REP_RCVD; 1851 cm_id_priv->id.state = IB_CM_REP_RCVD;
1865 cm_id_priv->id.remote_id = rep_msg->local_comm_id; 1852 cm_id_priv->id.remote_id = rep_msg->local_comm_id;
1866 cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type); 1853 cm_id_priv->remote_qpn = cm_rep_get_local_qpn(rep_msg);
1867 cm_id_priv->initiator_depth = rep_msg->resp_resources; 1854 cm_id_priv->initiator_depth = rep_msg->resp_resources;
1868 cm_id_priv->responder_resources = rep_msg->initiator_depth; 1855 cm_id_priv->responder_resources = rep_msg->initiator_depth;
1869 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg); 1856 cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
@@ -3505,8 +3492,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
3505 qp_attr->path_mtu = cm_id_priv->path_mtu; 3492 qp_attr->path_mtu = cm_id_priv->path_mtu;
3506 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); 3493 qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
3507 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); 3494 qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
3508 if (cm_id_priv->qp_type == IB_QPT_RC || 3495 if (cm_id_priv->qp_type == IB_QPT_RC) {
3509 cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
3510 *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC | 3496 *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
3511 IB_QP_MIN_RNR_TIMER; 3497 IB_QP_MIN_RNR_TIMER;
3512 qp_attr->max_dest_rd_atomic = 3498 qp_attr->max_dest_rd_atomic =
@@ -3551,21 +3537,15 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
3551 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) { 3537 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
3552 *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN; 3538 *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
3553 qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn); 3539 qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
3554 switch (cm_id_priv->qp_type) { 3540 if (cm_id_priv->qp_type == IB_QPT_RC) {
3555 case IB_QPT_RC: 3541 *qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
3556 case IB_QPT_XRC_INI: 3542 IB_QP_RNR_RETRY |
3557 *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
3558 IB_QP_MAX_QP_RD_ATOMIC; 3543 IB_QP_MAX_QP_RD_ATOMIC;
3544 qp_attr->timeout = cm_id_priv->av.timeout;
3559 qp_attr->retry_cnt = cm_id_priv->retry_count; 3545 qp_attr->retry_cnt = cm_id_priv->retry_count;
3560 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count; 3546 qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
3561 qp_attr->max_rd_atomic = cm_id_priv->initiator_depth; 3547 qp_attr->max_rd_atomic =
3562 /* fall through */ 3548 cm_id_priv->initiator_depth;
3563 case IB_QPT_XRC_TGT:
3564 *qp_attr_mask |= IB_QP_TIMEOUT;
3565 qp_attr->timeout = cm_id_priv->av.timeout;
3566 break;
3567 default:
3568 break;
3569 } 3549 }
3570 if (cm_id_priv->alt_av.ah_attr.dlid) { 3550 if (cm_id_priv->alt_av.ah_attr.dlid) {
3571 *qp_attr_mask |= IB_QP_PATH_MIG_STATE; 3551 *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
@@ -3659,7 +3639,7 @@ static struct kobj_type cm_port_obj_type = {
3659 .release = cm_release_port_obj 3639 .release = cm_release_port_obj
3660}; 3640};
3661 3641
3662static char *cm_devnode(struct device *dev, umode_t *mode) 3642static char *cm_devnode(struct device *dev, mode_t *mode)
3663{ 3643{
3664 if (mode) 3644 if (mode)
3665 *mode = 0666; 3645 *mode = 0666;
@@ -3848,28 +3828,24 @@ static int __init ib_cm_init(void)
3848 INIT_LIST_HEAD(&cm.timewait_list); 3828 INIT_LIST_HEAD(&cm.timewait_list);
3849 3829
3850 ret = class_register(&cm_class); 3830 ret = class_register(&cm_class);
3851 if (ret) { 3831 if (ret)
3852 ret = -ENOMEM; 3832 return -ENOMEM;
3853 goto error1;
3854 }
3855 3833
3856 cm.wq = create_workqueue("ib_cm"); 3834 cm.wq = create_workqueue("ib_cm");
3857 if (!cm.wq) { 3835 if (!cm.wq) {
3858 ret = -ENOMEM; 3836 ret = -ENOMEM;
3859 goto error2; 3837 goto error1;
3860 } 3838 }
3861 3839
3862 ret = ib_register_client(&cm_client); 3840 ret = ib_register_client(&cm_client);
3863 if (ret) 3841 if (ret)
3864 goto error3; 3842 goto error2;
3865 3843
3866 return 0; 3844 return 0;
3867error3:
3868 destroy_workqueue(cm.wq);
3869error2: 3845error2:
3870 class_unregister(&cm_class); 3846 destroy_workqueue(cm.wq);
3871error1: 3847error1:
3872 idr_destroy(&cm.local_id_table); 3848 class_unregister(&cm_class);
3873 return ret; 3849 return ret;
3874} 3850}
3875 3851
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index be068f47e47..7e63c08f697 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2004, 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2004 Intel Corporation. All rights reserved.
3 * Copyright (c) 2004 Topspin Corporation. All rights reserved. 3 * Copyright (c) 2004 Topspin Corporation. All rights reserved.
4 * Copyright (c) 2004 Voltaire Corporation. All rights reserved. 4 * Copyright (c) 2004 Voltaire Corporation. All rights reserved.
5 * 5 *
@@ -44,6 +44,18 @@
44 44
45#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */ 45#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
46 46
47#define CM_REQ_ATTR_ID cpu_to_be16(0x0010)
48#define CM_MRA_ATTR_ID cpu_to_be16(0x0011)
49#define CM_REJ_ATTR_ID cpu_to_be16(0x0012)
50#define CM_REP_ATTR_ID cpu_to_be16(0x0013)
51#define CM_RTU_ATTR_ID cpu_to_be16(0x0014)
52#define CM_DREQ_ATTR_ID cpu_to_be16(0x0015)
53#define CM_DREP_ATTR_ID cpu_to_be16(0x0016)
54#define CM_SIDR_REQ_ATTR_ID cpu_to_be16(0x0017)
55#define CM_SIDR_REP_ATTR_ID cpu_to_be16(0x0018)
56#define CM_LAP_ATTR_ID cpu_to_be16(0x0019)
57#define CM_APR_ATTR_ID cpu_to_be16(0x001A)
58
47enum cm_msg_sequence { 59enum cm_msg_sequence {
48 CM_MSG_SEQUENCE_REQ, 60 CM_MSG_SEQUENCE_REQ,
49 CM_MSG_SEQUENCE_LAP, 61 CM_MSG_SEQUENCE_LAP,
@@ -74,7 +86,7 @@ struct cm_req_msg {
74 __be16 pkey; 86 __be16 pkey;
75 /* path MTU:4, RDC exists:1, RNR retry count:3. */ 87 /* path MTU:4, RDC exists:1, RNR retry count:3. */
76 u8 offset50; 88 u8 offset50;
77 /* max CM Retries:4, SRQ:1, extended transport type:3 */ 89 /* max CM Retries:4, SRQ:1, rsvd:3 */
78 u8 offset51; 90 u8 offset51;
79 91
80 __be16 primary_local_lid; 92 __be16 primary_local_lid;
@@ -163,11 +175,6 @@ static inline enum ib_qp_type cm_req_get_qp_type(struct cm_req_msg *req_msg)
163 switch(transport_type) { 175 switch(transport_type) {
164 case 0: return IB_QPT_RC; 176 case 0: return IB_QPT_RC;
165 case 1: return IB_QPT_UC; 177 case 1: return IB_QPT_UC;
166 case 3:
167 switch (req_msg->offset51 & 0x7) {
168 case 1: return IB_QPT_XRC_TGT;
169 default: return 0;
170 }
171 default: return 0; 178 default: return 0;
172 } 179 }
173} 180}
@@ -181,12 +188,6 @@ static inline void cm_req_set_qp_type(struct cm_req_msg *req_msg,
181 req_msg->offset40) & 188 req_msg->offset40) &
182 0xFFFFFFF9) | 0x2); 189 0xFFFFFFF9) | 0x2);
183 break; 190 break;
184 case IB_QPT_XRC_INI:
185 req_msg->offset40 = cpu_to_be32((be32_to_cpu(
186 req_msg->offset40) &
187 0xFFFFFFF9) | 0x6);
188 req_msg->offset51 = (req_msg->offset51 & 0xF8) | 1;
189 break;
190 default: 191 default:
191 req_msg->offset40 = cpu_to_be32(be32_to_cpu( 192 req_msg->offset40 = cpu_to_be32(be32_to_cpu(
192 req_msg->offset40) & 193 req_msg->offset40) &
@@ -526,23 +527,6 @@ static inline void cm_rep_set_local_qpn(struct cm_rep_msg *rep_msg, __be32 qpn)
526 (be32_to_cpu(rep_msg->offset12) & 0x000000FF)); 527 (be32_to_cpu(rep_msg->offset12) & 0x000000FF));
527} 528}
528 529
529static inline __be32 cm_rep_get_local_eecn(struct cm_rep_msg *rep_msg)
530{
531 return cpu_to_be32(be32_to_cpu(rep_msg->offset16) >> 8);
532}
533
534static inline void cm_rep_set_local_eecn(struct cm_rep_msg *rep_msg, __be32 eecn)
535{
536 rep_msg->offset16 = cpu_to_be32((be32_to_cpu(eecn) << 8) |
537 (be32_to_cpu(rep_msg->offset16) & 0x000000FF));
538}
539
540static inline __be32 cm_rep_get_qpn(struct cm_rep_msg *rep_msg, enum ib_qp_type qp_type)
541{
542 return (qp_type == IB_QPT_XRC_INI) ?
543 cm_rep_get_local_eecn(rep_msg) : cm_rep_get_local_qpn(rep_msg);
544}
545
546static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg) 530static inline __be32 cm_rep_get_starting_psn(struct cm_rep_msg *rep_msg)
547{ 531{
548 return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8); 532 return cpu_to_be32(be32_to_cpu(rep_msg->offset20) >> 8);
@@ -787,7 +771,6 @@ struct cm_apr_msg {
787 771
788 u8 info_length; 772 u8 info_length;
789 u8 ap_status; 773 u8 ap_status;
790 __be16 rsvd;
791 u8 info[IB_CM_APR_INFO_LENGTH]; 774 u8 info[IB_CM_APR_INFO_LENGTH];
792 775
793 u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; 776 u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE];
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index d789eea3216..ca4c5dcd713 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -41,8 +41,6 @@
41#include <linux/idr.h> 41#include <linux/idr.h>
42#include <linux/inetdevice.h> 42#include <linux/inetdevice.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/module.h>
45#include <net/route.h>
46 44
47#include <net/tcp.h> 45#include <net/tcp.h>
48#include <net/ipv6.h> 46#include <net/ipv6.h>
@@ -83,7 +81,6 @@ static DEFINE_IDR(sdp_ps);
83static DEFINE_IDR(tcp_ps); 81static DEFINE_IDR(tcp_ps);
84static DEFINE_IDR(udp_ps); 82static DEFINE_IDR(udp_ps);
85static DEFINE_IDR(ipoib_ps); 83static DEFINE_IDR(ipoib_ps);
86static DEFINE_IDR(ib_ps);
87 84
88struct cma_device { 85struct cma_device {
89 struct list_head list; 86 struct list_head list;
@@ -99,10 +96,6 @@ struct rdma_bind_list {
99 unsigned short port; 96 unsigned short port;
100}; 97};
101 98
102enum {
103 CMA_OPTION_AFONLY,
104};
105
106/* 99/*
107 * Device removal can occur at anytime, so we need extra handling to 100 * Device removal can occur at anytime, so we need extra handling to
108 * serialize notifying the user of device removal with other callbacks. 101 * serialize notifying the user of device removal with other callbacks.
@@ -141,11 +134,9 @@ struct rdma_id_private {
141 u32 qkey; 134 u32 qkey;
142 u32 qp_num; 135 u32 qp_num;
143 pid_t owner; 136 pid_t owner;
144 u32 options;
145 u8 srq; 137 u8 srq;
146 u8 tos; 138 u8 tos;
147 u8 reuseaddr; 139 u8 reuseaddr;
148 u8 afonly;
149}; 140};
150 141
151struct cma_multicast { 142struct cma_multicast {
@@ -345,17 +336,17 @@ static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_nu
345 336
346 err = ib_query_port(device, port_num, &props); 337 err = ib_query_port(device, port_num, &props);
347 if (err) 338 if (err)
348 return err; 339 return 1;
349 340
350 for (i = 0; i < props.gid_tbl_len; ++i) { 341 for (i = 0; i < props.gid_tbl_len; ++i) {
351 err = ib_query_gid(device, port_num, i, &tmp); 342 err = ib_query_gid(device, port_num, i, &tmp);
352 if (err) 343 if (err)
353 return err; 344 return 1;
354 if (!memcmp(&tmp, gid, sizeof tmp)) 345 if (!memcmp(&tmp, gid, sizeof tmp))
355 return 0; 346 return 0;
356 } 347 }
357 348
358 return -EADDRNOTAVAIL; 349 return -EAGAIN;
359} 350}
360 351
361static int cma_acquire_dev(struct rdma_id_private *id_priv) 352static int cma_acquire_dev(struct rdma_id_private *id_priv)
@@ -388,7 +379,8 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv)
388 if (!ret) { 379 if (!ret) {
389 id_priv->id.port_num = port; 380 id_priv->id.port_num = port;
390 goto out; 381 goto out;
391 } 382 } else if (ret == 1)
383 break;
392 } 384 }
393 } 385 }
394 } 386 }
@@ -1116,7 +1108,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1116 if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { 1108 if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) {
1117 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; 1109 rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND;
1118 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); 1110 rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1119 ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); 1111 ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey);
1120 } else { 1112 } else {
1121 ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, 1113 ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr,
1122 &rt->addr.dev_addr); 1114 &rt->addr.dev_addr);
@@ -1187,15 +1179,6 @@ static void cma_set_req_event_data(struct rdma_cm_event *event,
1187 event->param.conn.qp_num = req_data->remote_qpn; 1179 event->param.conn.qp_num = req_data->remote_qpn;
1188} 1180}
1189 1181
1190static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_event)
1191{
1192 return (((ib_event->event == IB_CM_REQ_RECEIVED) &&
1193 (ib_event->param.req_rcvd.qp_type == id->qp_type)) ||
1194 ((ib_event->event == IB_CM_SIDR_REQ_RECEIVED) &&
1195 (id->qp_type == IB_QPT_UD)) ||
1196 (!id->qp_type));
1197}
1198
1199static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event) 1182static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1200{ 1183{
1201 struct rdma_id_private *listen_id, *conn_id; 1184 struct rdma_id_private *listen_id, *conn_id;
@@ -1203,16 +1186,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1203 int offset, ret; 1186 int offset, ret;
1204 1187
1205 listen_id = cm_id->context; 1188 listen_id = cm_id->context;
1206 if (!cma_check_req_qp_type(&listen_id->id, ib_event))
1207 return -EINVAL;
1208
1209 if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) 1189 if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
1210 return -ECONNABORTED; 1190 return -ECONNABORTED;
1211 1191
1212 memset(&event, 0, sizeof event); 1192 memset(&event, 0, sizeof event);
1213 offset = cma_user_data_offset(listen_id->id.ps); 1193 offset = cma_user_data_offset(listen_id->id.ps);
1214 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1194 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1215 if (ib_event->event == IB_CM_SIDR_REQ_RECEIVED) { 1195 if (listen_id->id.qp_type == IB_QPT_UD) {
1216 conn_id = cma_new_udp_id(&listen_id->id, ib_event); 1196 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1217 event.param.ud.private_data = ib_event->private_data + offset; 1197 event.param.ud.private_data = ib_event->private_data + offset;
1218 event.param.ud.private_data_len = 1198 event.param.ud.private_data_len =
@@ -1224,13 +1204,13 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1224 } 1204 }
1225 if (!conn_id) { 1205 if (!conn_id) {
1226 ret = -ENOMEM; 1206 ret = -ENOMEM;
1227 goto err1; 1207 goto out;
1228 } 1208 }
1229 1209
1230 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING); 1210 mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
1231 ret = cma_acquire_dev(conn_id); 1211 ret = cma_acquire_dev(conn_id);
1232 if (ret) 1212 if (ret)
1233 goto err2; 1213 goto release_conn_id;
1234 1214
1235 conn_id->cm_id.ib = cm_id; 1215 conn_id->cm_id.ib = cm_id;
1236 cm_id->context = conn_id; 1216 cm_id->context = conn_id;
@@ -1242,33 +1222,31 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1242 */ 1222 */
1243 atomic_inc(&conn_id->refcount); 1223 atomic_inc(&conn_id->refcount);
1244 ret = conn_id->id.event_handler(&conn_id->id, &event); 1224 ret = conn_id->id.event_handler(&conn_id->id, &event);
1245 if (ret) 1225 if (!ret) {
1246 goto err3; 1226 /*
1247 1227 * Acquire mutex to prevent user executing rdma_destroy_id()
1248 /* 1228 * while we're accessing the cm_id.
1249 * Acquire mutex to prevent user executing rdma_destroy_id() 1229 */
1250 * while we're accessing the cm_id. 1230 mutex_lock(&lock);
1251 */ 1231 if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
1252 mutex_lock(&lock); 1232 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
1253 if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD)) 1233 mutex_unlock(&lock);
1254 ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0); 1234 mutex_unlock(&conn_id->handler_mutex);
1255 mutex_unlock(&lock); 1235 cma_deref_id(conn_id);
1256 mutex_unlock(&conn_id->handler_mutex); 1236 goto out;
1257 mutex_unlock(&listen_id->handler_mutex); 1237 }
1258 cma_deref_id(conn_id); 1238 cma_deref_id(conn_id);
1259 return 0;
1260 1239
1261err3:
1262 cma_deref_id(conn_id);
1263 /* Destroy the CM ID by returning a non-zero value. */ 1240 /* Destroy the CM ID by returning a non-zero value. */
1264 conn_id->cm_id.ib = NULL; 1241 conn_id->cm_id.ib = NULL;
1265err2: 1242
1243release_conn_id:
1266 cma_exch(conn_id, RDMA_CM_DESTROYING); 1244 cma_exch(conn_id, RDMA_CM_DESTROYING);
1267 mutex_unlock(&conn_id->handler_mutex); 1245 mutex_unlock(&conn_id->handler_mutex);
1268err1: 1246 rdma_destroy_id(&conn_id->id);
1247
1248out:
1269 mutex_unlock(&listen_id->handler_mutex); 1249 mutex_unlock(&listen_id->handler_mutex);
1270 if (conn_id)
1271 rdma_destroy_id(&conn_id->id);
1272 return ret; 1250 return ret;
1273} 1251}
1274 1252
@@ -1302,10 +1280,8 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1302 } else { 1280 } else {
1303 cma_set_ip_ver(cma_data, 4); 1281 cma_set_ip_ver(cma_data, 4);
1304 cma_set_ip_ver(cma_mask, 0xF); 1282 cma_set_ip_ver(cma_mask, 0xF);
1305 if (!cma_any_addr(addr)) { 1283 cma_data->dst_addr.ip4.addr = ip4_addr;
1306 cma_data->dst_addr.ip4.addr = ip4_addr; 1284 cma_mask->dst_addr.ip4.addr = htonl(~0);
1307 cma_mask->dst_addr.ip4.addr = htonl(~0);
1308 }
1309 } 1285 }
1310 break; 1286 break;
1311 case AF_INET6: 1287 case AF_INET6:
@@ -1319,11 +1295,9 @@ static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1319 } else { 1295 } else {
1320 cma_set_ip_ver(cma_data, 6); 1296 cma_set_ip_ver(cma_data, 6);
1321 cma_set_ip_ver(cma_mask, 0xF); 1297 cma_set_ip_ver(cma_mask, 0xF);
1322 if (!cma_any_addr(addr)) { 1298 cma_data->dst_addr.ip6 = ip6_addr;
1323 cma_data->dst_addr.ip6 = ip6_addr; 1299 memset(&cma_mask->dst_addr.ip6, 0xFF,
1324 memset(&cma_mask->dst_addr.ip6, 0xFF, 1300 sizeof cma_mask->dst_addr.ip6);
1325 sizeof cma_mask->dst_addr.ip6);
1326 }
1327 } 1301 }
1328 break; 1302 break;
1329 default: 1303 default:
@@ -1354,8 +1328,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1354 switch (iw_event->status) { 1328 switch (iw_event->status) {
1355 case 0: 1329 case 0:
1356 event.event = RDMA_CM_EVENT_ESTABLISHED; 1330 event.event = RDMA_CM_EVENT_ESTABLISHED;
1357 event.param.conn.initiator_depth = iw_event->ird;
1358 event.param.conn.responder_resources = iw_event->ord;
1359 break; 1331 break;
1360 case -ECONNRESET: 1332 case -ECONNRESET:
1361 case -ECONNREFUSED: 1333 case -ECONNREFUSED:
@@ -1371,8 +1343,6 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1371 break; 1343 break;
1372 case IW_CM_EVENT_ESTABLISHED: 1344 case IW_CM_EVENT_ESTABLISHED:
1373 event.event = RDMA_CM_EVENT_ESTABLISHED; 1345 event.event = RDMA_CM_EVENT_ESTABLISHED;
1374 event.param.conn.initiator_depth = iw_event->ird;
1375 event.param.conn.responder_resources = iw_event->ord;
1376 break; 1346 break;
1377 default: 1347 default:
1378 BUG_ON(1); 1348 BUG_ON(1);
@@ -1463,8 +1433,8 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1463 event.event = RDMA_CM_EVENT_CONNECT_REQUEST; 1433 event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1464 event.param.conn.private_data = iw_event->private_data; 1434 event.param.conn.private_data = iw_event->private_data;
1465 event.param.conn.private_data_len = iw_event->private_data_len; 1435 event.param.conn.private_data_len = iw_event->private_data_len;
1466 event.param.conn.initiator_depth = iw_event->ird; 1436 event.param.conn.initiator_depth = attr.max_qp_init_rd_atom;
1467 event.param.conn.responder_resources = iw_event->ord; 1437 event.param.conn.responder_resources = attr.max_qp_rd_atom;
1468 1438
1469 /* 1439 /*
1470 * Protect against the user destroying conn_id from another thread 1440 * Protect against the user destroying conn_id from another thread
@@ -1508,7 +1478,7 @@ static int cma_ib_listen(struct rdma_id_private *id_priv)
1508 1478
1509 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; 1479 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
1510 svc_id = cma_get_service_id(id_priv->id.ps, addr); 1480 svc_id = cma_get_service_id(id_priv->id.ps, addr);
1511 if (cma_any_addr(addr) && !id_priv->afonly) 1481 if (cma_any_addr(addr))
1512 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL); 1482 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1513 else { 1483 else {
1514 cma_set_compare_data(id_priv->id.ps, addr, &compare_data); 1484 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
@@ -1582,7 +1552,6 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1582 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list); 1552 list_add_tail(&dev_id_priv->listen_list, &id_priv->listen_list);
1583 atomic_inc(&id_priv->refcount); 1553 atomic_inc(&id_priv->refcount);
1584 dev_id_priv->internal_id = 1; 1554 dev_id_priv->internal_id = 1;
1585 dev_id_priv->afonly = id_priv->afonly;
1586 1555
1587 ret = rdma_listen(id, id_priv->backlog); 1556 ret = rdma_listen(id, id_priv->backlog);
1588 if (ret) 1557 if (ret)
@@ -1839,10 +1808,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
1839 route->path_rec->reversible = 1; 1808 route->path_rec->reversible = 1;
1840 route->path_rec->pkey = cpu_to_be16(0xffff); 1809 route->path_rec->pkey = cpu_to_be16(0xffff);
1841 route->path_rec->mtu_selector = IB_SA_EQ; 1810 route->path_rec->mtu_selector = IB_SA_EQ;
1842 route->path_rec->sl = netdev_get_prio_tc_map( 1811 route->path_rec->sl = id_priv->tos >> 5;
1843 ndev->priv_flags & IFF_802_1Q_VLAN ?
1844 vlan_dev_real_dev(ndev) : ndev,
1845 rt_tos2priority(id_priv->tos));
1846 1812
1847 route->path_rec->mtu = iboe_get_mtu(ndev->mtu); 1813 route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
1848 route->path_rec->rate_selector = IB_SA_EQ; 1814 route->path_rec->rate_selector = IB_SA_EQ;
@@ -2021,11 +1987,11 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
2021 if (cma_zero_addr(src)) { 1987 if (cma_zero_addr(src)) {
2022 dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; 1988 dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
2023 if ((src->sa_family = dst->sa_family) == AF_INET) { 1989 if ((src->sa_family = dst->sa_family) == AF_INET) {
2024 ((struct sockaddr_in *)src)->sin_addr = 1990 ((struct sockaddr_in *) src)->sin_addr.s_addr =
2025 ((struct sockaddr_in *)dst)->sin_addr; 1991 ((struct sockaddr_in *) dst)->sin_addr.s_addr;
2026 } else { 1992 } else {
2027 ((struct sockaddr_in6 *)src)->sin6_addr = 1993 ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
2028 ((struct sockaddr_in6 *)dst)->sin6_addr; 1994 &((struct sockaddr_in6 *) dst)->sin6_addr);
2029 } 1995 }
2030 } 1996 }
2031 1997
@@ -2108,26 +2074,6 @@ int rdma_set_reuseaddr(struct rdma_cm_id *id, int reuse)
2108} 2074}
2109EXPORT_SYMBOL(rdma_set_reuseaddr); 2075EXPORT_SYMBOL(rdma_set_reuseaddr);
2110 2076
2111int rdma_set_afonly(struct rdma_cm_id *id, int afonly)
2112{
2113 struct rdma_id_private *id_priv;
2114 unsigned long flags;
2115 int ret;
2116
2117 id_priv = container_of(id, struct rdma_id_private, id);
2118 spin_lock_irqsave(&id_priv->lock, flags);
2119 if (id_priv->state == RDMA_CM_IDLE || id_priv->state == RDMA_CM_ADDR_BOUND) {
2120 id_priv->options |= (1 << CMA_OPTION_AFONLY);
2121 id_priv->afonly = afonly;
2122 ret = 0;
2123 } else {
2124 ret = -EINVAL;
2125 }
2126 spin_unlock_irqrestore(&id_priv->lock, flags);
2127 return ret;
2128}
2129EXPORT_SYMBOL(rdma_set_afonly);
2130
2131static void cma_bind_port(struct rdma_bind_list *bind_list, 2077static void cma_bind_port(struct rdma_bind_list *bind_list,
2132 struct rdma_id_private *id_priv) 2078 struct rdma_id_private *id_priv)
2133{ 2079{
@@ -2217,24 +2163,22 @@ static int cma_check_port(struct rdma_bind_list *bind_list,
2217 struct hlist_node *node; 2163 struct hlist_node *node;
2218 2164
2219 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr; 2165 addr = (struct sockaddr *) &id_priv->id.route.addr.src_addr;
2166 if (cma_any_addr(addr) && !reuseaddr)
2167 return -EADDRNOTAVAIL;
2168
2220 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) { 2169 hlist_for_each_entry(cur_id, node, &bind_list->owners, node) {
2221 if (id_priv == cur_id) 2170 if (id_priv == cur_id)
2222 continue; 2171 continue;
2223 2172
2224 if ((cur_id->state != RDMA_CM_LISTEN) && reuseaddr && 2173 if ((cur_id->state == RDMA_CM_LISTEN) ||
2225 cur_id->reuseaddr) 2174 !reuseaddr || !cur_id->reuseaddr) {
2226 continue; 2175 cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr;
2227 2176 if (cma_any_addr(cur_addr))
2228 cur_addr = (struct sockaddr *) &cur_id->id.route.addr.src_addr; 2177 return -EADDRNOTAVAIL;
2229 if (id_priv->afonly && cur_id->afonly &&
2230 (addr->sa_family != cur_addr->sa_family))
2231 continue;
2232
2233 if (cma_any_addr(addr) || cma_any_addr(cur_addr))
2234 return -EADDRNOTAVAIL;
2235 2178
2236 if (!cma_addr_cmp(addr, cur_addr)) 2179 if (!cma_addr_cmp(addr, cur_addr))
2237 return -EADDRINUSE; 2180 return -EADDRINUSE;
2181 }
2238 } 2182 }
2239 return 0; 2183 return 0;
2240} 2184}
@@ -2290,9 +2234,6 @@ static int cma_get_port(struct rdma_id_private *id_priv)
2290 case RDMA_PS_IPOIB: 2234 case RDMA_PS_IPOIB:
2291 ps = &ipoib_ps; 2235 ps = &ipoib_ps;
2292 break; 2236 break;
2293 case RDMA_PS_IB:
2294 ps = &ib_ps;
2295 break;
2296 default: 2237 default:
2297 return -EPROTONOSUPPORT; 2238 return -EPROTONOSUPPORT;
2298 } 2239 }
@@ -2310,7 +2251,7 @@ static int cma_get_port(struct rdma_id_private *id_priv)
2310static int cma_check_linklocal(struct rdma_dev_addr *dev_addr, 2251static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
2311 struct sockaddr *addr) 2252 struct sockaddr *addr)
2312{ 2253{
2313#if IS_ENABLED(CONFIG_IPV6) 2254#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
2314 struct sockaddr_in6 *sin6; 2255 struct sockaddr_in6 *sin6;
2315 2256
2316 if (addr->sa_family != AF_INET6) 2257 if (addr->sa_family != AF_INET6)
@@ -2403,14 +2344,6 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2403 } 2344 }
2404 2345
2405 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr)); 2346 memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2406 if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
2407 if (addr->sa_family == AF_INET)
2408 id_priv->afonly = 1;
2409#if IS_ENABLED(CONFIG_IPV6)
2410 else if (addr->sa_family == AF_INET6)
2411 id_priv->afonly = init_net.ipv6.sysctl.bindv6only;
2412#endif
2413 }
2414 ret = cma_get_port(id_priv); 2347 ret = cma_get_port(id_priv);
2415 if (ret) 2348 if (ret)
2416 goto err2; 2349 goto err2;
@@ -2559,9 +2492,6 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2559 2492
2560 req.private_data_len = sizeof(struct cma_hdr) + 2493 req.private_data_len = sizeof(struct cma_hdr) +
2561 conn_param->private_data_len; 2494 conn_param->private_data_len;
2562 if (req.private_data_len < conn_param->private_data_len)
2563 return -EINVAL;
2564
2565 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2495 req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2566 if (!req.private_data) 2496 if (!req.private_data)
2567 return -ENOMEM; 2497 return -ENOMEM;
@@ -2611,9 +2541,6 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2611 memset(&req, 0, sizeof req); 2541 memset(&req, 0, sizeof req);
2612 offset = cma_user_data_offset(id_priv->id.ps); 2542 offset = cma_user_data_offset(id_priv->id.ps);
2613 req.private_data_len = offset + conn_param->private_data_len; 2543 req.private_data_len = offset + conn_param->private_data_len;
2614 if (req.private_data_len < conn_param->private_data_len)
2615 return -EINVAL;
2616
2617 private_data = kzalloc(req.private_data_len, GFP_ATOMIC); 2544 private_data = kzalloc(req.private_data_len, GFP_ATOMIC);
2618 if (!private_data) 2545 if (!private_data)
2619 return -ENOMEM; 2546 return -ENOMEM;
@@ -2642,13 +2569,13 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
2642 req.service_id = cma_get_service_id(id_priv->id.ps, 2569 req.service_id = cma_get_service_id(id_priv->id.ps,
2643 (struct sockaddr *) &route->addr.dst_addr); 2570 (struct sockaddr *) &route->addr.dst_addr);
2644 req.qp_num = id_priv->qp_num; 2571 req.qp_num = id_priv->qp_num;
2645 req.qp_type = id_priv->id.qp_type; 2572 req.qp_type = IB_QPT_RC;
2646 req.starting_psn = id_priv->seq_num; 2573 req.starting_psn = id_priv->seq_num;
2647 req.responder_resources = conn_param->responder_resources; 2574 req.responder_resources = conn_param->responder_resources;
2648 req.initiator_depth = conn_param->initiator_depth; 2575 req.initiator_depth = conn_param->initiator_depth;
2649 req.flow_control = conn_param->flow_control; 2576 req.flow_control = conn_param->flow_control;
2650 req.retry_count = min_t(u8, 7, conn_param->retry_count); 2577 req.retry_count = conn_param->retry_count;
2651 req.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 2578 req.rnr_retry_count = conn_param->rnr_retry_count;
2652 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 2579 req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2653 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT; 2580 req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2654 req.max_cm_retries = CMA_MAX_CM_RETRIES; 2581 req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -2689,16 +2616,14 @@ static int cma_connect_iw(struct rdma_id_private *id_priv,
2689 if (ret) 2616 if (ret)
2690 goto out; 2617 goto out;
2691 2618
2692 if (conn_param) { 2619 iw_param.ord = conn_param->initiator_depth;
2693 iw_param.ord = conn_param->initiator_depth; 2620 iw_param.ird = conn_param->responder_resources;
2694 iw_param.ird = conn_param->responder_resources; 2621 iw_param.private_data = conn_param->private_data;
2695 iw_param.private_data = conn_param->private_data; 2622 iw_param.private_data_len = conn_param->private_data_len;
2696 iw_param.private_data_len = conn_param->private_data_len; 2623 if (id_priv->id.qp)
2697 iw_param.qpn = id_priv->id.qp ? id_priv->qp_num : conn_param->qp_num;
2698 } else {
2699 memset(&iw_param, 0, sizeof iw_param);
2700 iw_param.qpn = id_priv->qp_num; 2624 iw_param.qpn = id_priv->qp_num;
2701 } 2625 else
2626 iw_param.qpn = conn_param->qp_num;
2702 ret = iw_cm_connect(cm_id, &iw_param); 2627 ret = iw_cm_connect(cm_id, &iw_param);
2703out: 2628out:
2704 if (ret) { 2629 if (ret) {
@@ -2769,7 +2694,7 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
2769 rep.initiator_depth = conn_param->initiator_depth; 2694 rep.initiator_depth = conn_param->initiator_depth;
2770 rep.failover_accepted = 0; 2695 rep.failover_accepted = 0;
2771 rep.flow_control = conn_param->flow_control; 2696 rep.flow_control = conn_param->flow_control;
2772 rep.rnr_retry_count = min_t(u8, 7, conn_param->rnr_retry_count); 2697 rep.rnr_retry_count = conn_param->rnr_retry_count;
2773 rep.srq = id_priv->srq ? 1 : 0; 2698 rep.srq = id_priv->srq ? 1 : 0;
2774 2699
2775 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep); 2700 ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
@@ -2840,20 +2765,14 @@ int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2840 2765
2841 switch (rdma_node_get_transport(id->device->node_type)) { 2766 switch (rdma_node_get_transport(id->device->node_type)) {
2842 case RDMA_TRANSPORT_IB: 2767 case RDMA_TRANSPORT_IB:
2843 if (id->qp_type == IB_QPT_UD) { 2768 if (id->qp_type == IB_QPT_UD)
2844 if (conn_param) 2769 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2845 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2770 conn_param->private_data,
2846 conn_param->private_data, 2771 conn_param->private_data_len);
2847 conn_param->private_data_len); 2772 else if (conn_param)
2848 else 2773 ret = cma_accept_ib(id_priv, conn_param);
2849 ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS, 2774 else
2850 NULL, 0); 2775 ret = cma_rep_recv(id_priv);
2851 } else {
2852 if (conn_param)
2853 ret = cma_accept_ib(id_priv, conn_param);
2854 else
2855 ret = cma_rep_recv(id_priv);
2856 }
2857 break; 2776 break;
2858 case RDMA_TRANSPORT_IWARP: 2777 case RDMA_TRANSPORT_IWARP:
2859 ret = cma_accept_iw(id_priv, conn_param); 2778 ret = cma_accept_iw(id_priv, conn_param);
@@ -2972,7 +2891,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2972 mutex_lock(&id_priv->qp_mutex); 2891 mutex_lock(&id_priv->qp_mutex);
2973 if (!status && id_priv->id.qp) 2892 if (!status && id_priv->id.qp)
2974 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, 2893 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2975 be16_to_cpu(multicast->rec.mlid)); 2894 multicast->rec.mlid);
2976 mutex_unlock(&id_priv->qp_mutex); 2895 mutex_unlock(&id_priv->qp_mutex);
2977 2896
2978 memset(&event, 0, sizeof event); 2897 memset(&event, 0, sizeof event);
@@ -3057,16 +2976,16 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
3057 2976
3058 if (id_priv->id.ps == RDMA_PS_IPOIB) 2977 if (id_priv->id.ps == RDMA_PS_IPOIB)
3059 comp_mask |= IB_SA_MCMEMBER_REC_RATE | 2978 comp_mask |= IB_SA_MCMEMBER_REC_RATE |
3060 IB_SA_MCMEMBER_REC_RATE_SELECTOR | 2979 IB_SA_MCMEMBER_REC_RATE_SELECTOR;
3061 IB_SA_MCMEMBER_REC_MTU_SELECTOR |
3062 IB_SA_MCMEMBER_REC_MTU |
3063 IB_SA_MCMEMBER_REC_HOP_LIMIT;
3064 2980
3065 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device, 2981 mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
3066 id_priv->id.port_num, &rec, 2982 id_priv->id.port_num, &rec,
3067 comp_mask, GFP_KERNEL, 2983 comp_mask, GFP_KERNEL,
3068 cma_ib_mc_handler, mc); 2984 cma_ib_mc_handler, mc);
3069 return PTR_RET(mc->multicast.ib); 2985 if (IS_ERR(mc->multicast.ib))
2986 return PTR_ERR(mc->multicast.ib);
2987
2988 return 0;
3070} 2989}
3071 2990
3072static void iboe_mcast_work_handler(struct work_struct *work) 2991static void iboe_mcast_work_handler(struct work_struct *work)
@@ -3233,7 +3152,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
3233 if (id->qp) 3152 if (id->qp)
3234 ib_detach_mcast(id->qp, 3153 ib_detach_mcast(id->qp,
3235 &mc->multicast.ib->rec.mgid, 3154 &mc->multicast.ib->rec.mgid,
3236 be16_to_cpu(mc->multicast.ib->rec.mlid)); 3155 mc->multicast.ib->rec.mlid);
3237 if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { 3156 if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
3238 switch (rdma_port_get_link_layer(id->device, id->port_num)) { 3157 switch (rdma_port_get_link_layer(id->device, id->port_num)) {
3239 case IB_LINK_LAYER_INFINIBAND: 3158 case IB_LINK_LAYER_INFINIBAND:
@@ -3497,8 +3416,7 @@ out:
3497} 3416}
3498 3417
3499static const struct ibnl_client_cbs cma_cb_table[] = { 3418static const struct ibnl_client_cbs cma_cb_table[] = {
3500 [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats, 3419 [RDMA_NL_RDMA_CM_ID_STATS] = { .dump = cma_get_id_stats },
3501 .module = THIS_MODULE },
3502}; 3420};
3503 3421
3504static int __init cma_init(void) 3422static int __init cma_init(void)
@@ -3542,7 +3460,6 @@ static void __exit cma_cleanup(void)
3542 idr_destroy(&tcp_ps); 3460 idr_destroy(&tcp_ps);
3543 idr_destroy(&udp_ps); 3461 idr_destroy(&udp_ps);
3544 idr_destroy(&ipoib_ps); 3462 idr_destroy(&ipoib_ps);
3545 idr_destroy(&ib_ps);
3546} 3463}
3547 3464
3548module_init(cma_init); 3465module_init(cma_init);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 18c1ece765f..e711de400a0 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -707,28 +707,18 @@ int ib_find_pkey(struct ib_device *device,
707{ 707{
708 int ret, i; 708 int ret, i;
709 u16 tmp_pkey; 709 u16 tmp_pkey;
710 int partial_ix = -1;
711 710
712 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) { 711 for (i = 0; i < device->pkey_tbl_len[port_num - start_port(device)]; ++i) {
713 ret = ib_query_pkey(device, port_num, i, &tmp_pkey); 712 ret = ib_query_pkey(device, port_num, i, &tmp_pkey);
714 if (ret) 713 if (ret)
715 return ret; 714 return ret;
715
716 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) { 716 if ((pkey & 0x7fff) == (tmp_pkey & 0x7fff)) {
717 /* if there is full-member pkey take it.*/ 717 *index = i;
718 if (tmp_pkey & 0x8000) { 718 return 0;
719 *index = i;
720 return 0;
721 }
722 if (partial_ix < 0)
723 partial_ix = i;
724 } 719 }
725 } 720 }
726 721
727 /*no full-member, if exists take the limited*/
728 if (partial_ix >= 0) {
729 *index = partial_ix;
730 return 0;
731 }
732 return -ENOENT; 722 return -ENOENT;
733} 723}
734EXPORT_SYMBOL(ib_find_pkey); 724EXPORT_SYMBOL(ib_find_pkey);
diff --git a/drivers/infiniband/core/fmr_pool.c b/drivers/infiniband/core/fmr_pool.c
index 176c8f90f2b..4507043d24c 100644
--- a/drivers/infiniband/core/fmr_pool.c
+++ b/drivers/infiniband/core/fmr_pool.c
@@ -33,7 +33,6 @@
33 33
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/spinlock.h> 35#include <linux/spinlock.h>
36#include <linux/export.h>
37#include <linux/slab.h> 36#include <linux/slab.h>
38#include <linux/jhash.h> 37#include <linux/jhash.h>
39#include <linux/kthread.h> 38#include <linux/kthread.h>
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 0bb99bb3880..a9c042345c6 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -45,7 +45,6 @@
45#include <linux/workqueue.h> 45#include <linux/workqueue.h>
46#include <linux/completion.h> 46#include <linux/completion.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/module.h>
49 48
50#include <rdma/iw_cm.h> 49#include <rdma/iw_cm.h>
51#include <rdma/ib_addr.h> 50#include <rdma/ib_addr.h>
@@ -624,6 +623,17 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
624 */ 623 */
625 BUG_ON(iw_event->status); 624 BUG_ON(iw_event->status);
626 625
626 /*
627 * We could be destroying the listening id. If so, ignore this
628 * upcall.
629 */
630 spin_lock_irqsave(&listen_id_priv->lock, flags);
631 if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
632 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
633 goto out;
634 }
635 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
636
627 cm_id = iw_create_cm_id(listen_id_priv->id.device, 637 cm_id = iw_create_cm_id(listen_id_priv->id.device,
628 listen_id_priv->id.cm_handler, 638 listen_id_priv->id.cm_handler,
629 listen_id_priv->id.context); 639 listen_id_priv->id.context);
@@ -638,19 +648,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
638 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 648 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
639 cm_id_priv->state = IW_CM_STATE_CONN_RECV; 649 cm_id_priv->state = IW_CM_STATE_CONN_RECV;
640 650
641 /*
642 * We could be destroying the listening id. If so, ignore this
643 * upcall.
644 */
645 spin_lock_irqsave(&listen_id_priv->lock, flags);
646 if (listen_id_priv->state != IW_CM_STATE_LISTEN) {
647 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
648 iw_cm_reject(cm_id, NULL, 0);
649 iw_destroy_cm_id(cm_id);
650 goto out;
651 }
652 spin_unlock_irqrestore(&listen_id_priv->lock, flags);
653
654 ret = alloc_work_entries(cm_id_priv, 3); 651 ret = alloc_work_entries(cm_id_priv, 3);
655 if (ret) { 652 if (ret) {
656 iw_cm_reject(cm_id, NULL, 0); 653 iw_cm_reject(cm_id, NULL, 0);
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index dc3fd1e8af0..b4d8672a3e4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -35,7 +35,6 @@
35 */ 35 */
36#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/module.h>
39#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
40 39
41#include "mad_priv.h" 40#include "mad_priv.h"
@@ -1597,9 +1596,6 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1597 mad->mad_hdr.class_version].class; 1596 mad->mad_hdr.class_version].class;
1598 if (!class) 1597 if (!class)
1599 goto out; 1598 goto out;
1600 if (convert_mgmt_class(mad->mad_hdr.mgmt_class) >=
1601 IB_MGMT_MAX_METHODS)
1602 goto out;
1603 method = class->method_table[convert_mgmt_class( 1599 method = class->method_table[convert_mgmt_class(
1604 mad->mad_hdr.mgmt_class)]; 1600 mad->mad_hdr.mgmt_class)];
1605 if (method) 1601 if (method)
@@ -1842,26 +1838,6 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1842 } 1838 }
1843} 1839}
1844 1840
1845static bool generate_unmatched_resp(struct ib_mad_private *recv,
1846 struct ib_mad_private *response)
1847{
1848 if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET ||
1849 recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) {
1850 memcpy(response, recv, sizeof *response);
1851 response->header.recv_wc.wc = &response->header.wc;
1852 response->header.recv_wc.recv_buf.mad = &response->mad.mad;
1853 response->header.recv_wc.recv_buf.grh = &response->grh;
1854 response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
1855 response->mad.mad.mad_hdr.status =
1856 cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1857 if (recv->mad.mad.mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1858 response->mad.mad.mad_hdr.status |= IB_SMP_DIRECTION;
1859
1860 return true;
1861 } else {
1862 return false;
1863 }
1864}
1865static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, 1841static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1866 struct ib_wc *wc) 1842 struct ib_wc *wc)
1867{ 1843{
@@ -1871,7 +1847,6 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1871 struct ib_mad_list_head *mad_list; 1847 struct ib_mad_list_head *mad_list;
1872 struct ib_mad_agent_private *mad_agent; 1848 struct ib_mad_agent_private *mad_agent;
1873 int port_num; 1849 int port_num;
1874 int ret = IB_MAD_RESULT_SUCCESS;
1875 1850
1876 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id; 1851 mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1877 qp_info = mad_list->mad_queue->qp_info; 1852 qp_info = mad_list->mad_queue->qp_info;
@@ -1955,6 +1930,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1955local: 1930local:
1956 /* Give driver "right of first refusal" on incoming MAD */ 1931 /* Give driver "right of first refusal" on incoming MAD */
1957 if (port_priv->device->process_mad) { 1932 if (port_priv->device->process_mad) {
1933 int ret;
1934
1958 ret = port_priv->device->process_mad(port_priv->device, 0, 1935 ret = port_priv->device->process_mad(port_priv->device, 0,
1959 port_priv->port_num, 1936 port_priv->port_num,
1960 wc, &recv->grh, 1937 wc, &recv->grh,
@@ -1982,10 +1959,6 @@ local:
1982 * or via recv_handler in ib_mad_complete_recv() 1959 * or via recv_handler in ib_mad_complete_recv()
1983 */ 1960 */
1984 recv = NULL; 1961 recv = NULL;
1985 } else if ((ret & IB_MAD_RESULT_SUCCESS) &&
1986 generate_unmatched_resp(recv, response)) {
1987 agent_send_response(&response->mad.mad, &recv->grh, wc,
1988 port_priv->device, port_num, qp_info->qp->qp_num);
1989 } 1962 }
1990 1963
1991out: 1964out:
@@ -2004,7 +1977,7 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2004 unsigned long delay; 1977 unsigned long delay;
2005 1978
2006 if (list_empty(&mad_agent_priv->wait_list)) { 1979 if (list_empty(&mad_agent_priv->wait_list)) {
2007 cancel_delayed_work(&mad_agent_priv->timed_work); 1980 __cancel_delayed_work(&mad_agent_priv->timed_work);
2008 } else { 1981 } else {
2009 mad_send_wr = list_entry(mad_agent_priv->wait_list.next, 1982 mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2010 struct ib_mad_send_wr_private, 1983 struct ib_mad_send_wr_private,
@@ -2013,11 +1986,13 @@ static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2013 if (time_after(mad_agent_priv->timeout, 1986 if (time_after(mad_agent_priv->timeout,
2014 mad_send_wr->timeout)) { 1987 mad_send_wr->timeout)) {
2015 mad_agent_priv->timeout = mad_send_wr->timeout; 1988 mad_agent_priv->timeout = mad_send_wr->timeout;
1989 __cancel_delayed_work(&mad_agent_priv->timed_work);
2016 delay = mad_send_wr->timeout - jiffies; 1990 delay = mad_send_wr->timeout - jiffies;
2017 if ((long)delay <= 0) 1991 if ((long)delay <= 0)
2018 delay = 1; 1992 delay = 1;
2019 mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, 1993 queue_delayed_work(mad_agent_priv->qp_info->
2020 &mad_agent_priv->timed_work, delay); 1994 port_priv->wq,
1995 &mad_agent_priv->timed_work, delay);
2021 } 1996 }
2022 } 1997 }
2023} 1998}
@@ -2050,9 +2025,11 @@ static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2050 list_add(&mad_send_wr->agent_list, list_item); 2025 list_add(&mad_send_wr->agent_list, list_item);
2051 2026
2052 /* Reschedule a work item if we have a shorter timeout */ 2027 /* Reschedule a work item if we have a shorter timeout */
2053 if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) 2028 if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
2054 mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq, 2029 __cancel_delayed_work(&mad_agent_priv->timed_work);
2055 &mad_agent_priv->timed_work, delay); 2030 queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2031 &mad_agent_priv->timed_work, delay);
2032 }
2056} 2033}
2057 2034
2058void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr, 2035void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d2360a8ef0b..68b4162fd9d 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -34,7 +34,6 @@
34#include <linux/dma-mapping.h> 34#include <linux/dma-mapping.h>
35#include <linux/err.h> 35#include <linux/err.h>
36#include <linux/interrupt.h> 36#include <linux/interrupt.h>
37#include <linux/export.h>
38#include <linux/slab.h> 37#include <linux/slab.h>
39#include <linux/bitops.h> 38#include <linux/bitops.h>
40#include <linux/random.h> 39#include <linux/random.h>
diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c
index da06abde9e0..9227f4acd79 100644
--- a/drivers/infiniband/core/netlink.c
+++ b/drivers/infiniband/core/netlink.c
@@ -32,7 +32,6 @@
32 32
33#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ 33#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
34 34
35#include <linux/export.h>
36#include <net/netlink.h> 35#include <net/netlink.h>
37#include <net/net_namespace.h> 36#include <net/net_namespace.h>
38#include <net/sock.h> 37#include <net/sock.h>
@@ -108,14 +107,12 @@ void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq,
108 unsigned char *prev_tail; 107 unsigned char *prev_tail;
109 108
110 prev_tail = skb_tail_pointer(skb); 109 prev_tail = skb_tail_pointer(skb);
111 *nlh = nlmsg_put(skb, 0, seq, RDMA_NL_GET_TYPE(client, op), 110 *nlh = NLMSG_NEW(skb, 0, seq, RDMA_NL_GET_TYPE(client, op),
112 len, NLM_F_MULTI); 111 len, NLM_F_MULTI);
113 if (!*nlh)
114 goto out_nlmsg_trim;
115 (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail; 112 (*nlh)->nlmsg_len = skb_tail_pointer(skb) - prev_tail;
116 return nlmsg_data(*nlh); 113 return NLMSG_DATA(*nlh);
117 114
118out_nlmsg_trim: 115nlmsg_failure:
119 nlmsg_trim(skb, prev_tail); 116 nlmsg_trim(skb, prev_tail);
120 return NULL; 117 return NULL;
121} 118}
@@ -127,8 +124,7 @@ int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh,
127 unsigned char *prev_tail; 124 unsigned char *prev_tail;
128 125
129 prev_tail = skb_tail_pointer(skb); 126 prev_tail = skb_tail_pointer(skb);
130 if (nla_put(skb, type, len, data)) 127 NLA_PUT(skb, type, len, data);
131 goto nla_put_failure;
132 nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail; 128 nlh->nlmsg_len += skb_tail_pointer(skb) - prev_tail;
133 return 0; 129 return 0;
134 130
@@ -150,14 +146,9 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
150 if (op < 0 || op >= client->nops || 146 if (op < 0 || op >= client->nops ||
151 !client->cb_table[RDMA_NL_GET_OP(op)].dump) 147 !client->cb_table[RDMA_NL_GET_OP(op)].dump)
152 return -EINVAL; 148 return -EINVAL;
153 149 return netlink_dump_start(nls, skb, nlh,
154 { 150 client->cb_table[op].dump,
155 struct netlink_dump_control c = { 151 NULL, 0);
156 .dump = client->cb_table[op].dump,
157 .module = client->cb_table[op].module,
158 };
159 return netlink_dump_start(nls, skb, nlh, &c);
160 }
161 } 152 }
162 } 153 }
163 154
@@ -174,11 +165,8 @@ static void ibnl_rcv(struct sk_buff *skb)
174 165
175int __init ibnl_init(void) 166int __init ibnl_init(void)
176{ 167{
177 struct netlink_kernel_cfg cfg = { 168 nls = netlink_kernel_create(&init_net, NETLINK_RDMA, 0, ibnl_rcv,
178 .input = ibnl_rcv, 169 NULL, THIS_MODULE);
179 };
180
181 nls = netlink_kernel_create(&init_net, NETLINK_RDMA, &cfg);
182 if (!nls) { 170 if (!nls) {
183 pr_warn("Failed to create netlink socket\n"); 171 pr_warn("Failed to create netlink socket\n");
184 return -ENOMEM; 172 return -ENOMEM;
diff --git a/drivers/infiniband/core/packer.c b/drivers/infiniband/core/packer.c
index 1b65986c0be..019bd4b0863 100644
--- a/drivers/infiniband/core/packer.c
+++ b/drivers/infiniband/core/packer.c
@@ -31,7 +31,6 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/export.h>
35#include <linux/string.h> 34#include <linux/string.h>
36 35
37#include <rdma/ib_pack.h> 36#include <rdma/ib_pack.h>
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a8905abc56e..fbbfa24cf57 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -94,12 +94,6 @@ struct ib_sa_path_query {
94 struct ib_sa_query sa_query; 94 struct ib_sa_query sa_query;
95}; 95};
96 96
97struct ib_sa_guidinfo_query {
98 void (*callback)(int, struct ib_sa_guidinfo_rec *, void *);
99 void *context;
100 struct ib_sa_query sa_query;
101};
102
103struct ib_sa_mcmember_query { 97struct ib_sa_mcmember_query {
104 void (*callback)(int, struct ib_sa_mcmember_rec *, void *); 98 void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
105 void *context; 99 void *context;
@@ -353,34 +347,6 @@ static const struct ib_field service_rec_table[] = {
353 .size_bits = 2*64 }, 347 .size_bits = 2*64 },
354}; 348};
355 349
356#define GUIDINFO_REC_FIELD(field) \
357 .struct_offset_bytes = offsetof(struct ib_sa_guidinfo_rec, field), \
358 .struct_size_bytes = sizeof((struct ib_sa_guidinfo_rec *) 0)->field, \
359 .field_name = "sa_guidinfo_rec:" #field
360
361static const struct ib_field guidinfo_rec_table[] = {
362 { GUIDINFO_REC_FIELD(lid),
363 .offset_words = 0,
364 .offset_bits = 0,
365 .size_bits = 16 },
366 { GUIDINFO_REC_FIELD(block_num),
367 .offset_words = 0,
368 .offset_bits = 16,
369 .size_bits = 8 },
370 { GUIDINFO_REC_FIELD(res1),
371 .offset_words = 0,
372 .offset_bits = 24,
373 .size_bits = 8 },
374 { GUIDINFO_REC_FIELD(res2),
375 .offset_words = 1,
376 .offset_bits = 0,
377 .size_bits = 32 },
378 { GUIDINFO_REC_FIELD(guid_info_list),
379 .offset_words = 2,
380 .offset_bits = 0,
381 .size_bits = 512 },
382};
383
384static void free_sm_ah(struct kref *kref) 350static void free_sm_ah(struct kref *kref)
385{ 351{
386 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref); 352 struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -979,105 +945,6 @@ err1:
979 return ret; 945 return ret;
980} 946}
981 947
982/* Support GuidInfoRecord */
983static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query,
984 int status,
985 struct ib_sa_mad *mad)
986{
987 struct ib_sa_guidinfo_query *query =
988 container_of(sa_query, struct ib_sa_guidinfo_query, sa_query);
989
990 if (mad) {
991 struct ib_sa_guidinfo_rec rec;
992
993 ib_unpack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table),
994 mad->data, &rec);
995 query->callback(status, &rec, query->context);
996 } else
997 query->callback(status, NULL, query->context);
998}
999
1000static void ib_sa_guidinfo_rec_release(struct ib_sa_query *sa_query)
1001{
1002 kfree(container_of(sa_query, struct ib_sa_guidinfo_query, sa_query));
1003}
1004
1005int ib_sa_guid_info_rec_query(struct ib_sa_client *client,
1006 struct ib_device *device, u8 port_num,
1007 struct ib_sa_guidinfo_rec *rec,
1008 ib_sa_comp_mask comp_mask, u8 method,
1009 int timeout_ms, gfp_t gfp_mask,
1010 void (*callback)(int status,
1011 struct ib_sa_guidinfo_rec *resp,
1012 void *context),
1013 void *context,
1014 struct ib_sa_query **sa_query)
1015{
1016 struct ib_sa_guidinfo_query *query;
1017 struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
1018 struct ib_sa_port *port;
1019 struct ib_mad_agent *agent;
1020 struct ib_sa_mad *mad;
1021 int ret;
1022
1023 if (!sa_dev)
1024 return -ENODEV;
1025
1026 if (method != IB_MGMT_METHOD_GET &&
1027 method != IB_MGMT_METHOD_SET &&
1028 method != IB_SA_METHOD_DELETE) {
1029 return -EINVAL;
1030 }
1031
1032 port = &sa_dev->port[port_num - sa_dev->start_port];
1033 agent = port->agent;
1034
1035 query = kmalloc(sizeof *query, gfp_mask);
1036 if (!query)
1037 return -ENOMEM;
1038
1039 query->sa_query.port = port;
1040 ret = alloc_mad(&query->sa_query, gfp_mask);
1041 if (ret)
1042 goto err1;
1043
1044 ib_sa_client_get(client);
1045 query->sa_query.client = client;
1046 query->callback = callback;
1047 query->context = context;
1048
1049 mad = query->sa_query.mad_buf->mad;
1050 init_mad(mad, agent);
1051
1052 query->sa_query.callback = callback ? ib_sa_guidinfo_rec_callback : NULL;
1053 query->sa_query.release = ib_sa_guidinfo_rec_release;
1054
1055 mad->mad_hdr.method = method;
1056 mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_GUID_INFO_REC);
1057 mad->sa_hdr.comp_mask = comp_mask;
1058
1059 ib_pack(guidinfo_rec_table, ARRAY_SIZE(guidinfo_rec_table), rec,
1060 mad->data);
1061
1062 *sa_query = &query->sa_query;
1063
1064 ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
1065 if (ret < 0)
1066 goto err2;
1067
1068 return ret;
1069
1070err2:
1071 *sa_query = NULL;
1072 ib_sa_client_put(query->sa_query.client);
1073 free_mad(&query->sa_query);
1074
1075err1:
1076 kfree(query);
1077 return ret;
1078}
1079EXPORT_SYMBOL(ib_sa_guid_info_rec_query);
1080
1081static void send_handler(struct ib_mad_agent *agent, 948static void send_handler(struct ib_mad_agent *agent,
1082 struct ib_mad_send_wc *mad_send_wc) 949 struct ib_mad_send_wc *mad_send_wc)
1083{ 950{
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 246fdc15165..9ab5df72df7 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -35,7 +35,6 @@
35#include "core_priv.h" 35#include "core_priv.h"
36 36
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/stat.h>
39#include <linux/string.h> 38#include <linux/string.h>
40 39
41#include <rdma/ib_mad.h> 40#include <rdma/ib_mad.h>
@@ -179,7 +178,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
179{ 178{
180 struct ib_port_attr attr; 179 struct ib_port_attr attr;
181 char *speed = ""; 180 char *speed = "";
182 int rate; /* in deci-Gb/sec */ 181 int rate;
183 ssize_t ret; 182 ssize_t ret;
184 183
185 ret = ib_query_port(p->ibdev, p->port_num, &attr); 184 ret = ib_query_port(p->ibdev, p->port_num, &attr);
@@ -187,33 +186,11 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused,
187 return ret; 186 return ret;
188 187
189 switch (attr.active_speed) { 188 switch (attr.active_speed) {
190 case IB_SPEED_DDR: 189 case 2: speed = " DDR"; break;
191 speed = " DDR"; 190 case 4: speed = " QDR"; break;
192 rate = 50;
193 break;
194 case IB_SPEED_QDR:
195 speed = " QDR";
196 rate = 100;
197 break;
198 case IB_SPEED_FDR10:
199 speed = " FDR10";
200 rate = 100;
201 break;
202 case IB_SPEED_FDR:
203 speed = " FDR";
204 rate = 140;
205 break;
206 case IB_SPEED_EDR:
207 speed = " EDR";
208 rate = 250;
209 break;
210 case IB_SPEED_SDR:
211 default: /* default to SDR for invalid rates */
212 rate = 25;
213 break;
214 } 191 }
215 192
216 rate *= ib_width_enum_to_int(attr.active_width); 193 rate = 25 * ib_width_enum_to_int(attr.active_width) * attr.active_speed;
217 if (rate < 0) 194 if (rate < 0)
218 return -EINVAL; 195 return -EINVAL;
219 196
diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c
index 49b15ac1987..08f948df8fa 100644
--- a/drivers/infiniband/core/ucm.c
+++ b/drivers/infiniband/core/ucm.c
@@ -106,6 +106,9 @@ enum {
106 IB_UCM_MAX_DEVICES = 32 106 IB_UCM_MAX_DEVICES = 32
107}; 107};
108 108
109/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */
110extern struct class cm_class;
111
109#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) 112#define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR)
110 113
111static void ib_ucm_add_one(struct ib_device *device); 114static void ib_ucm_add_one(struct ib_device *device);
@@ -397,6 +400,7 @@ static ssize_t ib_ucm_event(struct ib_ucm_file *file,
397 struct ib_ucm_event_get cmd; 400 struct ib_ucm_event_get cmd;
398 struct ib_ucm_event *uevent; 401 struct ib_ucm_event *uevent;
399 int result = 0; 402 int result = 0;
403 DEFINE_WAIT(wait);
400 404
401 if (out_len < sizeof(struct ib_ucm_event_resp)) 405 if (out_len < sizeof(struct ib_ucm_event_resp))
402 return -ENOSPC; 406 return -ENOSPC;
@@ -1118,7 +1122,7 @@ static ssize_t ib_ucm_write(struct file *filp, const char __user *buf,
1118 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1122 if (copy_from_user(&hdr, buf, sizeof(hdr)))
1119 return -EFAULT; 1123 return -EFAULT;
1120 1124
1121 if (hdr.cmd >= ARRAY_SIZE(ucm_cmd_table)) 1125 if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucm_cmd_table))
1122 return -EINVAL; 1126 return -EINVAL;
1123 1127
1124 if (hdr.in + sizeof(hdr) > len) 1128 if (hdr.in + sizeof(hdr) > len)
diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c
index 2709ff58139..71be5eebd68 100644
--- a/drivers/infiniband/core/ucma.c
+++ b/drivers/infiniband/core/ucma.c
@@ -41,7 +41,6 @@
41#include <linux/miscdevice.h> 41#include <linux/miscdevice.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sysctl.h> 43#include <linux/sysctl.h>
44#include <linux/module.h>
45 44
46#include <rdma/rdma_user_cm.h> 45#include <rdma/rdma_user_cm.h>
47#include <rdma/ib_marshall.h> 46#include <rdma/ib_marshall.h>
@@ -66,6 +65,12 @@ static ctl_table ucma_ctl_table[] = {
66 { } 65 { }
67}; 66};
68 67
68static struct ctl_path ucma_ctl_path[] = {
69 { .procname = "net" },
70 { .procname = "rdma_ucm" },
71 { }
72};
73
69struct ucma_file { 74struct ucma_file {
70 struct mutex mut; 75 struct mutex mut;
71 struct file *filp; 76 struct file *filp;
@@ -267,17 +272,17 @@ static int ucma_event_handler(struct rdma_cm_id *cm_id,
267 if (!uevent) 272 if (!uevent)
268 return event->event == RDMA_CM_EVENT_CONNECT_REQUEST; 273 return event->event == RDMA_CM_EVENT_CONNECT_REQUEST;
269 274
270 mutex_lock(&ctx->file->mut);
271 uevent->cm_id = cm_id; 275 uevent->cm_id = cm_id;
272 ucma_set_event_context(ctx, event, uevent); 276 ucma_set_event_context(ctx, event, uevent);
273 uevent->resp.event = event->event; 277 uevent->resp.event = event->event;
274 uevent->resp.status = event->status; 278 uevent->resp.status = event->status;
275 if (cm_id->qp_type == IB_QPT_UD) 279 if (cm_id->ps == RDMA_PS_UDP || cm_id->ps == RDMA_PS_IPOIB)
276 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud); 280 ucma_copy_ud_event(&uevent->resp.param.ud, &event->param.ud);
277 else 281 else
278 ucma_copy_conn_event(&uevent->resp.param.conn, 282 ucma_copy_conn_event(&uevent->resp.param.conn,
279 &event->param.conn); 283 &event->param.conn);
280 284
285 mutex_lock(&ctx->file->mut);
281 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) { 286 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) {
282 if (!ctx->backlog) { 287 if (!ctx->backlog) {
283 ret = -ENOMEM; 288 ret = -ENOMEM;
@@ -310,6 +315,7 @@ static ssize_t ucma_get_event(struct ucma_file *file, const char __user *inbuf,
310 struct rdma_ucm_get_event cmd; 315 struct rdma_ucm_get_event cmd;
311 struct ucma_event *uevent; 316 struct ucma_event *uevent;
312 int ret = 0; 317 int ret = 0;
318 DEFINE_WAIT(wait);
313 319
314 if (out_len < sizeof uevent->resp) 320 if (out_len < sizeof uevent->resp)
315 return -ENOSPC; 321 return -ENOSPC;
@@ -371,9 +377,6 @@ static int ucma_get_qp_type(struct rdma_ucm_create_id *cmd, enum ib_qp_type *qp_
371 case RDMA_PS_IPOIB: 377 case RDMA_PS_IPOIB:
372 *qp_type = IB_QPT_UD; 378 *qp_type = IB_QPT_UD;
373 return 0; 379 return 0;
374 case RDMA_PS_IB:
375 *qp_type = cmd->qp_type;
376 return 0;
377 default: 380 default:
378 return -EINVAL; 381 return -EINVAL;
379 } 382 }
@@ -442,6 +445,24 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx)
442 mutex_unlock(&mut); 445 mutex_unlock(&mut);
443} 446}
444 447
448static void ucma_cleanup_events(struct ucma_context *ctx)
449{
450 struct ucma_event *uevent, *tmp;
451
452 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) {
453 if (uevent->ctx != ctx)
454 continue;
455
456 list_del(&uevent->list);
457
458 /* clear incoming connections. */
459 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
460 rdma_destroy_id(uevent->cm_id);
461
462 kfree(uevent);
463 }
464}
465
445static void ucma_cleanup_mc_events(struct ucma_multicast *mc) 466static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
446{ 467{
447 struct ucma_event *uevent, *tmp; 468 struct ucma_event *uevent, *tmp;
@@ -455,16 +476,9 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc)
455 } 476 }
456} 477}
457 478
458/*
459 * We cannot hold file->mut when calling rdma_destroy_id() or we can
460 * deadlock. We also acquire file->mut in ucma_event_handler(), and
461 * rdma_destroy_id() will wait until all callbacks have completed.
462 */
463static int ucma_free_ctx(struct ucma_context *ctx) 479static int ucma_free_ctx(struct ucma_context *ctx)
464{ 480{
465 int events_reported; 481 int events_reported;
466 struct ucma_event *uevent, *tmp;
467 LIST_HEAD(list);
468 482
469 /* No new events will be generated after destroying the id. */ 483 /* No new events will be generated after destroying the id. */
470 rdma_destroy_id(ctx->cm_id); 484 rdma_destroy_id(ctx->cm_id);
@@ -473,20 +487,10 @@ static int ucma_free_ctx(struct ucma_context *ctx)
473 487
474 /* Cleanup events not yet reported to the user. */ 488 /* Cleanup events not yet reported to the user. */
475 mutex_lock(&ctx->file->mut); 489 mutex_lock(&ctx->file->mut);
476 list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { 490 ucma_cleanup_events(ctx);
477 if (uevent->ctx == ctx)
478 list_move_tail(&uevent->list, &list);
479 }
480 list_del(&ctx->list); 491 list_del(&ctx->list);
481 mutex_unlock(&ctx->file->mut); 492 mutex_unlock(&ctx->file->mut);
482 493
483 list_for_each_entry_safe(uevent, tmp, &list, list) {
484 list_del(&uevent->list);
485 if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST)
486 rdma_destroy_id(uevent->cm_id);
487 kfree(uevent);
488 }
489
490 events_reported = ctx->events_reported; 494 events_reported = ctx->events_reported;
491 kfree(ctx); 495 kfree(ctx);
492 return events_reported; 496 return events_reported;
@@ -800,12 +804,9 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf,
800 return PTR_ERR(ctx); 804 return PTR_ERR(ctx);
801 805
802 if (cmd.conn_param.valid) { 806 if (cmd.conn_param.valid) {
807 ctx->uid = cmd.uid;
803 ucma_copy_conn_param(&conn_param, &cmd.conn_param); 808 ucma_copy_conn_param(&conn_param, &cmd.conn_param);
804 mutex_lock(&file->mut);
805 ret = rdma_accept(ctx->cm_id, &conn_param); 809 ret = rdma_accept(ctx->cm_id, &conn_param);
806 if (!ret)
807 ctx->uid = cmd.uid;
808 mutex_unlock(&file->mut);
809 } else 810 } else
810 ret = rdma_accept(ctx->cm_id, NULL); 811 ret = rdma_accept(ctx->cm_id, NULL);
811 812
@@ -908,13 +909,6 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname,
908 } 909 }
909 ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0); 910 ret = rdma_set_reuseaddr(ctx->cm_id, *((int *) optval) ? 1 : 0);
910 break; 911 break;
911 case RDMA_OPTION_ID_AFONLY:
912 if (optlen != sizeof(int)) {
913 ret = -EINVAL;
914 break;
915 }
916 ret = rdma_set_afonly(ctx->cm_id, *((int *) optval) ? 1 : 0);
917 break;
918 default: 912 default:
919 ret = -ENOSYS; 913 ret = -ENOSYS;
920 } 914 }
@@ -1001,18 +995,23 @@ static ssize_t ucma_set_option(struct ucma_file *file, const char __user *inbuf,
1001 if (IS_ERR(ctx)) 995 if (IS_ERR(ctx))
1002 return PTR_ERR(ctx); 996 return PTR_ERR(ctx);
1003 997
1004 optval = memdup_user((void __user *) (unsigned long) cmd.optval, 998 optval = kmalloc(cmd.optlen, GFP_KERNEL);
1005 cmd.optlen); 999 if (!optval) {
1006 if (IS_ERR(optval)) { 1000 ret = -ENOMEM;
1007 ret = PTR_ERR(optval); 1001 goto out1;
1008 goto out; 1002 }
1003
1004 if (copy_from_user(optval, (void __user *) (unsigned long) cmd.optval,
1005 cmd.optlen)) {
1006 ret = -EFAULT;
1007 goto out2;
1009 } 1008 }
1010 1009
1011 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval, 1010 ret = ucma_set_option_level(ctx, cmd.level, cmd.optname, optval,
1012 cmd.optlen); 1011 cmd.optlen);
1012out2:
1013 kfree(optval); 1013 kfree(optval);
1014 1014out1:
1015out:
1016 ucma_put_ctx(ctx); 1015 ucma_put_ctx(ctx);
1017 return ret; 1016 return ret;
1018} 1017}
@@ -1183,7 +1182,7 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1183 struct rdma_ucm_migrate_id cmd; 1182 struct rdma_ucm_migrate_id cmd;
1184 struct rdma_ucm_migrate_resp resp; 1183 struct rdma_ucm_migrate_resp resp;
1185 struct ucma_context *ctx; 1184 struct ucma_context *ctx;
1186 struct fd f; 1185 struct file *filp;
1187 struct ucma_file *cur_file; 1186 struct ucma_file *cur_file;
1188 int ret = 0; 1187 int ret = 0;
1189 1188
@@ -1191,12 +1190,12 @@ static ssize_t ucma_migrate_id(struct ucma_file *new_file,
1191 return -EFAULT; 1190 return -EFAULT;
1192 1191
1193 /* Get current fd to protect against it being closed */ 1192 /* Get current fd to protect against it being closed */
1194 f = fdget(cmd.fd); 1193 filp = fget(cmd.fd);
1195 if (!f.file) 1194 if (!filp)
1196 return -ENOENT; 1195 return -ENOENT;
1197 1196
1198 /* Validate current fd and prevent destruction of id. */ 1197 /* Validate current fd and prevent destruction of id. */
1199 ctx = ucma_get_ctx(f.file->private_data, cmd.id); 1198 ctx = ucma_get_ctx(filp->private_data, cmd.id);
1200 if (IS_ERR(ctx)) { 1199 if (IS_ERR(ctx)) {
1201 ret = PTR_ERR(ctx); 1200 ret = PTR_ERR(ctx);
1202 goto file_put; 1201 goto file_put;
@@ -1230,7 +1229,7 @@ response:
1230 1229
1231 ucma_put_ctx(ctx); 1230 ucma_put_ctx(ctx);
1232file_put: 1231file_put:
1233 fdput(f); 1232 fput(filp);
1234 return ret; 1233 return ret;
1235} 1234}
1236 1235
@@ -1271,7 +1270,7 @@ static ssize_t ucma_write(struct file *filp, const char __user *buf,
1271 if (copy_from_user(&hdr, buf, sizeof(hdr))) 1270 if (copy_from_user(&hdr, buf, sizeof(hdr)))
1272 return -EFAULT; 1271 return -EFAULT;
1273 1272
1274 if (hdr.cmd >= ARRAY_SIZE(ucma_cmd_table)) 1273 if (hdr.cmd < 0 || hdr.cmd >= ARRAY_SIZE(ucma_cmd_table))
1275 return -EINVAL; 1274 return -EINVAL;
1276 1275
1277 if (hdr.in + sizeof(hdr) > len) 1276 if (hdr.in + sizeof(hdr) > len)
@@ -1387,7 +1386,7 @@ static int __init ucma_init(void)
1387 goto err1; 1386 goto err1;
1388 } 1387 }
1389 1388
1390 ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table); 1389 ucma_ctl_table_hdr = register_sysctl_paths(ucma_ctl_path, ucma_ctl_table);
1391 if (!ucma_ctl_table_hdr) { 1390 if (!ucma_ctl_table_hdr) {
1392 printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n"); 1391 printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
1393 ret = -ENOMEM; 1392 ret = -ENOMEM;
@@ -1403,7 +1402,7 @@ err1:
1403 1402
1404static void __exit ucma_cleanup(void) 1403static void __exit ucma_cleanup(void)
1405{ 1404{
1406 unregister_net_sysctl_table(ucma_ctl_table_hdr); 1405 unregister_sysctl_table(ucma_ctl_table_hdr);
1407 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version); 1406 device_remove_file(ucma_misc.this_device, &dev_attr_abi_version);
1408 misc_deregister(&ucma_misc); 1407 misc_deregister(&ucma_misc);
1409 idr_destroy(&ctx_idr); 1408 idr_destroy(&ctx_idr);
diff --git a/drivers/infiniband/core/ud_header.c b/drivers/infiniband/core/ud_header.c
index 72feee620eb..9b737ff133e 100644
--- a/drivers/infiniband/core/ud_header.c
+++ b/drivers/infiniband/core/ud_header.c
@@ -33,7 +33,6 @@
33 33
34#include <linux/errno.h> 34#include <linux/errno.h>
35#include <linux/string.h> 35#include <linux/string.h>
36#include <linux/export.h>
37#include <linux/if_ether.h> 36#include <linux/if_ether.h>
38 37
39#include <rdma/ib_pack.h> 38#include <rdma/ib_pack.h>
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index a8411232207..b645e558876 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -35,7 +35,6 @@
35#include <linux/mm.h> 35#include <linux/mm.h>
36#include <linux/dma-mapping.h> 36#include <linux/dma-mapping.h>
37#include <linux/sched.h> 37#include <linux/sched.h>
38#include <linux/export.h>
39#include <linux/hugetlb.h> 38#include <linux/hugetlb.h>
40#include <linux/dma-attrs.h> 39#include <linux/dma-attrs.h>
41#include <linux/slab.h> 40#include <linux/slab.h>
@@ -137,7 +136,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
137 136
138 down_write(&current->mm->mmap_sem); 137 down_write(&current->mm->mmap_sem);
139 138
140 locked = npages + current->mm->pinned_vm; 139 locked = npages + current->mm->locked_vm;
141 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 140 lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
142 141
143 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { 142 if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -207,7 +206,7 @@ out:
207 __ib_umem_release(context->device, umem, 0); 206 __ib_umem_release(context->device, umem, 0);
208 kfree(umem); 207 kfree(umem);
209 } else 208 } else
210 current->mm->pinned_vm = locked; 209 current->mm->locked_vm = locked;
211 210
212 up_write(&current->mm->mmap_sem); 211 up_write(&current->mm->mmap_sem);
213 if (vma_list) 212 if (vma_list)
@@ -223,7 +222,7 @@ static void ib_umem_account(struct work_struct *work)
223 struct ib_umem *umem = container_of(work, struct ib_umem, work); 222 struct ib_umem *umem = container_of(work, struct ib_umem, work);
224 223
225 down_write(&umem->mm->mmap_sem); 224 down_write(&umem->mm->mmap_sem);
226 umem->mm->pinned_vm -= umem->diff; 225 umem->mm->locked_vm -= umem->diff;
227 up_write(&umem->mm->mmap_sem); 226 up_write(&umem->mm->mmap_sem);
228 mmput(umem->mm); 227 mmput(umem->mm);
229 kfree(umem); 228 kfree(umem);
@@ -269,7 +268,7 @@ void ib_umem_release(struct ib_umem *umem)
269 } else 268 } else
270 down_write(&mm->mmap_sem); 269 down_write(&mm->mmap_sem);
271 270
272 current->mm->pinned_vm -= diff; 271 current->mm->locked_vm -= diff;
273 up_write(&mm->mmap_sem); 272 up_write(&mm->mmap_sem);
274 mmput(mm); 273 mmput(mm);
275 kfree(umem); 274 kfree(umem);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index f0d588f8859..8d261b6ea5f 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -458,7 +458,8 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
458 goto err; 458 goto err;
459 } 459 }
460 460
461 if (packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) { 461 if (packet->mad.hdr.id < 0 ||
462 packet->mad.hdr.id >= IB_UMAD_MAX_AGENTS) {
462 ret = -EINVAL; 463 ret = -EINVAL;
463 goto err; 464 goto err;
464 } 465 }
@@ -702,7 +703,7 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
702 mutex_lock(&file->port->file_mutex); 703 mutex_lock(&file->port->file_mutex);
703 mutex_lock(&file->mutex); 704 mutex_lock(&file->mutex);
704 705
705 if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) { 706 if (id < 0 || id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
706 ret = -EINVAL; 707 ret = -EINVAL;
707 goto out; 708 goto out;
708 } 709 }
@@ -1175,7 +1176,7 @@ static void ib_umad_remove_one(struct ib_device *device)
1175 kref_put(&umad_dev->ref, ib_umad_release_dev); 1176 kref_put(&umad_dev->ref, ib_umad_release_dev);
1176} 1177}
1177 1178
1178static char *umad_devnode(struct device *dev, umode_t *mode) 1179static char *umad_devnode(struct device *dev, mode_t *mode)
1179{ 1180{
1180 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); 1181 return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
1181} 1182}
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5bcb2afd3dc..a078e5624d2 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -76,8 +76,6 @@ struct ib_uverbs_device {
76 struct ib_device *ib_dev; 76 struct ib_device *ib_dev;
77 int devnum; 77 int devnum;
78 struct cdev cdev; 78 struct cdev cdev;
79 struct rb_root xrcd_tree;
80 struct mutex xrcd_tree_mutex;
81}; 79};
82 80
83struct ib_uverbs_event_file { 81struct ib_uverbs_event_file {
@@ -122,16 +120,6 @@ struct ib_uevent_object {
122 u32 events_reported; 120 u32 events_reported;
123}; 121};
124 122
125struct ib_uxrcd_object {
126 struct ib_uobject uobject;
127 atomic_t refcnt;
128};
129
130struct ib_usrq_object {
131 struct ib_uevent_object uevent;
132 struct ib_uxrcd_object *uxrcd;
133};
134
135struct ib_uqp_object { 123struct ib_uqp_object {
136 struct ib_uevent_object uevent; 124 struct ib_uevent_object uevent;
137 struct list_head mcast_list; 125 struct list_head mcast_list;
@@ -154,7 +142,6 @@ extern struct idr ib_uverbs_ah_idr;
154extern struct idr ib_uverbs_cq_idr; 142extern struct idr ib_uverbs_cq_idr;
155extern struct idr ib_uverbs_qp_idr; 143extern struct idr ib_uverbs_qp_idr;
156extern struct idr ib_uverbs_srq_idr; 144extern struct idr ib_uverbs_srq_idr;
157extern struct idr ib_uverbs_xrcd_idr;
158 145
159void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj); 146void idr_remove_uobj(struct idr *idp, struct ib_uobject *uobj);
160 147
@@ -174,7 +161,6 @@ void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr);
174void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr); 161void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
175void ib_uverbs_event_handler(struct ib_event_handler *handler, 162void ib_uverbs_event_handler(struct ib_event_handler *handler,
176 struct ib_event *event); 163 struct ib_event *event);
177void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd);
178 164
179#define IB_UVERBS_DECLARE_CMD(name) \ 165#define IB_UVERBS_DECLARE_CMD(name) \
180 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ 166 ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \
@@ -195,7 +181,6 @@ IB_UVERBS_DECLARE_CMD(poll_cq);
195IB_UVERBS_DECLARE_CMD(req_notify_cq); 181IB_UVERBS_DECLARE_CMD(req_notify_cq);
196IB_UVERBS_DECLARE_CMD(destroy_cq); 182IB_UVERBS_DECLARE_CMD(destroy_cq);
197IB_UVERBS_DECLARE_CMD(create_qp); 183IB_UVERBS_DECLARE_CMD(create_qp);
198IB_UVERBS_DECLARE_CMD(open_qp);
199IB_UVERBS_DECLARE_CMD(query_qp); 184IB_UVERBS_DECLARE_CMD(query_qp);
200IB_UVERBS_DECLARE_CMD(modify_qp); 185IB_UVERBS_DECLARE_CMD(modify_qp);
201IB_UVERBS_DECLARE_CMD(destroy_qp); 186IB_UVERBS_DECLARE_CMD(destroy_qp);
@@ -210,8 +195,5 @@ IB_UVERBS_DECLARE_CMD(create_srq);
210IB_UVERBS_DECLARE_CMD(modify_srq); 195IB_UVERBS_DECLARE_CMD(modify_srq);
211IB_UVERBS_DECLARE_CMD(query_srq); 196IB_UVERBS_DECLARE_CMD(query_srq);
212IB_UVERBS_DECLARE_CMD(destroy_srq); 197IB_UVERBS_DECLARE_CMD(destroy_srq);
213IB_UVERBS_DECLARE_CMD(create_xsrq);
214IB_UVERBS_DECLARE_CMD(open_xrcd);
215IB_UVERBS_DECLARE_CMD(close_xrcd);
216 198
217#endif /* UVERBS_H */ 199#endif /* UVERBS_H */
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 0cb0007724a..c42699285f8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -41,18 +41,12 @@
41 41
42#include "uverbs.h" 42#include "uverbs.h"
43 43
44struct uverbs_lock_class { 44static struct lock_class_key pd_lock_key;
45 struct lock_class_key key; 45static struct lock_class_key mr_lock_key;
46 char name[16]; 46static struct lock_class_key cq_lock_key;
47}; 47static struct lock_class_key qp_lock_key;
48 48static struct lock_class_key ah_lock_key;
49static struct uverbs_lock_class pd_lock_class = { .name = "PD-uobj" }; 49static struct lock_class_key srq_lock_key;
50static struct uverbs_lock_class mr_lock_class = { .name = "MR-uobj" };
51static struct uverbs_lock_class cq_lock_class = { .name = "CQ-uobj" };
52static struct uverbs_lock_class qp_lock_class = { .name = "QP-uobj" };
53static struct uverbs_lock_class ah_lock_class = { .name = "AH-uobj" };
54static struct uverbs_lock_class srq_lock_class = { .name = "SRQ-uobj" };
55static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
56 50
57#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ 51#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \
58 do { \ 52 do { \
@@ -88,13 +82,13 @@ static struct uverbs_lock_class xrcd_lock_class = { .name = "XRCD-uobj" };
88 */ 82 */
89 83
90static void init_uobj(struct ib_uobject *uobj, u64 user_handle, 84static void init_uobj(struct ib_uobject *uobj, u64 user_handle,
91 struct ib_ucontext *context, struct uverbs_lock_class *c) 85 struct ib_ucontext *context, struct lock_class_key *key)
92{ 86{
93 uobj->user_handle = user_handle; 87 uobj->user_handle = user_handle;
94 uobj->context = context; 88 uobj->context = context;
95 kref_init(&uobj->ref); 89 kref_init(&uobj->ref);
96 init_rwsem(&uobj->mutex); 90 init_rwsem(&uobj->mutex);
97 lockdep_set_class_and_name(&uobj->mutex, &c->key, c->name); 91 lockdep_set_class(&uobj->mutex, key);
98 uobj->live = 0; 92 uobj->live = 0;
99} 93}
100 94
@@ -246,24 +240,11 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context)
246 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); 240 return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0);
247} 241}
248 242
249static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context)
250{
251 struct ib_uobject *uobj;
252
253 uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context);
254 return uobj ? uobj->object : NULL;
255}
256
257static void put_qp_read(struct ib_qp *qp) 243static void put_qp_read(struct ib_qp *qp)
258{ 244{
259 put_uobj_read(qp->uobject); 245 put_uobj_read(qp->uobject);
260} 246}
261 247
262static void put_qp_write(struct ib_qp *qp)
263{
264 put_uobj_write(qp->uobject);
265}
266
267static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) 248static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context)
268{ 249{
269 return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); 250 return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0);
@@ -274,18 +255,6 @@ static void put_srq_read(struct ib_srq *srq)
274 put_uobj_read(srq->uobject); 255 put_uobj_read(srq->uobject);
275} 256}
276 257
277static struct ib_xrcd *idr_read_xrcd(int xrcd_handle, struct ib_ucontext *context,
278 struct ib_uobject **uobj)
279{
280 *uobj = idr_read_uobj(&ib_uverbs_xrcd_idr, xrcd_handle, context, 0);
281 return *uobj ? (*uobj)->object : NULL;
282}
283
284static void put_xrcd_read(struct ib_uobject *uobj)
285{
286 put_uobj_read(uobj);
287}
288
289ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, 258ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
290 const char __user *buf, 259 const char __user *buf,
291 int in_len, int out_len) 260 int in_len, int out_len)
@@ -329,7 +298,6 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
329 INIT_LIST_HEAD(&ucontext->qp_list); 298 INIT_LIST_HEAD(&ucontext->qp_list);
330 INIT_LIST_HEAD(&ucontext->srq_list); 299 INIT_LIST_HEAD(&ucontext->srq_list);
331 INIT_LIST_HEAD(&ucontext->ah_list); 300 INIT_LIST_HEAD(&ucontext->ah_list);
332 INIT_LIST_HEAD(&ucontext->xrcd_list);
333 ucontext->closing = 0; 301 ucontext->closing = 0;
334 302
335 resp.num_comp_vectors = file->device->num_comp_vectors; 303 resp.num_comp_vectors = file->device->num_comp_vectors;
@@ -527,7 +495,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
527 if (!uobj) 495 if (!uobj)
528 return -ENOMEM; 496 return -ENOMEM;
529 497
530 init_uobj(uobj, 0, file->ucontext, &pd_lock_class); 498 init_uobj(uobj, 0, file->ucontext, &pd_lock_key);
531 down_write(&uobj->mutex); 499 down_write(&uobj->mutex);
532 500
533 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, 501 pd = file->device->ib_dev->alloc_pd(file->device->ib_dev,
@@ -611,305 +579,6 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
611 return in_len; 579 return in_len;
612} 580}
613 581
614struct xrcd_table_entry {
615 struct rb_node node;
616 struct ib_xrcd *xrcd;
617 struct inode *inode;
618};
619
620static int xrcd_table_insert(struct ib_uverbs_device *dev,
621 struct inode *inode,
622 struct ib_xrcd *xrcd)
623{
624 struct xrcd_table_entry *entry, *scan;
625 struct rb_node **p = &dev->xrcd_tree.rb_node;
626 struct rb_node *parent = NULL;
627
628 entry = kmalloc(sizeof *entry, GFP_KERNEL);
629 if (!entry)
630 return -ENOMEM;
631
632 entry->xrcd = xrcd;
633 entry->inode = inode;
634
635 while (*p) {
636 parent = *p;
637 scan = rb_entry(parent, struct xrcd_table_entry, node);
638
639 if (inode < scan->inode) {
640 p = &(*p)->rb_left;
641 } else if (inode > scan->inode) {
642 p = &(*p)->rb_right;
643 } else {
644 kfree(entry);
645 return -EEXIST;
646 }
647 }
648
649 rb_link_node(&entry->node, parent, p);
650 rb_insert_color(&entry->node, &dev->xrcd_tree);
651 igrab(inode);
652 return 0;
653}
654
655static struct xrcd_table_entry *xrcd_table_search(struct ib_uverbs_device *dev,
656 struct inode *inode)
657{
658 struct xrcd_table_entry *entry;
659 struct rb_node *p = dev->xrcd_tree.rb_node;
660
661 while (p) {
662 entry = rb_entry(p, struct xrcd_table_entry, node);
663
664 if (inode < entry->inode)
665 p = p->rb_left;
666 else if (inode > entry->inode)
667 p = p->rb_right;
668 else
669 return entry;
670 }
671
672 return NULL;
673}
674
675static struct ib_xrcd *find_xrcd(struct ib_uverbs_device *dev, struct inode *inode)
676{
677 struct xrcd_table_entry *entry;
678
679 entry = xrcd_table_search(dev, inode);
680 if (!entry)
681 return NULL;
682
683 return entry->xrcd;
684}
685
686static void xrcd_table_delete(struct ib_uverbs_device *dev,
687 struct inode *inode)
688{
689 struct xrcd_table_entry *entry;
690
691 entry = xrcd_table_search(dev, inode);
692 if (entry) {
693 iput(inode);
694 rb_erase(&entry->node, &dev->xrcd_tree);
695 kfree(entry);
696 }
697}
698
699ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
700 const char __user *buf, int in_len,
701 int out_len)
702{
703 struct ib_uverbs_open_xrcd cmd;
704 struct ib_uverbs_open_xrcd_resp resp;
705 struct ib_udata udata;
706 struct ib_uxrcd_object *obj;
707 struct ib_xrcd *xrcd = NULL;
708 struct fd f = {NULL, 0};
709 struct inode *inode = NULL;
710 int ret = 0;
711 int new_xrcd = 0;
712
713 if (out_len < sizeof resp)
714 return -ENOSPC;
715
716 if (copy_from_user(&cmd, buf, sizeof cmd))
717 return -EFAULT;
718
719 INIT_UDATA(&udata, buf + sizeof cmd,
720 (unsigned long) cmd.response + sizeof resp,
721 in_len - sizeof cmd, out_len - sizeof resp);
722
723 mutex_lock(&file->device->xrcd_tree_mutex);
724
725 if (cmd.fd != -1) {
726 /* search for file descriptor */
727 f = fdget(cmd.fd);
728 if (!f.file) {
729 ret = -EBADF;
730 goto err_tree_mutex_unlock;
731 }
732
733 inode = f.file->f_path.dentry->d_inode;
734 xrcd = find_xrcd(file->device, inode);
735 if (!xrcd && !(cmd.oflags & O_CREAT)) {
736 /* no file descriptor. Need CREATE flag */
737 ret = -EAGAIN;
738 goto err_tree_mutex_unlock;
739 }
740
741 if (xrcd && cmd.oflags & O_EXCL) {
742 ret = -EINVAL;
743 goto err_tree_mutex_unlock;
744 }
745 }
746
747 obj = kmalloc(sizeof *obj, GFP_KERNEL);
748 if (!obj) {
749 ret = -ENOMEM;
750 goto err_tree_mutex_unlock;
751 }
752
753 init_uobj(&obj->uobject, 0, file->ucontext, &xrcd_lock_class);
754
755 down_write(&obj->uobject.mutex);
756
757 if (!xrcd) {
758 xrcd = file->device->ib_dev->alloc_xrcd(file->device->ib_dev,
759 file->ucontext, &udata);
760 if (IS_ERR(xrcd)) {
761 ret = PTR_ERR(xrcd);
762 goto err;
763 }
764
765 xrcd->inode = inode;
766 xrcd->device = file->device->ib_dev;
767 atomic_set(&xrcd->usecnt, 0);
768 mutex_init(&xrcd->tgt_qp_mutex);
769 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
770 new_xrcd = 1;
771 }
772
773 atomic_set(&obj->refcnt, 0);
774 obj->uobject.object = xrcd;
775 ret = idr_add_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
776 if (ret)
777 goto err_idr;
778
779 memset(&resp, 0, sizeof resp);
780 resp.xrcd_handle = obj->uobject.id;
781
782 if (inode) {
783 if (new_xrcd) {
784 /* create new inode/xrcd table entry */
785 ret = xrcd_table_insert(file->device, inode, xrcd);
786 if (ret)
787 goto err_insert_xrcd;
788 }
789 atomic_inc(&xrcd->usecnt);
790 }
791
792 if (copy_to_user((void __user *) (unsigned long) cmd.response,
793 &resp, sizeof resp)) {
794 ret = -EFAULT;
795 goto err_copy;
796 }
797
798 if (f.file)
799 fdput(f);
800
801 mutex_lock(&file->mutex);
802 list_add_tail(&obj->uobject.list, &file->ucontext->xrcd_list);
803 mutex_unlock(&file->mutex);
804
805 obj->uobject.live = 1;
806 up_write(&obj->uobject.mutex);
807
808 mutex_unlock(&file->device->xrcd_tree_mutex);
809 return in_len;
810
811err_copy:
812 if (inode) {
813 if (new_xrcd)
814 xrcd_table_delete(file->device, inode);
815 atomic_dec(&xrcd->usecnt);
816 }
817
818err_insert_xrcd:
819 idr_remove_uobj(&ib_uverbs_xrcd_idr, &obj->uobject);
820
821err_idr:
822 ib_dealloc_xrcd(xrcd);
823
824err:
825 put_uobj_write(&obj->uobject);
826
827err_tree_mutex_unlock:
828 if (f.file)
829 fdput(f);
830
831 mutex_unlock(&file->device->xrcd_tree_mutex);
832
833 return ret;
834}
835
836ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
837 const char __user *buf, int in_len,
838 int out_len)
839{
840 struct ib_uverbs_close_xrcd cmd;
841 struct ib_uobject *uobj;
842 struct ib_xrcd *xrcd = NULL;
843 struct inode *inode = NULL;
844 struct ib_uxrcd_object *obj;
845 int live;
846 int ret = 0;
847
848 if (copy_from_user(&cmd, buf, sizeof cmd))
849 return -EFAULT;
850
851 mutex_lock(&file->device->xrcd_tree_mutex);
852 uobj = idr_write_uobj(&ib_uverbs_xrcd_idr, cmd.xrcd_handle, file->ucontext);
853 if (!uobj) {
854 ret = -EINVAL;
855 goto out;
856 }
857
858 xrcd = uobj->object;
859 inode = xrcd->inode;
860 obj = container_of(uobj, struct ib_uxrcd_object, uobject);
861 if (atomic_read(&obj->refcnt)) {
862 put_uobj_write(uobj);
863 ret = -EBUSY;
864 goto out;
865 }
866
867 if (!inode || atomic_dec_and_test(&xrcd->usecnt)) {
868 ret = ib_dealloc_xrcd(uobj->object);
869 if (!ret)
870 uobj->live = 0;
871 }
872
873 live = uobj->live;
874 if (inode && ret)
875 atomic_inc(&xrcd->usecnt);
876
877 put_uobj_write(uobj);
878
879 if (ret)
880 goto out;
881
882 if (inode && !live)
883 xrcd_table_delete(file->device, inode);
884
885 idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
886 mutex_lock(&file->mutex);
887 list_del(&uobj->list);
888 mutex_unlock(&file->mutex);
889
890 put_uobj(uobj);
891 ret = in_len;
892
893out:
894 mutex_unlock(&file->device->xrcd_tree_mutex);
895 return ret;
896}
897
898void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
899 struct ib_xrcd *xrcd)
900{
901 struct inode *inode;
902
903 inode = xrcd->inode;
904 if (inode && !atomic_dec_and_test(&xrcd->usecnt))
905 return;
906
907 ib_dealloc_xrcd(xrcd);
908
909 if (inode)
910 xrcd_table_delete(dev, inode);
911}
912
913ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, 582ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
914 const char __user *buf, int in_len, 583 const char __user *buf, int in_len,
915 int out_len) 584 int out_len)
@@ -947,7 +616,7 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
947 if (!uobj) 616 if (!uobj)
948 return -ENOMEM; 617 return -ENOMEM;
949 618
950 init_uobj(uobj, 0, file->ucontext, &mr_lock_class); 619 init_uobj(uobj, 0, file->ucontext, &mr_lock_key);
951 down_write(&uobj->mutex); 620 down_write(&uobj->mutex);
952 621
953 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 622 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -1115,7 +784,7 @@ ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file,
1115 if (!obj) 784 if (!obj)
1116 return -ENOMEM; 785 return -ENOMEM;
1117 786
1118 init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_class); 787 init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &cq_lock_key);
1119 down_write(&obj->uobject.mutex); 788 down_write(&obj->uobject.mutex);
1120 789
1121 if (cmd.comp_channel >= 0) { 790 if (cmd.comp_channel >= 0) {
@@ -1383,12 +1052,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1383 struct ib_uverbs_create_qp_resp resp; 1052 struct ib_uverbs_create_qp_resp resp;
1384 struct ib_udata udata; 1053 struct ib_udata udata;
1385 struct ib_uqp_object *obj; 1054 struct ib_uqp_object *obj;
1386 struct ib_device *device; 1055 struct ib_pd *pd;
1387 struct ib_pd *pd = NULL; 1056 struct ib_cq *scq, *rcq;
1388 struct ib_xrcd *xrcd = NULL; 1057 struct ib_srq *srq;
1389 struct ib_uobject *uninitialized_var(xrcd_uobj);
1390 struct ib_cq *scq = NULL, *rcq = NULL;
1391 struct ib_srq *srq = NULL;
1392 struct ib_qp *qp; 1058 struct ib_qp *qp;
1393 struct ib_qp_init_attr attr; 1059 struct ib_qp_init_attr attr;
1394 int ret; 1060 int ret;
@@ -1399,9 +1065,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1399 if (copy_from_user(&cmd, buf, sizeof cmd)) 1065 if (copy_from_user(&cmd, buf, sizeof cmd))
1400 return -EFAULT; 1066 return -EFAULT;
1401 1067
1402 if (cmd.qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
1403 return -EPERM;
1404
1405 INIT_UDATA(&udata, buf + sizeof cmd, 1068 INIT_UDATA(&udata, buf + sizeof cmd,
1406 (unsigned long) cmd.response + sizeof resp, 1069 (unsigned long) cmd.response + sizeof resp,
1407 in_len - sizeof cmd, out_len - sizeof resp); 1070 in_len - sizeof cmd, out_len - sizeof resp);
@@ -1410,46 +1073,18 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1410 if (!obj) 1073 if (!obj)
1411 return -ENOMEM; 1074 return -ENOMEM;
1412 1075
1413 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class); 1076 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_key);
1414 down_write(&obj->uevent.uobject.mutex); 1077 down_write(&obj->uevent.uobject.mutex);
1415 1078
1416 if (cmd.qp_type == IB_QPT_XRC_TGT) { 1079 srq = cmd.is_srq ? idr_read_srq(cmd.srq_handle, file->ucontext) : NULL;
1417 xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj); 1080 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1418 if (!xrcd) { 1081 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, 0);
1419 ret = -EINVAL; 1082 rcq = cmd.recv_cq_handle == cmd.send_cq_handle ?
1420 goto err_put; 1083 scq : idr_read_cq(cmd.recv_cq_handle, file->ucontext, 1);
1421 }
1422 device = xrcd->device;
1423 } else {
1424 if (cmd.qp_type == IB_QPT_XRC_INI) {
1425 cmd.max_recv_wr = cmd.max_recv_sge = 0;
1426 } else {
1427 if (cmd.is_srq) {
1428 srq = idr_read_srq(cmd.srq_handle, file->ucontext);
1429 if (!srq || srq->srq_type != IB_SRQT_BASIC) {
1430 ret = -EINVAL;
1431 goto err_put;
1432 }
1433 }
1434
1435 if (cmd.recv_cq_handle != cmd.send_cq_handle) {
1436 rcq = idr_read_cq(cmd.recv_cq_handle, file->ucontext, 0);
1437 if (!rcq) {
1438 ret = -EINVAL;
1439 goto err_put;
1440 }
1441 }
1442 }
1443
1444 scq = idr_read_cq(cmd.send_cq_handle, file->ucontext, !!rcq);
1445 rcq = rcq ?: scq;
1446 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1447 if (!pd || !scq) {
1448 ret = -EINVAL;
1449 goto err_put;
1450 }
1451 1084
1452 device = pd->device; 1085 if (!pd || !scq || !rcq || (cmd.is_srq && !srq)) {
1086 ret = -EINVAL;
1087 goto err_put;
1453 } 1088 }
1454 1089
1455 attr.event_handler = ib_uverbs_qp_event_handler; 1090 attr.event_handler = ib_uverbs_qp_event_handler;
@@ -1457,7 +1092,6 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1457 attr.send_cq = scq; 1092 attr.send_cq = scq;
1458 attr.recv_cq = rcq; 1093 attr.recv_cq = rcq;
1459 attr.srq = srq; 1094 attr.srq = srq;
1460 attr.xrcd = xrcd;
1461 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; 1095 attr.sq_sig_type = cmd.sq_sig_all ? IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
1462 attr.qp_type = cmd.qp_type; 1096 attr.qp_type = cmd.qp_type;
1463 attr.create_flags = 0; 1097 attr.create_flags = 0;
@@ -1472,35 +1106,26 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1472 INIT_LIST_HEAD(&obj->uevent.event_list); 1106 INIT_LIST_HEAD(&obj->uevent.event_list);
1473 INIT_LIST_HEAD(&obj->mcast_list); 1107 INIT_LIST_HEAD(&obj->mcast_list);
1474 1108
1475 if (cmd.qp_type == IB_QPT_XRC_TGT) 1109 qp = pd->device->create_qp(pd, &attr, &udata);
1476 qp = ib_create_qp(pd, &attr);
1477 else
1478 qp = device->create_qp(pd, &attr, &udata);
1479
1480 if (IS_ERR(qp)) { 1110 if (IS_ERR(qp)) {
1481 ret = PTR_ERR(qp); 1111 ret = PTR_ERR(qp);
1482 goto err_put; 1112 goto err_put;
1483 } 1113 }
1484 1114
1485 if (cmd.qp_type != IB_QPT_XRC_TGT) { 1115 qp->device = pd->device;
1486 qp->real_qp = qp; 1116 qp->pd = pd;
1487 qp->device = device; 1117 qp->send_cq = attr.send_cq;
1488 qp->pd = pd; 1118 qp->recv_cq = attr.recv_cq;
1489 qp->send_cq = attr.send_cq; 1119 qp->srq = attr.srq;
1490 qp->recv_cq = attr.recv_cq; 1120 qp->uobject = &obj->uevent.uobject;
1491 qp->srq = attr.srq; 1121 qp->event_handler = attr.event_handler;
1492 qp->event_handler = attr.event_handler; 1122 qp->qp_context = attr.qp_context;
1493 qp->qp_context = attr.qp_context; 1123 qp->qp_type = attr.qp_type;
1494 qp->qp_type = attr.qp_type; 1124 atomic_inc(&pd->usecnt);
1495 atomic_set(&qp->usecnt, 0); 1125 atomic_inc(&attr.send_cq->usecnt);
1496 atomic_inc(&pd->usecnt); 1126 atomic_inc(&attr.recv_cq->usecnt);
1497 atomic_inc(&attr.send_cq->usecnt); 1127 if (attr.srq)
1498 if (attr.recv_cq) 1128 atomic_inc(&attr.srq->usecnt);
1499 atomic_inc(&attr.recv_cq->usecnt);
1500 if (attr.srq)
1501 atomic_inc(&attr.srq->usecnt);
1502 }
1503 qp->uobject = &obj->uevent.uobject;
1504 1129
1505 obj->uevent.uobject.object = qp; 1130 obj->uevent.uobject.object = qp;
1506 ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject); 1131 ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
@@ -1522,13 +1147,9 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
1522 goto err_copy; 1147 goto err_copy;
1523 } 1148 }
1524 1149
1525 if (xrcd) 1150 put_pd_read(pd);
1526 put_xrcd_read(xrcd_uobj); 1151 put_cq_read(scq);
1527 if (pd) 1152 if (rcq != scq)
1528 put_pd_read(pd);
1529 if (scq)
1530 put_cq_read(scq);
1531 if (rcq && rcq != scq)
1532 put_cq_read(rcq); 1153 put_cq_read(rcq);
1533 if (srq) 1154 if (srq)
1534 put_srq_read(srq); 1155 put_srq_read(srq);
@@ -1550,8 +1171,6 @@ err_destroy:
1550 ib_destroy_qp(qp); 1171 ib_destroy_qp(qp);
1551 1172
1552err_put: 1173err_put:
1553 if (xrcd)
1554 put_xrcd_read(xrcd_uobj);
1555 if (pd) 1174 if (pd)
1556 put_pd_read(pd); 1175 put_pd_read(pd);
1557 if (scq) 1176 if (scq)
@@ -1565,98 +1184,6 @@ err_put:
1565 return ret; 1184 return ret;
1566} 1185}
1567 1186
1568ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
1569 const char __user *buf, int in_len, int out_len)
1570{
1571 struct ib_uverbs_open_qp cmd;
1572 struct ib_uverbs_create_qp_resp resp;
1573 struct ib_udata udata;
1574 struct ib_uqp_object *obj;
1575 struct ib_xrcd *xrcd;
1576 struct ib_uobject *uninitialized_var(xrcd_uobj);
1577 struct ib_qp *qp;
1578 struct ib_qp_open_attr attr;
1579 int ret;
1580
1581 if (out_len < sizeof resp)
1582 return -ENOSPC;
1583
1584 if (copy_from_user(&cmd, buf, sizeof cmd))
1585 return -EFAULT;
1586
1587 INIT_UDATA(&udata, buf + sizeof cmd,
1588 (unsigned long) cmd.response + sizeof resp,
1589 in_len - sizeof cmd, out_len - sizeof resp);
1590
1591 obj = kmalloc(sizeof *obj, GFP_KERNEL);
1592 if (!obj)
1593 return -ENOMEM;
1594
1595 init_uobj(&obj->uevent.uobject, cmd.user_handle, file->ucontext, &qp_lock_class);
1596 down_write(&obj->uevent.uobject.mutex);
1597
1598 xrcd = idr_read_xrcd(cmd.pd_handle, file->ucontext, &xrcd_uobj);
1599 if (!xrcd) {
1600 ret = -EINVAL;
1601 goto err_put;
1602 }
1603
1604 attr.event_handler = ib_uverbs_qp_event_handler;
1605 attr.qp_context = file;
1606 attr.qp_num = cmd.qpn;
1607 attr.qp_type = cmd.qp_type;
1608
1609 obj->uevent.events_reported = 0;
1610 INIT_LIST_HEAD(&obj->uevent.event_list);
1611 INIT_LIST_HEAD(&obj->mcast_list);
1612
1613 qp = ib_open_qp(xrcd, &attr);
1614 if (IS_ERR(qp)) {
1615 ret = PTR_ERR(qp);
1616 goto err_put;
1617 }
1618
1619 qp->uobject = &obj->uevent.uobject;
1620
1621 obj->uevent.uobject.object = qp;
1622 ret = idr_add_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1623 if (ret)
1624 goto err_destroy;
1625
1626 memset(&resp, 0, sizeof resp);
1627 resp.qpn = qp->qp_num;
1628 resp.qp_handle = obj->uevent.uobject.id;
1629
1630 if (copy_to_user((void __user *) (unsigned long) cmd.response,
1631 &resp, sizeof resp)) {
1632 ret = -EFAULT;
1633 goto err_remove;
1634 }
1635
1636 put_xrcd_read(xrcd_uobj);
1637
1638 mutex_lock(&file->mutex);
1639 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->qp_list);
1640 mutex_unlock(&file->mutex);
1641
1642 obj->uevent.uobject.live = 1;
1643
1644 up_write(&obj->uevent.uobject.mutex);
1645
1646 return in_len;
1647
1648err_remove:
1649 idr_remove_uobj(&ib_uverbs_qp_idr, &obj->uevent.uobject);
1650
1651err_destroy:
1652 ib_destroy_qp(qp);
1653
1654err_put:
1655 put_xrcd_read(xrcd_uobj);
1656 put_uobj_write(&obj->uevent.uobject);
1657 return ret;
1658}
1659
1660ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, 1187ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
1661 const char __user *buf, int in_len, 1188 const char __user *buf, int in_len,
1662 int out_len) 1189 int out_len)
@@ -1757,20 +1284,6 @@ out:
1757 return ret ? ret : in_len; 1284 return ret ? ret : in_len;
1758} 1285}
1759 1286
1760/* Remove ignored fields set in the attribute mask */
1761static int modify_qp_mask(enum ib_qp_type qp_type, int mask)
1762{
1763 switch (qp_type) {
1764 case IB_QPT_XRC_INI:
1765 return mask & ~(IB_QP_MAX_DEST_RD_ATOMIC | IB_QP_MIN_RNR_TIMER);
1766 case IB_QPT_XRC_TGT:
1767 return mask & ~(IB_QP_MAX_QP_RD_ATOMIC | IB_QP_RETRY_CNT |
1768 IB_QP_RNR_RETRY);
1769 default:
1770 return mask;
1771 }
1772}
1773
1774ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, 1287ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1775 const char __user *buf, int in_len, 1288 const char __user *buf, int in_len,
1776 int out_len) 1289 int out_len)
@@ -1843,12 +1356,7 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
1843 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; 1356 attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
1844 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; 1357 attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
1845 1358
1846 if (qp->real_qp == qp) { 1359 ret = qp->device->modify_qp(qp, attr, cmd.attr_mask, &udata);
1847 ret = qp->device->modify_qp(qp, attr,
1848 modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
1849 } else {
1850 ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask));
1851 }
1852 1360
1853 put_qp_read(qp); 1361 put_qp_read(qp);
1854 1362
@@ -2045,7 +1553,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
2045 } 1553 }
2046 1554
2047 resp.bad_wr = 0; 1555 resp.bad_wr = 0;
2048 ret = qp->device->post_send(qp->real_qp, wr, &bad_wr); 1556 ret = qp->device->post_send(qp, wr, &bad_wr);
2049 if (ret) 1557 if (ret)
2050 for (next = wr; next; next = next->next) { 1558 for (next = wr; next; next = next->next) {
2051 ++resp.bad_wr; 1559 ++resp.bad_wr;
@@ -2183,7 +1691,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
2183 goto out; 1691 goto out;
2184 1692
2185 resp.bad_wr = 0; 1693 resp.bad_wr = 0;
2186 ret = qp->device->post_recv(qp->real_qp, wr, &bad_wr); 1694 ret = qp->device->post_recv(qp, wr, &bad_wr);
2187 1695
2188 put_qp_read(qp); 1696 put_qp_read(qp);
2189 1697
@@ -2279,7 +1787,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
2279 if (!uobj) 1787 if (!uobj)
2280 return -ENOMEM; 1788 return -ENOMEM;
2281 1789
2282 init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_class); 1790 init_uobj(uobj, cmd.user_handle, file->ucontext, &ah_lock_key);
2283 down_write(&uobj->mutex); 1791 down_write(&uobj->mutex);
2284 1792
2285 pd = idr_read_pd(cmd.pd_handle, file->ucontext); 1793 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
@@ -2396,7 +1904,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2396 if (copy_from_user(&cmd, buf, sizeof cmd)) 1904 if (copy_from_user(&cmd, buf, sizeof cmd))
2397 return -EFAULT; 1905 return -EFAULT;
2398 1906
2399 qp = idr_write_qp(cmd.qp_handle, file->ucontext); 1907 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2400 if (!qp) 1908 if (!qp)
2401 return -EINVAL; 1909 return -EINVAL;
2402 1910
@@ -2425,7 +1933,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
2425 kfree(mcast); 1933 kfree(mcast);
2426 1934
2427out_put: 1935out_put:
2428 put_qp_write(qp); 1936 put_qp_read(qp);
2429 1937
2430 return ret ? ret : in_len; 1938 return ret ? ret : in_len;
2431} 1939}
@@ -2443,7 +1951,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2443 if (copy_from_user(&cmd, buf, sizeof cmd)) 1951 if (copy_from_user(&cmd, buf, sizeof cmd))
2444 return -EFAULT; 1952 return -EFAULT;
2445 1953
2446 qp = idr_write_qp(cmd.qp_handle, file->ucontext); 1954 qp = idr_read_qp(cmd.qp_handle, file->ucontext);
2447 if (!qp) 1955 if (!qp)
2448 return -EINVAL; 1956 return -EINVAL;
2449 1957
@@ -2462,122 +1970,100 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
2462 } 1970 }
2463 1971
2464out_put: 1972out_put:
2465 put_qp_write(qp); 1973 put_qp_read(qp);
2466 1974
2467 return ret ? ret : in_len; 1975 return ret ? ret : in_len;
2468} 1976}
2469 1977
2470static int __uverbs_create_xsrq(struct ib_uverbs_file *file, 1978ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
2471 struct ib_uverbs_create_xsrq *cmd, 1979 const char __user *buf, int in_len,
2472 struct ib_udata *udata) 1980 int out_len)
2473{ 1981{
1982 struct ib_uverbs_create_srq cmd;
2474 struct ib_uverbs_create_srq_resp resp; 1983 struct ib_uverbs_create_srq_resp resp;
2475 struct ib_usrq_object *obj; 1984 struct ib_udata udata;
1985 struct ib_uevent_object *obj;
2476 struct ib_pd *pd; 1986 struct ib_pd *pd;
2477 struct ib_srq *srq; 1987 struct ib_srq *srq;
2478 struct ib_uobject *uninitialized_var(xrcd_uobj);
2479 struct ib_srq_init_attr attr; 1988 struct ib_srq_init_attr attr;
2480 int ret; 1989 int ret;
2481 1990
2482 obj = kmalloc(sizeof *obj, GFP_KERNEL); 1991 if (out_len < sizeof resp)
2483 if (!obj) 1992 return -ENOSPC;
2484 return -ENOMEM;
2485 1993
2486 init_uobj(&obj->uevent.uobject, cmd->user_handle, file->ucontext, &srq_lock_class); 1994 if (copy_from_user(&cmd, buf, sizeof cmd))
2487 down_write(&obj->uevent.uobject.mutex); 1995 return -EFAULT;
2488 1996
2489 if (cmd->srq_type == IB_SRQT_XRC) { 1997 INIT_UDATA(&udata, buf + sizeof cmd,
2490 attr.ext.xrc.xrcd = idr_read_xrcd(cmd->xrcd_handle, file->ucontext, &xrcd_uobj); 1998 (unsigned long) cmd.response + sizeof resp,
2491 if (!attr.ext.xrc.xrcd) { 1999 in_len - sizeof cmd, out_len - sizeof resp);
2492 ret = -EINVAL;
2493 goto err;
2494 }
2495 2000
2496 obj->uxrcd = container_of(xrcd_uobj, struct ib_uxrcd_object, uobject); 2001 obj = kmalloc(sizeof *obj, GFP_KERNEL);
2497 atomic_inc(&obj->uxrcd->refcnt); 2002 if (!obj)
2003 return -ENOMEM;
2498 2004
2499 attr.ext.xrc.cq = idr_read_cq(cmd->cq_handle, file->ucontext, 0); 2005 init_uobj(&obj->uobject, cmd.user_handle, file->ucontext, &srq_lock_key);
2500 if (!attr.ext.xrc.cq) { 2006 down_write(&obj->uobject.mutex);
2501 ret = -EINVAL;
2502 goto err_put_xrcd;
2503 }
2504 }
2505 2007
2506 pd = idr_read_pd(cmd->pd_handle, file->ucontext); 2008 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
2507 if (!pd) { 2009 if (!pd) {
2508 ret = -EINVAL; 2010 ret = -EINVAL;
2509 goto err_put_cq; 2011 goto err;
2510 } 2012 }
2511 2013
2512 attr.event_handler = ib_uverbs_srq_event_handler; 2014 attr.event_handler = ib_uverbs_srq_event_handler;
2513 attr.srq_context = file; 2015 attr.srq_context = file;
2514 attr.srq_type = cmd->srq_type; 2016 attr.attr.max_wr = cmd.max_wr;
2515 attr.attr.max_wr = cmd->max_wr; 2017 attr.attr.max_sge = cmd.max_sge;
2516 attr.attr.max_sge = cmd->max_sge; 2018 attr.attr.srq_limit = cmd.srq_limit;
2517 attr.attr.srq_limit = cmd->srq_limit;
2518 2019
2519 obj->uevent.events_reported = 0; 2020 obj->events_reported = 0;
2520 INIT_LIST_HEAD(&obj->uevent.event_list); 2021 INIT_LIST_HEAD(&obj->event_list);
2521 2022
2522 srq = pd->device->create_srq(pd, &attr, udata); 2023 srq = pd->device->create_srq(pd, &attr, &udata);
2523 if (IS_ERR(srq)) { 2024 if (IS_ERR(srq)) {
2524 ret = PTR_ERR(srq); 2025 ret = PTR_ERR(srq);
2525 goto err_put; 2026 goto err_put;
2526 } 2027 }
2527 2028
2528 srq->device = pd->device; 2029 srq->device = pd->device;
2529 srq->pd = pd; 2030 srq->pd = pd;
2530 srq->srq_type = cmd->srq_type; 2031 srq->uobject = &obj->uobject;
2531 srq->uobject = &obj->uevent.uobject;
2532 srq->event_handler = attr.event_handler; 2032 srq->event_handler = attr.event_handler;
2533 srq->srq_context = attr.srq_context; 2033 srq->srq_context = attr.srq_context;
2534
2535 if (cmd->srq_type == IB_SRQT_XRC) {
2536 srq->ext.xrc.cq = attr.ext.xrc.cq;
2537 srq->ext.xrc.xrcd = attr.ext.xrc.xrcd;
2538 atomic_inc(&attr.ext.xrc.cq->usecnt);
2539 atomic_inc(&attr.ext.xrc.xrcd->usecnt);
2540 }
2541
2542 atomic_inc(&pd->usecnt); 2034 atomic_inc(&pd->usecnt);
2543 atomic_set(&srq->usecnt, 0); 2035 atomic_set(&srq->usecnt, 0);
2544 2036
2545 obj->uevent.uobject.object = srq; 2037 obj->uobject.object = srq;
2546 ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); 2038 ret = idr_add_uobj(&ib_uverbs_srq_idr, &obj->uobject);
2547 if (ret) 2039 if (ret)
2548 goto err_destroy; 2040 goto err_destroy;
2549 2041
2550 memset(&resp, 0, sizeof resp); 2042 memset(&resp, 0, sizeof resp);
2551 resp.srq_handle = obj->uevent.uobject.id; 2043 resp.srq_handle = obj->uobject.id;
2552 resp.max_wr = attr.attr.max_wr; 2044 resp.max_wr = attr.attr.max_wr;
2553 resp.max_sge = attr.attr.max_sge; 2045 resp.max_sge = attr.attr.max_sge;
2554 if (cmd->srq_type == IB_SRQT_XRC)
2555 resp.srqn = srq->ext.xrc.srq_num;
2556 2046
2557 if (copy_to_user((void __user *) (unsigned long) cmd->response, 2047 if (copy_to_user((void __user *) (unsigned long) cmd.response,
2558 &resp, sizeof resp)) { 2048 &resp, sizeof resp)) {
2559 ret = -EFAULT; 2049 ret = -EFAULT;
2560 goto err_copy; 2050 goto err_copy;
2561 } 2051 }
2562 2052
2563 if (cmd->srq_type == IB_SRQT_XRC) {
2564 put_uobj_read(xrcd_uobj);
2565 put_cq_read(attr.ext.xrc.cq);
2566 }
2567 put_pd_read(pd); 2053 put_pd_read(pd);
2568 2054
2569 mutex_lock(&file->mutex); 2055 mutex_lock(&file->mutex);
2570 list_add_tail(&obj->uevent.uobject.list, &file->ucontext->srq_list); 2056 list_add_tail(&obj->uobject.list, &file->ucontext->srq_list);
2571 mutex_unlock(&file->mutex); 2057 mutex_unlock(&file->mutex);
2572 2058
2573 obj->uevent.uobject.live = 1; 2059 obj->uobject.live = 1;
2574 2060
2575 up_write(&obj->uevent.uobject.mutex); 2061 up_write(&obj->uobject.mutex);
2576 2062
2577 return 0; 2063 return in_len;
2578 2064
2579err_copy: 2065err_copy:
2580 idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uevent.uobject); 2066 idr_remove_uobj(&ib_uverbs_srq_idr, &obj->uobject);
2581 2067
2582err_destroy: 2068err_destroy:
2583 ib_destroy_srq(srq); 2069 ib_destroy_srq(srq);
@@ -2585,81 +2071,11 @@ err_destroy:
2585err_put: 2071err_put:
2586 put_pd_read(pd); 2072 put_pd_read(pd);
2587 2073
2588err_put_cq:
2589 if (cmd->srq_type == IB_SRQT_XRC)
2590 put_cq_read(attr.ext.xrc.cq);
2591
2592err_put_xrcd:
2593 if (cmd->srq_type == IB_SRQT_XRC) {
2594 atomic_dec(&obj->uxrcd->refcnt);
2595 put_uobj_read(xrcd_uobj);
2596 }
2597
2598err: 2074err:
2599 put_uobj_write(&obj->uevent.uobject); 2075 put_uobj_write(&obj->uobject);
2600 return ret; 2076 return ret;
2601} 2077}
2602 2078
2603ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
2604 const char __user *buf, int in_len,
2605 int out_len)
2606{
2607 struct ib_uverbs_create_srq cmd;
2608 struct ib_uverbs_create_xsrq xcmd;
2609 struct ib_uverbs_create_srq_resp resp;
2610 struct ib_udata udata;
2611 int ret;
2612
2613 if (out_len < sizeof resp)
2614 return -ENOSPC;
2615
2616 if (copy_from_user(&cmd, buf, sizeof cmd))
2617 return -EFAULT;
2618
2619 xcmd.response = cmd.response;
2620 xcmd.user_handle = cmd.user_handle;
2621 xcmd.srq_type = IB_SRQT_BASIC;
2622 xcmd.pd_handle = cmd.pd_handle;
2623 xcmd.max_wr = cmd.max_wr;
2624 xcmd.max_sge = cmd.max_sge;
2625 xcmd.srq_limit = cmd.srq_limit;
2626
2627 INIT_UDATA(&udata, buf + sizeof cmd,
2628 (unsigned long) cmd.response + sizeof resp,
2629 in_len - sizeof cmd, out_len - sizeof resp);
2630
2631 ret = __uverbs_create_xsrq(file, &xcmd, &udata);
2632 if (ret)
2633 return ret;
2634
2635 return in_len;
2636}
2637
2638ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
2639 const char __user *buf, int in_len, int out_len)
2640{
2641 struct ib_uverbs_create_xsrq cmd;
2642 struct ib_uverbs_create_srq_resp resp;
2643 struct ib_udata udata;
2644 int ret;
2645
2646 if (out_len < sizeof resp)
2647 return -ENOSPC;
2648
2649 if (copy_from_user(&cmd, buf, sizeof cmd))
2650 return -EFAULT;
2651
2652 INIT_UDATA(&udata, buf + sizeof cmd,
2653 (unsigned long) cmd.response + sizeof resp,
2654 in_len - sizeof cmd, out_len - sizeof resp);
2655
2656 ret = __uverbs_create_xsrq(file, &cmd, &udata);
2657 if (ret)
2658 return ret;
2659
2660 return in_len;
2661}
2662
2663ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file, 2079ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
2664 const char __user *buf, int in_len, 2080 const char __user *buf, int in_len,
2665 int out_len) 2081 int out_len)
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 6f2ce6fa98f..56898b6578a 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -72,7 +72,6 @@ DEFINE_IDR(ib_uverbs_ah_idr);
72DEFINE_IDR(ib_uverbs_cq_idr); 72DEFINE_IDR(ib_uverbs_cq_idr);
73DEFINE_IDR(ib_uverbs_qp_idr); 73DEFINE_IDR(ib_uverbs_qp_idr);
74DEFINE_IDR(ib_uverbs_srq_idr); 74DEFINE_IDR(ib_uverbs_srq_idr);
75DEFINE_IDR(ib_uverbs_xrcd_idr);
76 75
77static DEFINE_SPINLOCK(map_lock); 76static DEFINE_SPINLOCK(map_lock);
78static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); 77static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -108,10 +107,6 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
108 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq, 107 [IB_USER_VERBS_CMD_MODIFY_SRQ] = ib_uverbs_modify_srq,
109 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq, 108 [IB_USER_VERBS_CMD_QUERY_SRQ] = ib_uverbs_query_srq,
110 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq, 109 [IB_USER_VERBS_CMD_DESTROY_SRQ] = ib_uverbs_destroy_srq,
111 [IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
112 [IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
113 [IB_USER_VERBS_CMD_CREATE_XSRQ] = ib_uverbs_create_xsrq,
114 [IB_USER_VERBS_CMD_OPEN_QP] = ib_uverbs_open_qp
115}; 110};
116 111
117static void ib_uverbs_add_one(struct ib_device *device); 112static void ib_uverbs_add_one(struct ib_device *device);
@@ -207,12 +202,8 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
207 container_of(uobj, struct ib_uqp_object, uevent.uobject); 202 container_of(uobj, struct ib_uqp_object, uevent.uobject);
208 203
209 idr_remove_uobj(&ib_uverbs_qp_idr, uobj); 204 idr_remove_uobj(&ib_uverbs_qp_idr, uobj);
210 if (qp != qp->real_qp) { 205 ib_uverbs_detach_umcast(qp, uqp);
211 ib_close_qp(qp); 206 ib_destroy_qp(qp);
212 } else {
213 ib_uverbs_detach_umcast(qp, uqp);
214 ib_destroy_qp(qp);
215 }
216 ib_uverbs_release_uevent(file, &uqp->uevent); 207 ib_uverbs_release_uevent(file, &uqp->uevent);
217 kfree(uqp); 208 kfree(uqp);
218 } 209 }
@@ -250,18 +241,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
250 kfree(uobj); 241 kfree(uobj);
251 } 242 }
252 243
253 mutex_lock(&file->device->xrcd_tree_mutex);
254 list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
255 struct ib_xrcd *xrcd = uobj->object;
256 struct ib_uxrcd_object *uxrcd =
257 container_of(uobj, struct ib_uxrcd_object, uobject);
258
259 idr_remove_uobj(&ib_uverbs_xrcd_idr, uobj);
260 ib_uverbs_dealloc_xrcd(file->device, xrcd);
261 kfree(uxrcd);
262 }
263 mutex_unlock(&file->device->xrcd_tree_mutex);
264
265 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { 244 list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) {
266 struct ib_pd *pd = uobj->object; 245 struct ib_pd *pd = uobj->object;
267 246
@@ -541,15 +520,16 @@ struct file *ib_uverbs_alloc_event_file(struct ib_uverbs_file *uverbs_file,
541struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd) 520struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
542{ 521{
543 struct ib_uverbs_event_file *ev_file = NULL; 522 struct ib_uverbs_event_file *ev_file = NULL;
544 struct fd f = fdget(fd); 523 struct file *filp;
545 524
546 if (!f.file) 525 filp = fget(fd);
526 if (!filp)
547 return NULL; 527 return NULL;
548 528
549 if (f.file->f_op != &uverbs_event_fops) 529 if (filp->f_op != &uverbs_event_fops)
550 goto out; 530 goto out;
551 531
552 ev_file = f.file->private_data; 532 ev_file = filp->private_data;
553 if (ev_file->is_async) { 533 if (ev_file->is_async) {
554 ev_file = NULL; 534 ev_file = NULL;
555 goto out; 535 goto out;
@@ -558,7 +538,7 @@ struct ib_uverbs_event_file *ib_uverbs_lookup_comp_file(int fd)
558 kref_get(&ev_file->ref); 538 kref_get(&ev_file->ref);
559 539
560out: 540out:
561 fdput(f); 541 fput(filp);
562 return ev_file; 542 return ev_file;
563} 543}
564 544
@@ -577,7 +557,8 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
577 if (hdr.in_words * 4 != count) 557 if (hdr.in_words * 4 != count)
578 return -EINVAL; 558 return -EINVAL;
579 559
580 if (hdr.command >= ARRAY_SIZE(uverbs_cmd_table) || 560 if (hdr.command < 0 ||
561 hdr.command >= ARRAY_SIZE(uverbs_cmd_table) ||
581 !uverbs_cmd_table[hdr.command]) 562 !uverbs_cmd_table[hdr.command])
582 return -EINVAL; 563 return -EINVAL;
583 564
@@ -760,8 +741,6 @@ static void ib_uverbs_add_one(struct ib_device *device)
760 741
761 kref_init(&uverbs_dev->ref); 742 kref_init(&uverbs_dev->ref);
762 init_completion(&uverbs_dev->comp); 743 init_completion(&uverbs_dev->comp);
763 uverbs_dev->xrcd_tree = RB_ROOT;
764 mutex_init(&uverbs_dev->xrcd_tree_mutex);
765 744
766 spin_lock(&map_lock); 745 spin_lock(&map_lock);
767 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); 746 devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -845,7 +824,7 @@ static void ib_uverbs_remove_one(struct ib_device *device)
845 kfree(uverbs_dev); 824 kfree(uverbs_dev);
846} 825}
847 826
848static char *uverbs_devnode(struct device *dev, umode_t *mode) 827static char *uverbs_devnode(struct device *dev, mode_t *mode)
849{ 828{
850 if (mode) 829 if (mode)
851 *mode = 0666; 830 *mode = 0666;
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index e7bee46868d..1b1146f8712 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -30,7 +30,6 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#include <linux/export.h>
34#include <rdma/ib_marshall.h> 33#include <rdma/ib_marshall.h>
35 34
36void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst, 35void ib_copy_ah_attr_to_user(struct ib_uverbs_ah_attr *dst,
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 30f199e8579..af7a8b08b2e 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -38,9 +38,7 @@
38 38
39#include <linux/errno.h> 39#include <linux/errno.h>
40#include <linux/err.h> 40#include <linux/err.h>
41#include <linux/export.h>
42#include <linux/string.h> 41#include <linux/string.h>
43#include <linux/slab.h>
44 42
45#include <rdma/ib_verbs.h> 43#include <rdma/ib_verbs.h>
46#include <rdma/ib_cache.h> 44#include <rdma/ib_cache.h>
@@ -79,31 +77,6 @@ enum ib_rate mult_to_ib_rate(int mult)
79} 77}
80EXPORT_SYMBOL(mult_to_ib_rate); 78EXPORT_SYMBOL(mult_to_ib_rate);
81 79
82int ib_rate_to_mbps(enum ib_rate rate)
83{
84 switch (rate) {
85 case IB_RATE_2_5_GBPS: return 2500;
86 case IB_RATE_5_GBPS: return 5000;
87 case IB_RATE_10_GBPS: return 10000;
88 case IB_RATE_20_GBPS: return 20000;
89 case IB_RATE_30_GBPS: return 30000;
90 case IB_RATE_40_GBPS: return 40000;
91 case IB_RATE_60_GBPS: return 60000;
92 case IB_RATE_80_GBPS: return 80000;
93 case IB_RATE_120_GBPS: return 120000;
94 case IB_RATE_14_GBPS: return 14062;
95 case IB_RATE_56_GBPS: return 56250;
96 case IB_RATE_112_GBPS: return 112500;
97 case IB_RATE_168_GBPS: return 168750;
98 case IB_RATE_25_GBPS: return 25781;
99 case IB_RATE_100_GBPS: return 103125;
100 case IB_RATE_200_GBPS: return 206250;
101 case IB_RATE_300_GBPS: return 309375;
102 default: return -1;
103 }
104}
105EXPORT_SYMBOL(ib_rate_to_mbps);
106
107enum rdma_transport_type 80enum rdma_transport_type
108rdma_node_get_transport(enum rdma_node_type node_type) 81rdma_node_get_transport(enum rdma_node_type node_type)
109{ 82{
@@ -277,13 +250,6 @@ struct ib_srq *ib_create_srq(struct ib_pd *pd,
277 srq->uobject = NULL; 250 srq->uobject = NULL;
278 srq->event_handler = srq_init_attr->event_handler; 251 srq->event_handler = srq_init_attr->event_handler;
279 srq->srq_context = srq_init_attr->srq_context; 252 srq->srq_context = srq_init_attr->srq_context;
280 srq->srq_type = srq_init_attr->srq_type;
281 if (srq->srq_type == IB_SRQT_XRC) {
282 srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
283 srq->ext.xrc.cq = srq_init_attr->ext.xrc.cq;
284 atomic_inc(&srq->ext.xrc.xrcd->usecnt);
285 atomic_inc(&srq->ext.xrc.cq->usecnt);
286 }
287 atomic_inc(&pd->usecnt); 253 atomic_inc(&pd->usecnt);
288 atomic_set(&srq->usecnt, 0); 254 atomic_set(&srq->usecnt, 0);
289 } 255 }
@@ -313,29 +279,16 @@ EXPORT_SYMBOL(ib_query_srq);
313int ib_destroy_srq(struct ib_srq *srq) 279int ib_destroy_srq(struct ib_srq *srq)
314{ 280{
315 struct ib_pd *pd; 281 struct ib_pd *pd;
316 enum ib_srq_type srq_type;
317 struct ib_xrcd *uninitialized_var(xrcd);
318 struct ib_cq *uninitialized_var(cq);
319 int ret; 282 int ret;
320 283
321 if (atomic_read(&srq->usecnt)) 284 if (atomic_read(&srq->usecnt))
322 return -EBUSY; 285 return -EBUSY;
323 286
324 pd = srq->pd; 287 pd = srq->pd;
325 srq_type = srq->srq_type;
326 if (srq_type == IB_SRQT_XRC) {
327 xrcd = srq->ext.xrc.xrcd;
328 cq = srq->ext.xrc.cq;
329 }
330 288
331 ret = srq->device->destroy_srq(srq); 289 ret = srq->device->destroy_srq(srq);
332 if (!ret) { 290 if (!ret)
333 atomic_dec(&pd->usecnt); 291 atomic_dec(&pd->usecnt);
334 if (srq_type == IB_SRQT_XRC) {
335 atomic_dec(&xrcd->usecnt);
336 atomic_dec(&cq->usecnt);
337 }
338 }
339 292
340 return ret; 293 return ret;
341} 294}
@@ -343,123 +296,28 @@ EXPORT_SYMBOL(ib_destroy_srq);
343 296
344/* Queue pairs */ 297/* Queue pairs */
345 298
346static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
347{
348 struct ib_qp *qp = context;
349
350 list_for_each_entry(event->element.qp, &qp->open_list, open_list)
351 event->element.qp->event_handler(event, event->element.qp->qp_context);
352}
353
354static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
355{
356 mutex_lock(&xrcd->tgt_qp_mutex);
357 list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
358 mutex_unlock(&xrcd->tgt_qp_mutex);
359}
360
361static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
362 void (*event_handler)(struct ib_event *, void *),
363 void *qp_context)
364{
365 struct ib_qp *qp;
366 unsigned long flags;
367
368 qp = kzalloc(sizeof *qp, GFP_KERNEL);
369 if (!qp)
370 return ERR_PTR(-ENOMEM);
371
372 qp->real_qp = real_qp;
373 atomic_inc(&real_qp->usecnt);
374 qp->device = real_qp->device;
375 qp->event_handler = event_handler;
376 qp->qp_context = qp_context;
377 qp->qp_num = real_qp->qp_num;
378 qp->qp_type = real_qp->qp_type;
379
380 spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
381 list_add(&qp->open_list, &real_qp->open_list);
382 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
383
384 return qp;
385}
386
387struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
388 struct ib_qp_open_attr *qp_open_attr)
389{
390 struct ib_qp *qp, *real_qp;
391
392 if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
393 return ERR_PTR(-EINVAL);
394
395 qp = ERR_PTR(-EINVAL);
396 mutex_lock(&xrcd->tgt_qp_mutex);
397 list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
398 if (real_qp->qp_num == qp_open_attr->qp_num) {
399 qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
400 qp_open_attr->qp_context);
401 break;
402 }
403 }
404 mutex_unlock(&xrcd->tgt_qp_mutex);
405 return qp;
406}
407EXPORT_SYMBOL(ib_open_qp);
408
409struct ib_qp *ib_create_qp(struct ib_pd *pd, 299struct ib_qp *ib_create_qp(struct ib_pd *pd,
410 struct ib_qp_init_attr *qp_init_attr) 300 struct ib_qp_init_attr *qp_init_attr)
411{ 301{
412 struct ib_qp *qp, *real_qp; 302 struct ib_qp *qp;
413 struct ib_device *device;
414 303
415 device = pd ? pd->device : qp_init_attr->xrcd->device; 304 qp = pd->device->create_qp(pd, qp_init_attr, NULL);
416 qp = device->create_qp(pd, qp_init_attr, NULL);
417 305
418 if (!IS_ERR(qp)) { 306 if (!IS_ERR(qp)) {
419 qp->device = device; 307 qp->device = pd->device;
420 qp->real_qp = qp; 308 qp->pd = pd;
421 qp->uobject = NULL; 309 qp->send_cq = qp_init_attr->send_cq;
422 qp->qp_type = qp_init_attr->qp_type; 310 qp->recv_cq = qp_init_attr->recv_cq;
423 311 qp->srq = qp_init_attr->srq;
424 atomic_set(&qp->usecnt, 0); 312 qp->uobject = NULL;
425 if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { 313 qp->event_handler = qp_init_attr->event_handler;
426 qp->event_handler = __ib_shared_qp_event_handler; 314 qp->qp_context = qp_init_attr->qp_context;
427 qp->qp_context = qp; 315 qp->qp_type = qp_init_attr->qp_type;
428 qp->pd = NULL; 316 atomic_inc(&pd->usecnt);
429 qp->send_cq = qp->recv_cq = NULL; 317 atomic_inc(&qp_init_attr->send_cq->usecnt);
430 qp->srq = NULL; 318 atomic_inc(&qp_init_attr->recv_cq->usecnt);
431 qp->xrcd = qp_init_attr->xrcd; 319 if (qp_init_attr->srq)
432 atomic_inc(&qp_init_attr->xrcd->usecnt); 320 atomic_inc(&qp_init_attr->srq->usecnt);
433 INIT_LIST_HEAD(&qp->open_list);
434
435 real_qp = qp;
436 qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
437 qp_init_attr->qp_context);
438 if (!IS_ERR(qp))
439 __ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
440 else
441 real_qp->device->destroy_qp(real_qp);
442 } else {
443 qp->event_handler = qp_init_attr->event_handler;
444 qp->qp_context = qp_init_attr->qp_context;
445 if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
446 qp->recv_cq = NULL;
447 qp->srq = NULL;
448 } else {
449 qp->recv_cq = qp_init_attr->recv_cq;
450 atomic_inc(&qp_init_attr->recv_cq->usecnt);
451 qp->srq = qp_init_attr->srq;
452 if (qp->srq)
453 atomic_inc(&qp_init_attr->srq->usecnt);
454 }
455
456 qp->pd = pd;
457 qp->send_cq = qp_init_attr->send_cq;
458 qp->xrcd = NULL;
459
460 atomic_inc(&pd->usecnt);
461 atomic_inc(&qp_init_attr->send_cq->usecnt);
462 }
463 } 321 }
464 322
465 return qp; 323 return qp;
@@ -468,8 +326,8 @@ EXPORT_SYMBOL(ib_create_qp);
468 326
469static const struct { 327static const struct {
470 int valid; 328 int valid;
471 enum ib_qp_attr_mask req_param[IB_QPT_MAX]; 329 enum ib_qp_attr_mask req_param[IB_QPT_RAW_ETHERTYPE + 1];
472 enum ib_qp_attr_mask opt_param[IB_QPT_MAX]; 330 enum ib_qp_attr_mask opt_param[IB_QPT_RAW_ETHERTYPE + 1];
473} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = { 331} qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
474 [IB_QPS_RESET] = { 332 [IB_QPS_RESET] = {
475 [IB_QPS_RESET] = { .valid = 1 }, 333 [IB_QPS_RESET] = { .valid = 1 },
@@ -479,19 +337,12 @@ static const struct {
479 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 337 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
480 IB_QP_PORT | 338 IB_QP_PORT |
481 IB_QP_QKEY), 339 IB_QP_QKEY),
482 [IB_QPT_RAW_PACKET] = IB_QP_PORT,
483 [IB_QPT_UC] = (IB_QP_PKEY_INDEX | 340 [IB_QPT_UC] = (IB_QP_PKEY_INDEX |
484 IB_QP_PORT | 341 IB_QP_PORT |
485 IB_QP_ACCESS_FLAGS), 342 IB_QP_ACCESS_FLAGS),
486 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 343 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
487 IB_QP_PORT | 344 IB_QP_PORT |
488 IB_QP_ACCESS_FLAGS), 345 IB_QP_ACCESS_FLAGS),
489 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
490 IB_QP_PORT |
491 IB_QP_ACCESS_FLAGS),
492 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
493 IB_QP_PORT |
494 IB_QP_ACCESS_FLAGS),
495 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 346 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
496 IB_QP_QKEY), 347 IB_QP_QKEY),
497 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 348 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -514,12 +365,6 @@ static const struct {
514 [IB_QPT_RC] = (IB_QP_PKEY_INDEX | 365 [IB_QPT_RC] = (IB_QP_PKEY_INDEX |
515 IB_QP_PORT | 366 IB_QP_PORT |
516 IB_QP_ACCESS_FLAGS), 367 IB_QP_ACCESS_FLAGS),
517 [IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX |
518 IB_QP_PORT |
519 IB_QP_ACCESS_FLAGS),
520 [IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX |
521 IB_QP_PORT |
522 IB_QP_ACCESS_FLAGS),
523 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 368 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
524 IB_QP_QKEY), 369 IB_QP_QKEY),
525 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 370 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -539,16 +384,6 @@ static const struct {
539 IB_QP_RQ_PSN | 384 IB_QP_RQ_PSN |
540 IB_QP_MAX_DEST_RD_ATOMIC | 385 IB_QP_MAX_DEST_RD_ATOMIC |
541 IB_QP_MIN_RNR_TIMER), 386 IB_QP_MIN_RNR_TIMER),
542 [IB_QPT_XRC_INI] = (IB_QP_AV |
543 IB_QP_PATH_MTU |
544 IB_QP_DEST_QPN |
545 IB_QP_RQ_PSN),
546 [IB_QPT_XRC_TGT] = (IB_QP_AV |
547 IB_QP_PATH_MTU |
548 IB_QP_DEST_QPN |
549 IB_QP_RQ_PSN |
550 IB_QP_MAX_DEST_RD_ATOMIC |
551 IB_QP_MIN_RNR_TIMER),
552 }, 387 },
553 .opt_param = { 388 .opt_param = {
554 [IB_QPT_UD] = (IB_QP_PKEY_INDEX | 389 [IB_QPT_UD] = (IB_QP_PKEY_INDEX |
@@ -559,12 +394,6 @@ static const struct {
559 [IB_QPT_RC] = (IB_QP_ALT_PATH | 394 [IB_QPT_RC] = (IB_QP_ALT_PATH |
560 IB_QP_ACCESS_FLAGS | 395 IB_QP_ACCESS_FLAGS |
561 IB_QP_PKEY_INDEX), 396 IB_QP_PKEY_INDEX),
562 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH |
563 IB_QP_ACCESS_FLAGS |
564 IB_QP_PKEY_INDEX),
565 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH |
566 IB_QP_ACCESS_FLAGS |
567 IB_QP_PKEY_INDEX),
568 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 397 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
569 IB_QP_QKEY), 398 IB_QP_QKEY),
570 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 399 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -585,13 +414,6 @@ static const struct {
585 IB_QP_RNR_RETRY | 414 IB_QP_RNR_RETRY |
586 IB_QP_SQ_PSN | 415 IB_QP_SQ_PSN |
587 IB_QP_MAX_QP_RD_ATOMIC), 416 IB_QP_MAX_QP_RD_ATOMIC),
588 [IB_QPT_XRC_INI] = (IB_QP_TIMEOUT |
589 IB_QP_RETRY_CNT |
590 IB_QP_RNR_RETRY |
591 IB_QP_SQ_PSN |
592 IB_QP_MAX_QP_RD_ATOMIC),
593 [IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT |
594 IB_QP_SQ_PSN),
595 [IB_QPT_SMI] = IB_QP_SQ_PSN, 417 [IB_QPT_SMI] = IB_QP_SQ_PSN,
596 [IB_QPT_GSI] = IB_QP_SQ_PSN, 418 [IB_QPT_GSI] = IB_QP_SQ_PSN,
597 }, 419 },
@@ -607,15 +429,6 @@ static const struct {
607 IB_QP_ACCESS_FLAGS | 429 IB_QP_ACCESS_FLAGS |
608 IB_QP_MIN_RNR_TIMER | 430 IB_QP_MIN_RNR_TIMER |
609 IB_QP_PATH_MIG_STATE), 431 IB_QP_PATH_MIG_STATE),
610 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
611 IB_QP_ALT_PATH |
612 IB_QP_ACCESS_FLAGS |
613 IB_QP_PATH_MIG_STATE),
614 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
615 IB_QP_ALT_PATH |
616 IB_QP_ACCESS_FLAGS |
617 IB_QP_MIN_RNR_TIMER |
618 IB_QP_PATH_MIG_STATE),
619 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 432 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
620 IB_QP_QKEY), 433 IB_QP_QKEY),
621 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 434 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -640,15 +453,6 @@ static const struct {
640 IB_QP_ALT_PATH | 453 IB_QP_ALT_PATH |
641 IB_QP_PATH_MIG_STATE | 454 IB_QP_PATH_MIG_STATE |
642 IB_QP_MIN_RNR_TIMER), 455 IB_QP_MIN_RNR_TIMER),
643 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
644 IB_QP_ACCESS_FLAGS |
645 IB_QP_ALT_PATH |
646 IB_QP_PATH_MIG_STATE),
647 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
648 IB_QP_ACCESS_FLAGS |
649 IB_QP_ALT_PATH |
650 IB_QP_PATH_MIG_STATE |
651 IB_QP_MIN_RNR_TIMER),
652 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 456 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
653 IB_QP_QKEY), 457 IB_QP_QKEY),
654 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 458 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -661,8 +465,6 @@ static const struct {
661 [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY, 465 [IB_QPT_UD] = IB_QP_EN_SQD_ASYNC_NOTIFY,
662 [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 466 [IB_QPT_UC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
663 [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY, 467 [IB_QPT_RC] = IB_QP_EN_SQD_ASYNC_NOTIFY,
664 [IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
665 [IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
666 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY, 468 [IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
667 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY 469 [IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
668 } 470 }
@@ -685,15 +487,6 @@ static const struct {
685 IB_QP_ACCESS_FLAGS | 487 IB_QP_ACCESS_FLAGS |
686 IB_QP_MIN_RNR_TIMER | 488 IB_QP_MIN_RNR_TIMER |
687 IB_QP_PATH_MIG_STATE), 489 IB_QP_PATH_MIG_STATE),
688 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE |
689 IB_QP_ALT_PATH |
690 IB_QP_ACCESS_FLAGS |
691 IB_QP_PATH_MIG_STATE),
692 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE |
693 IB_QP_ALT_PATH |
694 IB_QP_ACCESS_FLAGS |
695 IB_QP_MIN_RNR_TIMER |
696 IB_QP_PATH_MIG_STATE),
697 [IB_QPT_SMI] = (IB_QP_CUR_STATE | 490 [IB_QPT_SMI] = (IB_QP_CUR_STATE |
698 IB_QP_QKEY), 491 IB_QP_QKEY),
699 [IB_QPT_GSI] = (IB_QP_CUR_STATE | 492 [IB_QPT_GSI] = (IB_QP_CUR_STATE |
@@ -722,25 +515,6 @@ static const struct {
722 IB_QP_PKEY_INDEX | 515 IB_QP_PKEY_INDEX |
723 IB_QP_MIN_RNR_TIMER | 516 IB_QP_MIN_RNR_TIMER |
724 IB_QP_PATH_MIG_STATE), 517 IB_QP_PATH_MIG_STATE),
725 [IB_QPT_XRC_INI] = (IB_QP_PORT |
726 IB_QP_AV |
727 IB_QP_TIMEOUT |
728 IB_QP_RETRY_CNT |
729 IB_QP_RNR_RETRY |
730 IB_QP_MAX_QP_RD_ATOMIC |
731 IB_QP_ALT_PATH |
732 IB_QP_ACCESS_FLAGS |
733 IB_QP_PKEY_INDEX |
734 IB_QP_PATH_MIG_STATE),
735 [IB_QPT_XRC_TGT] = (IB_QP_PORT |
736 IB_QP_AV |
737 IB_QP_TIMEOUT |
738 IB_QP_MAX_DEST_RD_ATOMIC |
739 IB_QP_ALT_PATH |
740 IB_QP_ACCESS_FLAGS |
741 IB_QP_PKEY_INDEX |
742 IB_QP_MIN_RNR_TIMER |
743 IB_QP_PATH_MIG_STATE),
744 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX | 518 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX |
745 IB_QP_QKEY), 519 IB_QP_QKEY),
746 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX | 520 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX |
@@ -805,7 +579,7 @@ int ib_modify_qp(struct ib_qp *qp,
805 struct ib_qp_attr *qp_attr, 579 struct ib_qp_attr *qp_attr,
806 int qp_attr_mask) 580 int qp_attr_mask)
807{ 581{
808 return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); 582 return qp->device->modify_qp(qp, qp_attr, qp_attr_mask, NULL);
809} 583}
810EXPORT_SYMBOL(ib_modify_qp); 584EXPORT_SYMBOL(ib_modify_qp);
811 585
@@ -815,59 +589,11 @@ int ib_query_qp(struct ib_qp *qp,
815 struct ib_qp_init_attr *qp_init_attr) 589 struct ib_qp_init_attr *qp_init_attr)
816{ 590{
817 return qp->device->query_qp ? 591 return qp->device->query_qp ?
818 qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) : 592 qp->device->query_qp(qp, qp_attr, qp_attr_mask, qp_init_attr) :
819 -ENOSYS; 593 -ENOSYS;
820} 594}
821EXPORT_SYMBOL(ib_query_qp); 595EXPORT_SYMBOL(ib_query_qp);
822 596
823int ib_close_qp(struct ib_qp *qp)
824{
825 struct ib_qp *real_qp;
826 unsigned long flags;
827
828 real_qp = qp->real_qp;
829 if (real_qp == qp)
830 return -EINVAL;
831
832 spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
833 list_del(&qp->open_list);
834 spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
835
836 atomic_dec(&real_qp->usecnt);
837 kfree(qp);
838
839 return 0;
840}
841EXPORT_SYMBOL(ib_close_qp);
842
843static int __ib_destroy_shared_qp(struct ib_qp *qp)
844{
845 struct ib_xrcd *xrcd;
846 struct ib_qp *real_qp;
847 int ret;
848
849 real_qp = qp->real_qp;
850 xrcd = real_qp->xrcd;
851
852 mutex_lock(&xrcd->tgt_qp_mutex);
853 ib_close_qp(qp);
854 if (atomic_read(&real_qp->usecnt) == 0)
855 list_del(&real_qp->xrcd_list);
856 else
857 real_qp = NULL;
858 mutex_unlock(&xrcd->tgt_qp_mutex);
859
860 if (real_qp) {
861 ret = ib_destroy_qp(real_qp);
862 if (!ret)
863 atomic_dec(&xrcd->usecnt);
864 else
865 __ib_insert_xrcd_qp(xrcd, real_qp);
866 }
867
868 return 0;
869}
870
871int ib_destroy_qp(struct ib_qp *qp) 597int ib_destroy_qp(struct ib_qp *qp)
872{ 598{
873 struct ib_pd *pd; 599 struct ib_pd *pd;
@@ -875,25 +601,16 @@ int ib_destroy_qp(struct ib_qp *qp)
875 struct ib_srq *srq; 601 struct ib_srq *srq;
876 int ret; 602 int ret;
877 603
878 if (atomic_read(&qp->usecnt)) 604 pd = qp->pd;
879 return -EBUSY; 605 scq = qp->send_cq;
880 606 rcq = qp->recv_cq;
881 if (qp->real_qp != qp) 607 srq = qp->srq;
882 return __ib_destroy_shared_qp(qp);
883
884 pd = qp->pd;
885 scq = qp->send_cq;
886 rcq = qp->recv_cq;
887 srq = qp->srq;
888 608
889 ret = qp->device->destroy_qp(qp); 609 ret = qp->device->destroy_qp(qp);
890 if (!ret) { 610 if (!ret) {
891 if (pd) 611 atomic_dec(&pd->usecnt);
892 atomic_dec(&pd->usecnt); 612 atomic_dec(&scq->usecnt);
893 if (scq) 613 atomic_dec(&rcq->usecnt);
894 atomic_dec(&scq->usecnt);
895 if (rcq)
896 atomic_dec(&rcq->usecnt);
897 if (srq) 614 if (srq)
898 atomic_dec(&srq->usecnt); 615 atomic_dec(&srq->usecnt);
899 } 616 }
@@ -1184,71 +901,22 @@ EXPORT_SYMBOL(ib_dealloc_fmr);
1184 901
1185int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 902int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
1186{ 903{
1187 int ret;
1188
1189 if (!qp->device->attach_mcast) 904 if (!qp->device->attach_mcast)
1190 return -ENOSYS; 905 return -ENOSYS;
1191 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) 906 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
1192 return -EINVAL; 907 return -EINVAL;
1193 908
1194 ret = qp->device->attach_mcast(qp, gid, lid); 909 return qp->device->attach_mcast(qp, gid, lid);
1195 if (!ret)
1196 atomic_inc(&qp->usecnt);
1197 return ret;
1198} 910}
1199EXPORT_SYMBOL(ib_attach_mcast); 911EXPORT_SYMBOL(ib_attach_mcast);
1200 912
1201int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid) 913int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
1202{ 914{
1203 int ret;
1204
1205 if (!qp->device->detach_mcast) 915 if (!qp->device->detach_mcast)
1206 return -ENOSYS; 916 return -ENOSYS;
1207 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD) 917 if (gid->raw[0] != 0xff || qp->qp_type != IB_QPT_UD)
1208 return -EINVAL; 918 return -EINVAL;
1209 919
1210 ret = qp->device->detach_mcast(qp, gid, lid); 920 return qp->device->detach_mcast(qp, gid, lid);
1211 if (!ret)
1212 atomic_dec(&qp->usecnt);
1213 return ret;
1214} 921}
1215EXPORT_SYMBOL(ib_detach_mcast); 922EXPORT_SYMBOL(ib_detach_mcast);
1216
1217struct ib_xrcd *ib_alloc_xrcd(struct ib_device *device)
1218{
1219 struct ib_xrcd *xrcd;
1220
1221 if (!device->alloc_xrcd)
1222 return ERR_PTR(-ENOSYS);
1223
1224 xrcd = device->alloc_xrcd(device, NULL, NULL);
1225 if (!IS_ERR(xrcd)) {
1226 xrcd->device = device;
1227 xrcd->inode = NULL;
1228 atomic_set(&xrcd->usecnt, 0);
1229 mutex_init(&xrcd->tgt_qp_mutex);
1230 INIT_LIST_HEAD(&xrcd->tgt_qp_list);
1231 }
1232
1233 return xrcd;
1234}
1235EXPORT_SYMBOL(ib_alloc_xrcd);
1236
1237int ib_dealloc_xrcd(struct ib_xrcd *xrcd)
1238{
1239 struct ib_qp *qp;
1240 int ret;
1241
1242 if (atomic_read(&xrcd->usecnt))
1243 return -EBUSY;
1244
1245 while (!list_empty(&xrcd->tgt_qp_list)) {
1246 qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
1247 ret = ib_destroy_qp(qp);
1248 if (ret)
1249 return ret;
1250 }
1251
1252 return xrcd->device->dealloc_xrcd(xrcd);
1253}
1254EXPORT_SYMBOL(ib_dealloc_xrcd);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 7275e727e0f..444470a28de 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -800,10 +800,13 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
800 /* Loop thru additional data fragments and queue them */ 800 /* Loop thru additional data fragments and queue them */
801 if (skb_shinfo(skb)->nr_frags) { 801 if (skb_shinfo(skb)->nr_frags) {
802 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 802 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
803 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 803 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
804 maplen = skb_frag_size(frag); 804 maplen = frag->size;
805 mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag, 805 mapaddr =
806 0, maplen, DMA_TO_DEVICE); 806 pci_map_page(c2dev->pcidev, frag->page,
807 frag->page_offset, maplen,
808 PCI_DMA_TODEVICE);
809
807 elem = elem->next; 810 elem = elem->next;
808 elem->skb = NULL; 811 elem->skb = NULL;
809 elem->mapaddr = mapaddr; 812 elem->mapaddr = mapaddr;
@@ -920,7 +923,8 @@ static struct net_device *c2_devinit(struct c2_dev *c2dev,
920 return netdev; 923 return netdev;
921} 924}
922 925
923static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) 926static int __devinit c2_probe(struct pci_dev *pcidev,
927 const struct pci_device_id *ent)
924{ 928{
925 int ret = 0, i; 929 int ret = 0, i;
926 unsigned long reg0_start, reg0_flags, reg0_len; 930 unsigned long reg0_start, reg0_flags, reg0_len;
@@ -1190,7 +1194,7 @@ static int c2_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
1190 return ret; 1194 return ret;
1191} 1195}
1192 1196
1193static void c2_remove(struct pci_dev *pcidev) 1197static void __devexit c2_remove(struct pci_dev *pcidev)
1194{ 1198{
1195 struct c2_dev *c2dev = pci_get_drvdata(pcidev); 1199 struct c2_dev *c2dev = pci_get_drvdata(pcidev);
1196 struct net_device *netdev = c2dev->netdev; 1200 struct net_device *netdev = c2dev->netdev;
@@ -1235,7 +1239,7 @@ static struct pci_driver c2_pci_driver = {
1235 .name = DRV_NAME, 1239 .name = DRV_NAME,
1236 .id_table = c2_pci_table, 1240 .id_table = c2_pci_table,
1237 .probe = c2_probe, 1241 .probe = c2_probe,
1238 .remove = c2_remove, 1242 .remove = __devexit_p(c2_remove),
1239}; 1243};
1240 1244
1241static int __init c2_init_module(void) 1245static int __init c2_init_module(void)
diff --git a/drivers/infiniband/hw/amso1100/c2.h b/drivers/infiniband/hw/amso1100/c2.h
index ba7a1208ff9..6ae698e6877 100644
--- a/drivers/infiniband/hw/amso1100/c2.h
+++ b/drivers/infiniband/hw/amso1100/c2.h
@@ -498,16 +498,16 @@ extern int c2_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
498 struct ib_send_wr **bad_wr); 498 struct ib_send_wr **bad_wr);
499extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr, 499extern int c2_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *ib_wr,
500 struct ib_recv_wr **bad_wr); 500 struct ib_recv_wr **bad_wr);
501extern void c2_init_qp_table(struct c2_dev *c2dev); 501extern void __devinit c2_init_qp_table(struct c2_dev *c2dev);
502extern void c2_cleanup_qp_table(struct c2_dev *c2dev); 502extern void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev);
503extern void c2_set_qp_state(struct c2_qp *, int); 503extern void c2_set_qp_state(struct c2_qp *, int);
504extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn); 504extern struct c2_qp *c2_find_qpn(struct c2_dev *c2dev, int qpn);
505 505
506/* PDs */ 506/* PDs */
507extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd); 507extern int c2_pd_alloc(struct c2_dev *c2dev, int privileged, struct c2_pd *pd);
508extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd); 508extern void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd);
509extern int c2_init_pd_table(struct c2_dev *c2dev); 509extern int __devinit c2_init_pd_table(struct c2_dev *c2dev);
510extern void c2_cleanup_pd_table(struct c2_dev *c2dev); 510extern void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev);
511 511
512/* CQs */ 512/* CQs */
513extern int c2_init_cq(struct c2_dev *c2dev, int entries, 513extern int c2_init_cq(struct c2_dev *c2dev, int entries,
diff --git a/drivers/infiniband/hw/amso1100/c2_ae.c b/drivers/infiniband/hw/amso1100/c2_ae.c
index 706cf97cbe8..24f9e3a90e8 100644
--- a/drivers/infiniband/hw/amso1100/c2_ae.c
+++ b/drivers/infiniband/hw/amso1100/c2_ae.c
@@ -288,11 +288,6 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
288 cm_event.private_data_len = 288 cm_event.private_data_len =
289 be32_to_cpu(req->private_data_length); 289 be32_to_cpu(req->private_data_length);
290 cm_event.private_data = req->private_data; 290 cm_event.private_data = req->private_data;
291 /*
292 * Until ird/ord negotiation via MPAv2 support is added, send
293 * max supported values
294 */
295 cm_event.ird = cm_event.ord = 128;
296 291
297 if (cm_id->event_handler) 292 if (cm_id->event_handler)
298 cm_id->event_handler(cm_id, &cm_event); 293 cm_id->event_handler(cm_id, &cm_event);
@@ -311,7 +306,6 @@ void c2_ae_event(struct c2_dev *c2dev, u32 mq_index)
311 if (cq->ibcq.event_handler) 306 if (cq->ibcq.event_handler)
312 cq->ibcq.event_handler(&ib_event, 307 cq->ibcq.event_handler(&ib_event,
313 cq->ibcq.cq_context); 308 cq->ibcq.cq_context);
314 break;
315 } 309 }
316 310
317 default: 311 default:
diff --git a/drivers/infiniband/hw/amso1100/c2_intr.c b/drivers/infiniband/hw/amso1100/c2_intr.c
index 8951db4ae29..0ebe4e806b8 100644
--- a/drivers/infiniband/hw/amso1100/c2_intr.c
+++ b/drivers/infiniband/hw/amso1100/c2_intr.c
@@ -183,11 +183,6 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
183 case IW_CM_EVENT_ESTABLISHED: 183 case IW_CM_EVENT_ESTABLISHED:
184 c2_set_qp_state(req->qp, 184 c2_set_qp_state(req->qp,
185 C2_QP_STATE_RTS); 185 C2_QP_STATE_RTS);
186 /*
187 * Until ird/ord negotiation via MPAv2 support is added, send
188 * max supported values
189 */
190 cm_event.ird = cm_event.ord = 128;
191 case IW_CM_EVENT_CLOSE: 186 case IW_CM_EVENT_CLOSE:
192 187
193 /* 188 /*
diff --git a/drivers/infiniband/hw/amso1100/c2_pd.c b/drivers/infiniband/hw/amso1100/c2_pd.c
index f3e81dc357b..161f2a28535 100644
--- a/drivers/infiniband/hw/amso1100/c2_pd.c
+++ b/drivers/infiniband/hw/amso1100/c2_pd.c
@@ -70,7 +70,7 @@ void c2_pd_free(struct c2_dev *c2dev, struct c2_pd *pd)
70 spin_unlock(&c2dev->pd_table.lock); 70 spin_unlock(&c2dev->pd_table.lock);
71} 71}
72 72
73int c2_init_pd_table(struct c2_dev *c2dev) 73int __devinit c2_init_pd_table(struct c2_dev *c2dev)
74{ 74{
75 75
76 c2dev->pd_table.last = 0; 76 c2dev->pd_table.last = 0;
@@ -84,7 +84,7 @@ int c2_init_pd_table(struct c2_dev *c2dev)
84 return 0; 84 return 0;
85} 85}
86 86
87void c2_cleanup_pd_table(struct c2_dev *c2dev) 87void __devexit c2_cleanup_pd_table(struct c2_dev *c2dev)
88{ 88{
89 kfree(c2dev->pd_table.table); 89 kfree(c2dev->pd_table.table);
90} 90}
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 07eb3a8067d..f101bb73be6 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -94,7 +94,7 @@ static int c2_query_port(struct ib_device *ibdev,
94 props->pkey_tbl_len = 1; 94 props->pkey_tbl_len = 1;
95 props->qkey_viol_cntr = 0; 95 props->qkey_viol_cntr = 0;
96 props->active_width = 1; 96 props->active_width = 1;
97 props->active_speed = IB_SPEED_SDR; 97 props->active_speed = 1;
98 98
99 return 0; 99 return 0;
100} 100}
@@ -753,7 +753,10 @@ static struct net_device *c2_pseudo_netdev_init(struct c2_dev *c2dev)
753 memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6); 753 memcpy_fromio(netdev->dev_addr, c2dev->kva + C2_REGS_RDMA_ENADDR, 6);
754 754
755 /* Print out the MAC address */ 755 /* Print out the MAC address */
756 pr_debug("%s: MAC %pM\n", netdev->name, netdev->dev_addr); 756 pr_debug("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X\n",
757 netdev->name,
758 netdev->dev_addr[0], netdev->dev_addr[1], netdev->dev_addr[2],
759 netdev->dev_addr[3], netdev->dev_addr[4], netdev->dev_addr[5]);
757 760
758#if 0 761#if 0
759 /* Disable network packets */ 762 /* Disable network packets */
diff --git a/drivers/infiniband/hw/amso1100/c2_qp.c b/drivers/infiniband/hw/amso1100/c2_qp.c
index 28cd5cb5185..0d7b6f23caf 100644
--- a/drivers/infiniband/hw/amso1100/c2_qp.c
+++ b/drivers/infiniband/hw/amso1100/c2_qp.c
@@ -1010,13 +1010,13 @@ out:
1010 return err; 1010 return err;
1011} 1011}
1012 1012
1013void c2_init_qp_table(struct c2_dev *c2dev) 1013void __devinit c2_init_qp_table(struct c2_dev *c2dev)
1014{ 1014{
1015 spin_lock_init(&c2dev->qp_table.lock); 1015 spin_lock_init(&c2dev->qp_table.lock);
1016 idr_init(&c2dev->qp_table.idr); 1016 idr_init(&c2dev->qp_table.idr);
1017} 1017}
1018 1018
1019void c2_cleanup_qp_table(struct c2_dev *c2dev) 1019void __devexit c2_cleanup_qp_table(struct c2_dev *c2dev)
1020{ 1020{
1021 idr_destroy(&c2dev->qp_table.idr); 1021 idr_destroy(&c2dev->qp_table.idr);
1022} 1022}
diff --git a/drivers/infiniband/hw/amso1100/c2_rnic.c b/drivers/infiniband/hw/amso1100/c2_rnic.c
index b7c98699005..8c81992fa6d 100644
--- a/drivers/infiniband/hw/amso1100/c2_rnic.c
+++ b/drivers/infiniband/hw/amso1100/c2_rnic.c
@@ -439,10 +439,10 @@ static int c2_rnic_close(struct c2_dev *c2dev)
439 439
440/* 440/*
441 * Called by c2_probe to initialize the RNIC. This principally 441 * Called by c2_probe to initialize the RNIC. This principally
442 * involves initializing the various limits and resource pools that 442 * involves initalizing the various limits and resouce pools that
443 * comprise the RNIC instance. 443 * comprise the RNIC instance.
444 */ 444 */
445int c2_rnic_init(struct c2_dev *c2dev) 445int __devinit c2_rnic_init(struct c2_dev *c2dev)
446{ 446{
447 int err; 447 int err;
448 u32 qsize, msgsize; 448 u32 qsize, msgsize;
@@ -611,7 +611,7 @@ int c2_rnic_init(struct c2_dev *c2dev)
611/* 611/*
612 * Called by c2_remove to cleanup the RNIC resources. 612 * Called by c2_remove to cleanup the RNIC resources.
613 */ 613 */
614void c2_rnic_term(struct c2_dev *c2dev) 614void __devexit c2_rnic_term(struct c2_dev *c2dev)
615{ 615{
616 616
617 /* Close the open adapter instance */ 617 /* Close the open adapter instance */
diff --git a/drivers/infiniband/hw/cxgb3/Makefile b/drivers/infiniband/hw/cxgb3/Makefile
index 2761364185a..621619c794e 100644
--- a/drivers/infiniband/hw/cxgb3/Makefile
+++ b/drivers/infiniband/hw/cxgb3/Makefile
@@ -1,4 +1,4 @@
1ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb3 1ccflags-y := -Idrivers/net/cxgb3
2 2
3obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o 3obj-$(CONFIG_INFINIBAND_CXGB3) += iw_cxgb3.o
4 4
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
index 3e094cd6a0e..e55ce7a428b 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
@@ -128,8 +128,9 @@ static void stop_ep_timer(struct iwch_ep *ep)
128{ 128{
129 PDBG("%s ep %p\n", __func__, ep); 129 PDBG("%s ep %p\n", __func__, ep);
130 if (!timer_pending(&ep->timer)) { 130 if (!timer_pending(&ep->timer)) {
131 WARN(1, "%s timer stopped when its not running! ep %p state %u\n", 131 printk(KERN_ERR "%s timer stopped when its not running! ep %p state %u\n",
132 __func__, ep, ep->com.state); 132 __func__, ep, ep->com.state);
133 WARN_ON(1);
133 return; 134 return;
134 } 135 }
135 del_timer_sync(&ep->timer); 136 del_timer_sync(&ep->timer);
@@ -752,11 +753,6 @@ static void connect_request_upcall(struct iwch_ep *ep)
752 event.private_data_len = ep->plen; 753 event.private_data_len = ep->plen;
753 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message); 754 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
754 event.provider_data = ep; 755 event.provider_data = ep;
755 /*
756 * Until ird/ord negotiation via MPAv2 support is added, send max
757 * supported values
758 */
759 event.ird = event.ord = 8;
760 if (state_read(&ep->parent_ep->com) != DEAD) { 756 if (state_read(&ep->parent_ep->com) != DEAD) {
761 get_ep(&ep->com); 757 get_ep(&ep->com);
762 ep->parent_ep->com.cm_id->event_handler( 758 ep->parent_ep->com.cm_id->event_handler(
@@ -774,11 +770,6 @@ static void established_upcall(struct iwch_ep *ep)
774 PDBG("%s ep %p\n", __func__, ep); 770 PDBG("%s ep %p\n", __func__, ep);
775 memset(&event, 0, sizeof(event)); 771 memset(&event, 0, sizeof(event));
776 event.event = IW_CM_EVENT_ESTABLISHED; 772 event.event = IW_CM_EVENT_ESTABLISHED;
777 /*
778 * Until ird/ord negotiation via MPAv2 support is added, send max
779 * supported values
780 */
781 event.ird = event.ord = 8;
782 if (ep->com.cm_id) { 773 if (ep->com.cm_id) {
783 PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid); 774 PDBG("%s ep %p tid %d\n", __func__, ep, ep->hwtid);
784 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 775 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
@@ -1337,6 +1328,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1337 struct iwch_ep *child_ep, *parent_ep = ctx; 1328 struct iwch_ep *child_ep, *parent_ep = ctx;
1338 struct cpl_pass_accept_req *req = cplhdr(skb); 1329 struct cpl_pass_accept_req *req = cplhdr(skb);
1339 unsigned int hwtid = GET_TID(req); 1330 unsigned int hwtid = GET_TID(req);
1331 struct neighbour *neigh;
1340 struct dst_entry *dst; 1332 struct dst_entry *dst;
1341 struct l2t_entry *l2t; 1333 struct l2t_entry *l2t;
1342 struct rtable *rt; 1334 struct rtable *rt;
@@ -1373,7 +1365,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1373 goto reject; 1365 goto reject;
1374 } 1366 }
1375 dst = &rt->dst; 1367 dst = &rt->dst;
1376 l2t = t3_l2t_get(tdev, dst, NULL, &req->peer_ip); 1368 rcu_read_lock();
1369 neigh = dst_get_neighbour(dst);
1370 l2t = t3_l2t_get(tdev, neigh, neigh->dev);
1371 rcu_read_unlock();
1377 if (!l2t) { 1372 if (!l2t) {
1378 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", 1373 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1379 __func__); 1374 __func__);
@@ -1679,7 +1674,7 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
1679 * T3A does 3 things when a TERM is received: 1674 * T3A does 3 things when a TERM is received:
1680 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet 1675 * 1) send up a CPL_RDMA_TERMINATE message with the TERM packet
1681 * 2) generate an async event on the QP with the TERMINATE opcode 1676 * 2) generate an async event on the QP with the TERMINATE opcode
1682 * 3) post a TERMINATE opcode cqe into the associated CQ. 1677 * 3) post a TERMINATE opcde cqe into the associated CQ.
1683 * 1678 *
1684 * For (1), we save the message in the qp for later consumer consumption. 1679 * For (1), we save the message in the qp for later consumer consumption.
1685 * For (2), we move the QP into TERMINATE, post a QP event and disconnect. 1680 * For (2), we move the QP into TERMINATE, post a QP event and disconnect.
@@ -1755,8 +1750,9 @@ static void ep_timeout(unsigned long arg)
1755 __state_set(&ep->com, ABORTING); 1750 __state_set(&ep->com, ABORTING);
1756 break; 1751 break;
1757 default: 1752 default:
1758 WARN(1, "%s unexpected state ep %p state %u\n", 1753 printk(KERN_ERR "%s unexpected state ep %p state %u\n",
1759 __func__, ep, ep->com.state); 1754 __func__, ep, ep->com.state);
1755 WARN_ON(1);
1760 abort = 0; 1756 abort = 0;
1761 } 1757 }
1762 spin_unlock_irqrestore(&ep->com.lock, flags); 1758 spin_unlock_irqrestore(&ep->com.lock, flags);
@@ -1883,6 +1879,7 @@ static int is_loopback_dst(struct iw_cm_id *cm_id)
1883int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1879int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1884{ 1880{
1885 struct iwch_dev *h = to_iwch_dev(cm_id->device); 1881 struct iwch_dev *h = to_iwch_dev(cm_id->device);
1882 struct neighbour *neigh;
1886 struct iwch_ep *ep; 1883 struct iwch_ep *ep;
1887 struct rtable *rt; 1884 struct rtable *rt;
1888 int err = 0; 1885 int err = 0;
@@ -1940,8 +1937,13 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
1940 goto fail3; 1937 goto fail3;
1941 } 1938 }
1942 ep->dst = &rt->dst; 1939 ep->dst = &rt->dst;
1943 ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL, 1940
1944 &cm_id->remote_addr.sin_addr.s_addr); 1941 rcu_read_lock();
1942 neigh = dst_get_neighbour(ep->dst);
1943
1944 /* get a l2t entry */
1945 ep->l2t = t3_l2t_get(ep->com.tdev, neigh, neigh->dev);
1946 rcu_read_unlock();
1945 if (!ep->l2t) { 1947 if (!ep->l2t) {
1946 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); 1948 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
1947 err = -ENOMEM; 1949 err = -ENOMEM;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_ev.c b/drivers/infiniband/hw/cxgb3/iwch_ev.c
index abcc9e76962..71e0d845da3 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_ev.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_ev.c
@@ -46,7 +46,6 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
46 struct ib_event event; 46 struct ib_event event;
47 struct iwch_qp_attributes attrs; 47 struct iwch_qp_attributes attrs;
48 struct iwch_qp *qhp; 48 struct iwch_qp *qhp;
49 unsigned long flag;
50 49
51 spin_lock(&rnicp->lock); 50 spin_lock(&rnicp->lock);
52 qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe)); 51 qhp = get_qhp(rnicp, CQE_QPID(rsp_msg->cqe));
@@ -95,9 +94,7 @@ static void post_qp_event(struct iwch_dev *rnicp, struct iwch_cq *chp,
95 if (qhp->ibqp.event_handler) 94 if (qhp->ibqp.event_handler)
96 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); 95 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
97 96
98 spin_lock_irqsave(&chp->comp_handler_lock, flag);
99 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); 97 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
100 spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
101 98
102 if (atomic_dec_and_test(&qhp->refcnt)) 99 if (atomic_dec_and_test(&qhp->refcnt))
103 wake_up(&qhp->wait); 100 wake_up(&qhp->wait);
@@ -110,7 +107,6 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
110 struct iwch_cq *chp; 107 struct iwch_cq *chp;
111 struct iwch_qp *qhp; 108 struct iwch_qp *qhp;
112 u32 cqid = RSPQ_CQID(rsp_msg); 109 u32 cqid = RSPQ_CQID(rsp_msg);
113 unsigned long flag;
114 110
115 rnicp = (struct iwch_dev *) rdev_p->ulp; 111 rnicp = (struct iwch_dev *) rdev_p->ulp;
116 spin_lock(&rnicp->lock); 112 spin_lock(&rnicp->lock);
@@ -174,9 +170,7 @@ void iwch_ev_dispatch(struct cxio_rdev *rdev_p, struct sk_buff *skb)
174 */ 170 */
175 if (qhp->ep && SQ_TYPE(rsp_msg->cqe)) 171 if (qhp->ep && SQ_TYPE(rsp_msg->cqe))
176 dst_confirm(qhp->ep->dst); 172 dst_confirm(qhp->ep->dst);
177 spin_lock_irqsave(&chp->comp_handler_lock, flag);
178 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); 173 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
179 spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
180 break; 174 break;
181 175
182 case TPT_ERR_STAG: 176 case TPT_ERR_STAG:
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 0bdf09aa6f4..c7d9411f295 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -190,7 +190,6 @@ static struct ib_cq *iwch_create_cq(struct ib_device *ibdev, int entries, int ve
190 chp->rhp = rhp; 190 chp->rhp = rhp;
191 chp->ibcq.cqe = 1 << chp->cq.size_log2; 191 chp->ibcq.cqe = 1 << chp->cq.size_log2;
192 spin_lock_init(&chp->lock); 192 spin_lock_init(&chp->lock);
193 spin_lock_init(&chp->comp_handler_lock);
194 atomic_set(&chp->refcnt, 1); 193 atomic_set(&chp->refcnt, 1);
195 init_waitqueue_head(&chp->wait); 194 init_waitqueue_head(&chp->wait);
196 if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) { 195 if (insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid)) {
@@ -1227,7 +1226,7 @@ static int iwch_query_port(struct ib_device *ibdev,
1227 props->gid_tbl_len = 1; 1226 props->gid_tbl_len = 1;
1228 props->pkey_tbl_len = 1; 1227 props->pkey_tbl_len = 1;
1229 props->active_width = 2; 1228 props->active_width = 2;
1230 props->active_speed = IB_SPEED_DDR; 1229 props->active_speed = 2;
1231 props->max_msg_sz = -1; 1230 props->max_msg_sz = -1;
1232 1231
1233 return 0; 1232 return 0;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
index 87c14b0c5ac..9a342c9b220 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
@@ -103,7 +103,6 @@ struct iwch_cq {
103 struct iwch_dev *rhp; 103 struct iwch_dev *rhp;
104 struct t3_cq cq; 104 struct t3_cq cq;
105 spinlock_t lock; 105 spinlock_t lock;
106 spinlock_t comp_handler_lock;
107 atomic_t refcnt; 106 atomic_t refcnt;
108 wait_queue_head_t wait; 107 wait_queue_head_t wait;
109 u32 __user *user_rptr_addr; 108 u32 __user *user_rptr_addr;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
index 6de8463f453..ecd313f359a 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
@@ -803,7 +803,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
803 * Assumes qhp lock is held. 803 * Assumes qhp lock is held.
804 */ 804 */
805static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, 805static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
806 struct iwch_cq *schp) 806 struct iwch_cq *schp, unsigned long *flag)
807{ 807{
808 int count; 808 int count;
809 int flushed; 809 int flushed;
@@ -812,44 +812,38 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp,
812 PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); 812 PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
813 /* take a ref on the qhp since we must release the lock */ 813 /* take a ref on the qhp since we must release the lock */
814 atomic_inc(&qhp->refcnt); 814 atomic_inc(&qhp->refcnt);
815 spin_unlock(&qhp->lock); 815 spin_unlock_irqrestore(&qhp->lock, *flag);
816 816
817 /* locking hierarchy: cq lock first, then qp lock. */ 817 /* locking hierarchy: cq lock first, then qp lock. */
818 spin_lock(&rchp->lock); 818 spin_lock_irqsave(&rchp->lock, *flag);
819 spin_lock(&qhp->lock); 819 spin_lock(&qhp->lock);
820 cxio_flush_hw_cq(&rchp->cq); 820 cxio_flush_hw_cq(&rchp->cq);
821 cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); 821 cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
822 flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); 822 flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
823 spin_unlock(&qhp->lock); 823 spin_unlock(&qhp->lock);
824 spin_unlock(&rchp->lock); 824 spin_unlock_irqrestore(&rchp->lock, *flag);
825 if (flushed) { 825 if (flushed)
826 spin_lock(&rchp->comp_handler_lock);
827 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); 826 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
828 spin_unlock(&rchp->comp_handler_lock);
829 }
830 827
831 /* locking hierarchy: cq lock first, then qp lock. */ 828 /* locking hierarchy: cq lock first, then qp lock. */
832 spin_lock(&schp->lock); 829 spin_lock_irqsave(&schp->lock, *flag);
833 spin_lock(&qhp->lock); 830 spin_lock(&qhp->lock);
834 cxio_flush_hw_cq(&schp->cq); 831 cxio_flush_hw_cq(&schp->cq);
835 cxio_count_scqes(&schp->cq, &qhp->wq, &count); 832 cxio_count_scqes(&schp->cq, &qhp->wq, &count);
836 flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); 833 flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
837 spin_unlock(&qhp->lock); 834 spin_unlock(&qhp->lock);
838 spin_unlock(&schp->lock); 835 spin_unlock_irqrestore(&schp->lock, *flag);
839 if (flushed) { 836 if (flushed)
840 spin_lock(&schp->comp_handler_lock);
841 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); 837 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
842 spin_unlock(&schp->comp_handler_lock);
843 }
844 838
845 /* deref */ 839 /* deref */
846 if (atomic_dec_and_test(&qhp->refcnt)) 840 if (atomic_dec_and_test(&qhp->refcnt))
847 wake_up(&qhp->wait); 841 wake_up(&qhp->wait);
848 842
849 spin_lock(&qhp->lock); 843 spin_lock_irqsave(&qhp->lock, *flag);
850} 844}
851 845
852static void flush_qp(struct iwch_qp *qhp) 846static void flush_qp(struct iwch_qp *qhp, unsigned long *flag)
853{ 847{
854 struct iwch_cq *rchp, *schp; 848 struct iwch_cq *rchp, *schp;
855 849
@@ -859,19 +853,15 @@ static void flush_qp(struct iwch_qp *qhp)
859 if (qhp->ibqp.uobject) { 853 if (qhp->ibqp.uobject) {
860 cxio_set_wq_in_error(&qhp->wq); 854 cxio_set_wq_in_error(&qhp->wq);
861 cxio_set_cq_in_error(&rchp->cq); 855 cxio_set_cq_in_error(&rchp->cq);
862 spin_lock(&rchp->comp_handler_lock);
863 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); 856 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
864 spin_unlock(&rchp->comp_handler_lock);
865 if (schp != rchp) { 857 if (schp != rchp) {
866 cxio_set_cq_in_error(&schp->cq); 858 cxio_set_cq_in_error(&schp->cq);
867 spin_lock(&schp->comp_handler_lock);
868 (*schp->ibcq.comp_handler)(&schp->ibcq, 859 (*schp->ibcq.comp_handler)(&schp->ibcq,
869 schp->ibcq.cq_context); 860 schp->ibcq.cq_context);
870 spin_unlock(&schp->comp_handler_lock);
871 } 861 }
872 return; 862 return;
873 } 863 }
874 __flush_qp(qhp, rchp, schp); 864 __flush_qp(qhp, rchp, schp, flag);
875} 865}
876 866
877 867
@@ -1030,7 +1020,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
1030 break; 1020 break;
1031 case IWCH_QP_STATE_ERROR: 1021 case IWCH_QP_STATE_ERROR:
1032 qhp->attr.state = IWCH_QP_STATE_ERROR; 1022 qhp->attr.state = IWCH_QP_STATE_ERROR;
1033 flush_qp(qhp); 1023 flush_qp(qhp, &flag);
1034 break; 1024 break;
1035 default: 1025 default:
1036 ret = -EINVAL; 1026 ret = -EINVAL;
@@ -1078,7 +1068,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
1078 } 1068 }
1079 switch (attrs->next_state) { 1069 switch (attrs->next_state) {
1080 case IWCH_QP_STATE_IDLE: 1070 case IWCH_QP_STATE_IDLE:
1081 flush_qp(qhp); 1071 flush_qp(qhp, &flag);
1082 qhp->attr.state = IWCH_QP_STATE_IDLE; 1072 qhp->attr.state = IWCH_QP_STATE_IDLE;
1083 qhp->attr.llp_stream_handle = NULL; 1073 qhp->attr.llp_stream_handle = NULL;
1084 put_ep(&qhp->ep->com); 1074 put_ep(&qhp->ep->com);
@@ -1132,7 +1122,7 @@ err:
1132 free=1; 1122 free=1;
1133 wake_up(&qhp->wait); 1123 wake_up(&qhp->wait);
1134 BUG_ON(!ep); 1124 BUG_ON(!ep);
1135 flush_qp(qhp); 1125 flush_qp(qhp, &flag);
1136out: 1126out:
1137 spin_unlock_irqrestore(&qhp->lock, flag); 1127 spin_unlock_irqrestore(&qhp->lock, flag);
1138 1128
diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile
index e11cf729994..cd20b1342ae 100644
--- a/drivers/infiniband/hw/cxgb4/Makefile
+++ b/drivers/infiniband/hw/cxgb4/Makefile
@@ -1,5 +1,5 @@
1ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 1ccflags-y := -Idrivers/net/cxgb4
2 2
3obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o 3obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o
4 4
5iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o id_table.o 5iw_cxgb4-y := device.o cm.o provider.o mem.o cq.o qp.o resource.o ev.o
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index c13745cde7f..daa93e942e1 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -38,12 +38,10 @@
38#include <linux/inetdevice.h> 38#include <linux/inetdevice.h>
39#include <linux/ip.h> 39#include <linux/ip.h>
40#include <linux/tcp.h> 40#include <linux/tcp.h>
41#include <linux/if_vlan.h>
42 41
43#include <net/neighbour.h> 42#include <net/neighbour.h>
44#include <net/netevent.h> 43#include <net/netevent.h>
45#include <net/route.h> 44#include <net/route.h>
46#include <net/tcp.h>
47 45
48#include "iw_cxgb4.h" 46#include "iw_cxgb4.h"
49 47
@@ -63,14 +61,6 @@ static char *states[] = {
63 NULL, 61 NULL,
64}; 62};
65 63
66static int nocong;
67module_param(nocong, int, 0644);
68MODULE_PARM_DESC(nocong, "Turn of congestion control (default=0)");
69
70static int enable_ecn;
71module_param(enable_ecn, int, 0644);
72MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
73
74static int dack_mode = 1; 64static int dack_mode = 1;
75module_param(dack_mode, int, 0644); 65module_param(dack_mode, int, 0644);
76MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)"); 66MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=1)");
@@ -113,8 +103,7 @@ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
113static int mpa_rev = 1; 103static int mpa_rev = 1;
114module_param(mpa_rev, int, 0644); 104module_param(mpa_rev, int, 0644);
115MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, " 105MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
116 "1 is RFC0544 spec compliant, 2 is IETF MPA Peer Connect Draft" 106 "1 is spec compliant. (default=1)");
117 " compliant (default=1)");
118 107
119static int markers_enabled; 108static int markers_enabled;
120module_param(markers_enabled, int, 0644); 109module_param(markers_enabled, int, 0644);
@@ -161,8 +150,9 @@ static void stop_ep_timer(struct c4iw_ep *ep)
161{ 150{
162 PDBG("%s ep %p\n", __func__, ep); 151 PDBG("%s ep %p\n", __func__, ep);
163 if (!timer_pending(&ep->timer)) { 152 if (!timer_pending(&ep->timer)) {
164 WARN(1, "%s timer stopped when its not running! " 153 printk(KERN_ERR "%s timer stopped when its not running! "
165 "ep %p state %u\n", __func__, ep, ep->com.state); 154 "ep %p state %u\n", __func__, ep, ep->com.state);
155 WARN_ON(1);
166 return; 156 return;
167 } 157 }
168 del_timer_sync(&ep->timer); 158 del_timer_sync(&ep->timer);
@@ -275,7 +265,6 @@ void _c4iw_free_ep(struct kref *kref)
275 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid); 265 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
276 dst_release(ep->dst); 266 dst_release(ep->dst);
277 cxgb4_l2t_release(ep->l2t); 267 cxgb4_l2t_release(ep->l2t);
278 remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid);
279 } 268 }
280 kfree(ep); 269 kfree(ep);
281} 270}
@@ -452,50 +441,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
452 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 441 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
453} 442}
454 443
455#define VLAN_NONE 0xfff
456#define FILTER_SEL_VLAN_NONE 0xffff
457#define FILTER_SEL_WIDTH_P_FC (3+1) /* port uses 3 bits, FCoE one bit */
458#define FILTER_SEL_WIDTH_VIN_P_FC \
459 (6 + 7 + FILTER_SEL_WIDTH_P_FC) /* 6 bits are unused, VF uses 7 bits*/
460#define FILTER_SEL_WIDTH_TAG_P_FC \
461 (3 + FILTER_SEL_WIDTH_VIN_P_FC) /* PF uses 3 bits */
462#define FILTER_SEL_WIDTH_VLD_TAG_P_FC (1 + FILTER_SEL_WIDTH_TAG_P_FC)
463
464static unsigned int select_ntuple(struct c4iw_dev *dev, struct dst_entry *dst,
465 struct l2t_entry *l2t)
466{
467 unsigned int ntuple = 0;
468 u32 viid;
469
470 switch (dev->rdev.lldi.filt_mode) {
471
472 /* default filter mode */
473 case HW_TPL_FR_MT_PR_IV_P_FC:
474 if (l2t->vlan == VLAN_NONE)
475 ntuple |= FILTER_SEL_VLAN_NONE << FILTER_SEL_WIDTH_P_FC;
476 else {
477 ntuple |= l2t->vlan << FILTER_SEL_WIDTH_P_FC;
478 ntuple |= 1 << FILTER_SEL_WIDTH_VLD_TAG_P_FC;
479 }
480 ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
481 FILTER_SEL_WIDTH_VLD_TAG_P_FC;
482 break;
483 case HW_TPL_FR_MT_PR_OV_P_FC: {
484 viid = cxgb4_port_viid(l2t->neigh->dev);
485
486 ntuple |= FW_VIID_VIN_GET(viid) << FILTER_SEL_WIDTH_P_FC;
487 ntuple |= FW_VIID_PFN_GET(viid) << FILTER_SEL_WIDTH_VIN_P_FC;
488 ntuple |= FW_VIID_VIVLD_GET(viid) << FILTER_SEL_WIDTH_TAG_P_FC;
489 ntuple |= l2t->lport << S_PORT | IPPROTO_TCP <<
490 FILTER_SEL_WIDTH_VLD_TAG_P_FC;
491 break;
492 }
493 default:
494 break;
495 }
496 return ntuple;
497}
498
499static int send_connect(struct c4iw_ep *ep) 444static int send_connect(struct c4iw_ep *ep)
500{ 445{
501 struct cpl_act_open_req *req; 446 struct cpl_act_open_req *req;
@@ -518,8 +463,7 @@ static int send_connect(struct c4iw_ep *ep)
518 463
519 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); 464 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
520 wscale = compute_wscale(rcv_win); 465 wscale = compute_wscale(rcv_win);
521 opt0 = (nocong ? NO_CONG(1) : 0) | 466 opt0 = KEEP_ALIVE(1) |
522 KEEP_ALIVE(1) |
523 DELACK(1) | 467 DELACK(1) |
524 WND_SCALE(wscale) | 468 WND_SCALE(wscale) |
525 MSS_IDX(mtu_idx) | 469 MSS_IDX(mtu_idx) |
@@ -530,7 +474,6 @@ static int send_connect(struct c4iw_ep *ep)
530 ULP_MODE(ULP_MODE_TCPDDP) | 474 ULP_MODE(ULP_MODE_TCPDDP) |
531 RCV_BUFSIZ(rcv_win>>10); 475 RCV_BUFSIZ(rcv_win>>10);
532 opt2 = RX_CHANNEL(0) | 476 opt2 = RX_CHANNEL(0) |
533 CCTRL_ECN(enable_ecn) |
534 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid); 477 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
535 if (enable_tcp_timestamps) 478 if (enable_tcp_timestamps)
536 opt2 |= TSTAMPS_EN(1); 479 opt2 |= TSTAMPS_EN(1);
@@ -549,27 +492,22 @@ static int send_connect(struct c4iw_ep *ep)
549 req->local_ip = ep->com.local_addr.sin_addr.s_addr; 492 req->local_ip = ep->com.local_addr.sin_addr.s_addr;
550 req->peer_ip = ep->com.remote_addr.sin_addr.s_addr; 493 req->peer_ip = ep->com.remote_addr.sin_addr.s_addr;
551 req->opt0 = cpu_to_be64(opt0); 494 req->opt0 = cpu_to_be64(opt0);
552 req->params = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst, ep->l2t)); 495 req->params = 0;
553 req->opt2 = cpu_to_be32(opt2); 496 req->opt2 = cpu_to_be32(opt2);
554 set_bit(ACT_OPEN_REQ, &ep->com.history);
555 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); 497 return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
556} 498}
557 499
558static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, 500static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb)
559 u8 mpa_rev_to_use)
560{ 501{
561 int mpalen, wrlen; 502 int mpalen, wrlen;
562 struct fw_ofld_tx_data_wr *req; 503 struct fw_ofld_tx_data_wr *req;
563 struct mpa_message *mpa; 504 struct mpa_message *mpa;
564 struct mpa_v2_conn_params mpa_v2_params;
565 505
566 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); 506 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
567 507
568 BUG_ON(skb_cloned(skb)); 508 BUG_ON(skb_cloned(skb));
569 509
570 mpalen = sizeof(*mpa) + ep->plen; 510 mpalen = sizeof(*mpa) + ep->plen;
571 if (mpa_rev_to_use == 2)
572 mpalen += sizeof(struct mpa_v2_conn_params);
573 wrlen = roundup(mpalen + sizeof *req, 16); 511 wrlen = roundup(mpalen + sizeof *req, 16);
574 skb = get_skb(skb, wrlen, GFP_KERNEL); 512 skb = get_skb(skb, wrlen, GFP_KERNEL);
575 if (!skb) { 513 if (!skb) {
@@ -595,41 +533,12 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
595 mpa = (struct mpa_message *)(req + 1); 533 mpa = (struct mpa_message *)(req + 1);
596 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)); 534 memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
597 mpa->flags = (crc_enabled ? MPA_CRC : 0) | 535 mpa->flags = (crc_enabled ? MPA_CRC : 0) |
598 (markers_enabled ? MPA_MARKERS : 0) | 536 (markers_enabled ? MPA_MARKERS : 0);
599 (mpa_rev_to_use == 2 ? MPA_ENHANCED_RDMA_CONN : 0);
600 mpa->private_data_size = htons(ep->plen); 537 mpa->private_data_size = htons(ep->plen);
601 mpa->revision = mpa_rev_to_use; 538 mpa->revision = mpa_rev;
602 if (mpa_rev_to_use == 1) {
603 ep->tried_with_mpa_v1 = 1;
604 ep->retry_with_mpa_v1 = 0;
605 }
606
607 if (mpa_rev_to_use == 2) {
608 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
609 sizeof (struct mpa_v2_conn_params));
610 mpa_v2_params.ird = htons((u16)ep->ird);
611 mpa_v2_params.ord = htons((u16)ep->ord);
612
613 if (peer2peer) {
614 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
615 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
616 mpa_v2_params.ord |=
617 htons(MPA_V2_RDMA_WRITE_RTR);
618 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
619 mpa_v2_params.ord |=
620 htons(MPA_V2_RDMA_READ_RTR);
621 }
622 memcpy(mpa->private_data, &mpa_v2_params,
623 sizeof(struct mpa_v2_conn_params));
624 539
625 if (ep->plen) 540 if (ep->plen)
626 memcpy(mpa->private_data + 541 memcpy(mpa->private_data, ep->mpa_pkt + sizeof(*mpa), ep->plen);
627 sizeof(struct mpa_v2_conn_params),
628 ep->mpa_pkt + sizeof(*mpa), ep->plen);
629 } else
630 if (ep->plen)
631 memcpy(mpa->private_data,
632 ep->mpa_pkt + sizeof(*mpa), ep->plen);
633 542
634 /* 543 /*
635 * Reference the mpa skb. This ensures the data area 544 * Reference the mpa skb. This ensures the data area
@@ -653,13 +562,10 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
653 struct fw_ofld_tx_data_wr *req; 562 struct fw_ofld_tx_data_wr *req;
654 struct mpa_message *mpa; 563 struct mpa_message *mpa;
655 struct sk_buff *skb; 564 struct sk_buff *skb;
656 struct mpa_v2_conn_params mpa_v2_params;
657 565
658 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); 566 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
659 567
660 mpalen = sizeof(*mpa) + plen; 568 mpalen = sizeof(*mpa) + plen;
661 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
662 mpalen += sizeof(struct mpa_v2_conn_params);
663 wrlen = roundup(mpalen + sizeof *req, 16); 569 wrlen = roundup(mpalen + sizeof *req, 16);
664 570
665 skb = get_skb(NULL, wrlen, GFP_KERNEL); 571 skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -689,29 +595,8 @@ static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
689 mpa->flags = MPA_REJECT; 595 mpa->flags = MPA_REJECT;
690 mpa->revision = mpa_rev; 596 mpa->revision = mpa_rev;
691 mpa->private_data_size = htons(plen); 597 mpa->private_data_size = htons(plen);
692 598 if (plen)
693 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 599 memcpy(mpa->private_data, pdata, plen);
694 mpa->flags |= MPA_ENHANCED_RDMA_CONN;
695 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
696 sizeof (struct mpa_v2_conn_params));
697 mpa_v2_params.ird = htons(((u16)ep->ird) |
698 (peer2peer ? MPA_V2_PEER2PEER_MODEL :
699 0));
700 mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
701 (p2p_type ==
702 FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
703 MPA_V2_RDMA_WRITE_RTR : p2p_type ==
704 FW_RI_INIT_P2PTYPE_READ_REQ ?
705 MPA_V2_RDMA_READ_RTR : 0) : 0));
706 memcpy(mpa->private_data, &mpa_v2_params,
707 sizeof(struct mpa_v2_conn_params));
708
709 if (ep->plen)
710 memcpy(mpa->private_data +
711 sizeof(struct mpa_v2_conn_params), pdata, plen);
712 } else
713 if (plen)
714 memcpy(mpa->private_data, pdata, plen);
715 600
716 /* 601 /*
717 * Reference the mpa skb again. This ensures the data area 602 * Reference the mpa skb again. This ensures the data area
@@ -732,13 +617,10 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
732 struct fw_ofld_tx_data_wr *req; 617 struct fw_ofld_tx_data_wr *req;
733 struct mpa_message *mpa; 618 struct mpa_message *mpa;
734 struct sk_buff *skb; 619 struct sk_buff *skb;
735 struct mpa_v2_conn_params mpa_v2_params;
736 620
737 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen); 621 PDBG("%s ep %p tid %u pd_len %d\n", __func__, ep, ep->hwtid, ep->plen);
738 622
739 mpalen = sizeof(*mpa) + plen; 623 mpalen = sizeof(*mpa) + plen;
740 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
741 mpalen += sizeof(struct mpa_v2_conn_params);
742 wrlen = roundup(mpalen + sizeof *req, 16); 624 wrlen = roundup(mpalen + sizeof *req, 16);
743 625
744 skb = get_skb(NULL, wrlen, GFP_KERNEL); 626 skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@ -767,36 +649,10 @@ static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
767 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key)); 649 memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
768 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) | 650 mpa->flags = (ep->mpa_attr.crc_enabled ? MPA_CRC : 0) |
769 (markers_enabled ? MPA_MARKERS : 0); 651 (markers_enabled ? MPA_MARKERS : 0);
770 mpa->revision = ep->mpa_attr.version; 652 mpa->revision = mpa_rev;
771 mpa->private_data_size = htons(plen); 653 mpa->private_data_size = htons(plen);
772 654 if (plen)
773 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 655 memcpy(mpa->private_data, pdata, plen);
774 mpa->flags |= MPA_ENHANCED_RDMA_CONN;
775 mpa->private_data_size = htons(ntohs(mpa->private_data_size) +
776 sizeof (struct mpa_v2_conn_params));
777 mpa_v2_params.ird = htons((u16)ep->ird);
778 mpa_v2_params.ord = htons((u16)ep->ord);
779 if (peer2peer && (ep->mpa_attr.p2p_type !=
780 FW_RI_INIT_P2PTYPE_DISABLED)) {
781 mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
782
783 if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
784 mpa_v2_params.ord |=
785 htons(MPA_V2_RDMA_WRITE_RTR);
786 else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
787 mpa_v2_params.ord |=
788 htons(MPA_V2_RDMA_READ_RTR);
789 }
790
791 memcpy(mpa->private_data, &mpa_v2_params,
792 sizeof(struct mpa_v2_conn_params));
793
794 if (ep->plen)
795 memcpy(mpa->private_data +
796 sizeof(struct mpa_v2_conn_params), pdata, plen);
797 } else
798 if (plen)
799 memcpy(mpa->private_data, pdata, plen);
800 656
801 /* 657 /*
802 * Reference the mpa skb. This ensures the data area 658 * Reference the mpa skb. This ensures the data area
@@ -828,7 +684,6 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
828 /* setup the hwtid for this connection */ 684 /* setup the hwtid for this connection */
829 ep->hwtid = tid; 685 ep->hwtid = tid;
830 cxgb4_insert_tid(t, ep, tid); 686 cxgb4_insert_tid(t, ep, tid);
831 insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
832 687
833 ep->snd_seq = be32_to_cpu(req->snd_isn); 688 ep->snd_seq = be32_to_cpu(req->snd_isn);
834 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 689 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
@@ -836,16 +691,11 @@ static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
836 set_emss(ep, ntohs(req->tcp_opt)); 691 set_emss(ep, ntohs(req->tcp_opt));
837 692
838 /* dealloc the atid */ 693 /* dealloc the atid */
839 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
840 cxgb4_free_atid(t, atid); 694 cxgb4_free_atid(t, atid);
841 set_bit(ACT_ESTAB, &ep->com.history);
842 695
843 /* start MPA negotiation */ 696 /* start MPA negotiation */
844 send_flowc(ep, NULL); 697 send_flowc(ep, NULL);
845 if (ep->retry_with_mpa_v1) 698 send_mpa_req(ep, skb);
846 send_mpa_req(ep, skb, 1);
847 else
848 send_mpa_req(ep, skb, mpa_rev);
849 699
850 return 0; 700 return 0;
851} 701}
@@ -864,7 +714,6 @@ static void close_complete_upcall(struct c4iw_ep *ep)
864 ep->com.cm_id->rem_ref(ep->com.cm_id); 714 ep->com.cm_id->rem_ref(ep->com.cm_id);
865 ep->com.cm_id = NULL; 715 ep->com.cm_id = NULL;
866 ep->com.qp = NULL; 716 ep->com.qp = NULL;
867 set_bit(CLOSE_UPCALL, &ep->com.history);
868 } 717 }
869} 718}
870 719
@@ -873,7 +722,6 @@ static int abort_connection(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
873 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 722 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
874 close_complete_upcall(ep); 723 close_complete_upcall(ep);
875 state_set(&ep->com, ABORTING); 724 state_set(&ep->com, ABORTING);
876 set_bit(ABORT_CONN, &ep->com.history);
877 return send_abort(ep, skb, gfp); 725 return send_abort(ep, skb, gfp);
878} 726}
879 727
@@ -888,7 +736,6 @@ static void peer_close_upcall(struct c4iw_ep *ep)
888 PDBG("peer close delivered ep %p cm_id %p tid %u\n", 736 PDBG("peer close delivered ep %p cm_id %p tid %u\n",
889 ep, ep->com.cm_id, ep->hwtid); 737 ep, ep->com.cm_id, ep->hwtid);
890 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 738 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
891 set_bit(DISCONN_UPCALL, &ep->com.history);
892 } 739 }
893} 740}
894 741
@@ -907,7 +754,6 @@ static void peer_abort_upcall(struct c4iw_ep *ep)
907 ep->com.cm_id->rem_ref(ep->com.cm_id); 754 ep->com.cm_id->rem_ref(ep->com.cm_id);
908 ep->com.cm_id = NULL; 755 ep->com.cm_id = NULL;
909 ep->com.qp = NULL; 756 ep->com.qp = NULL;
910 set_bit(ABORT_UPCALL, &ep->com.history);
911 } 757 }
912} 758}
913 759
@@ -923,24 +769,12 @@ static void connect_reply_upcall(struct c4iw_ep *ep, int status)
923 event.remote_addr = ep->com.remote_addr; 769 event.remote_addr = ep->com.remote_addr;
924 770
925 if ((status == 0) || (status == -ECONNREFUSED)) { 771 if ((status == 0) || (status == -ECONNREFUSED)) {
926 if (!ep->tried_with_mpa_v1) { 772 event.private_data_len = ep->plen;
927 /* this means MPA_v2 is used */ 773 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
928 event.private_data_len = ep->plen -
929 sizeof(struct mpa_v2_conn_params);
930 event.private_data = ep->mpa_pkt +
931 sizeof(struct mpa_message) +
932 sizeof(struct mpa_v2_conn_params);
933 } else {
934 /* this means MPA_v1 is used */
935 event.private_data_len = ep->plen;
936 event.private_data = ep->mpa_pkt +
937 sizeof(struct mpa_message);
938 }
939 } 774 }
940 775
941 PDBG("%s ep %p tid %u status %d\n", __func__, ep, 776 PDBG("%s ep %p tid %u status %d\n", __func__, ep,
942 ep->hwtid, status); 777 ep->hwtid, status);
943 set_bit(CONN_RPL_UPCALL, &ep->com.history);
944 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 778 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
945 779
946 if (status < 0) { 780 if (status < 0) {
@@ -959,29 +793,15 @@ static void connect_request_upcall(struct c4iw_ep *ep)
959 event.event = IW_CM_EVENT_CONNECT_REQUEST; 793 event.event = IW_CM_EVENT_CONNECT_REQUEST;
960 event.local_addr = ep->com.local_addr; 794 event.local_addr = ep->com.local_addr;
961 event.remote_addr = ep->com.remote_addr; 795 event.remote_addr = ep->com.remote_addr;
796 event.private_data_len = ep->plen;
797 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
962 event.provider_data = ep; 798 event.provider_data = ep;
963 if (!ep->tried_with_mpa_v1) {
964 /* this means MPA_v2 is used */
965 event.ord = ep->ord;
966 event.ird = ep->ird;
967 event.private_data_len = ep->plen -
968 sizeof(struct mpa_v2_conn_params);
969 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
970 sizeof(struct mpa_v2_conn_params);
971 } else {
972 /* this means MPA_v1 is used. Send max supported */
973 event.ord = c4iw_max_read_depth;
974 event.ird = c4iw_max_read_depth;
975 event.private_data_len = ep->plen;
976 event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
977 }
978 if (state_read(&ep->parent_ep->com) != DEAD) { 799 if (state_read(&ep->parent_ep->com) != DEAD) {
979 c4iw_get_ep(&ep->com); 800 c4iw_get_ep(&ep->com);
980 ep->parent_ep->com.cm_id->event_handler( 801 ep->parent_ep->com.cm_id->event_handler(
981 ep->parent_ep->com.cm_id, 802 ep->parent_ep->com.cm_id,
982 &event); 803 &event);
983 } 804 }
984 set_bit(CONNREQ_UPCALL, &ep->com.history);
985 c4iw_put_ep(&ep->parent_ep->com); 805 c4iw_put_ep(&ep->parent_ep->com);
986 ep->parent_ep = NULL; 806 ep->parent_ep = NULL;
987} 807}
@@ -993,12 +813,9 @@ static void established_upcall(struct c4iw_ep *ep)
993 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 813 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
994 memset(&event, 0, sizeof(event)); 814 memset(&event, 0, sizeof(event));
995 event.event = IW_CM_EVENT_ESTABLISHED; 815 event.event = IW_CM_EVENT_ESTABLISHED;
996 event.ird = ep->ird;
997 event.ord = ep->ord;
998 if (ep->com.cm_id) { 816 if (ep->com.cm_id) {
999 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 817 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1000 ep->com.cm_id->event_handler(ep->com.cm_id, &event); 818 ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1001 set_bit(ESTAB_UPCALL, &ep->com.history);
1002 } 819 }
1003} 820}
1004 821
@@ -1031,10 +848,7 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1031static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) 848static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1032{ 849{
1033 struct mpa_message *mpa; 850 struct mpa_message *mpa;
1034 struct mpa_v2_conn_params *mpa_v2_params;
1035 u16 plen; 851 u16 plen;
1036 u16 resp_ird, resp_ord;
1037 u8 rtr_mismatch = 0, insuff_ird = 0;
1038 struct c4iw_qp_attributes attrs; 852 struct c4iw_qp_attributes attrs;
1039 enum c4iw_qp_attr_mask mask; 853 enum c4iw_qp_attr_mask mask;
1040 int err; 854 int err;
@@ -1074,9 +888,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1074 mpa = (struct mpa_message *) ep->mpa_pkt; 888 mpa = (struct mpa_message *) ep->mpa_pkt;
1075 889
1076 /* Validate MPA header. */ 890 /* Validate MPA header. */
1077 if (mpa->revision > mpa_rev) { 891 if (mpa->revision != mpa_rev) {
1078 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1079 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1080 err = -EPROTO; 892 err = -EPROTO;
1081 goto err; 893 goto err;
1082 } 894 }
@@ -1126,66 +938,13 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1126 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 938 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1127 ep->mpa_attr.recv_marker_enabled = markers_enabled; 939 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1128 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 940 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1129 ep->mpa_attr.version = mpa->revision; 941 ep->mpa_attr.version = mpa_rev;
1130 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; 942 ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
1131 943 FW_RI_INIT_P2PTYPE_DISABLED;
1132 if (mpa->revision == 2) {
1133 ep->mpa_attr.enhanced_rdma_conn =
1134 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1135 if (ep->mpa_attr.enhanced_rdma_conn) {
1136 mpa_v2_params = (struct mpa_v2_conn_params *)
1137 (ep->mpa_pkt + sizeof(*mpa));
1138 resp_ird = ntohs(mpa_v2_params->ird) &
1139 MPA_V2_IRD_ORD_MASK;
1140 resp_ord = ntohs(mpa_v2_params->ord) &
1141 MPA_V2_IRD_ORD_MASK;
1142
1143 /*
1144 * This is a double-check. Ideally, below checks are
1145 * not required since ird/ord stuff has been taken
1146 * care of in c4iw_accept_cr
1147 */
1148 if ((ep->ird < resp_ord) || (ep->ord > resp_ird)) {
1149 err = -ENOMEM;
1150 ep->ird = resp_ord;
1151 ep->ord = resp_ird;
1152 insuff_ird = 1;
1153 }
1154
1155 if (ntohs(mpa_v2_params->ird) &
1156 MPA_V2_PEER2PEER_MODEL) {
1157 if (ntohs(mpa_v2_params->ord) &
1158 MPA_V2_RDMA_WRITE_RTR)
1159 ep->mpa_attr.p2p_type =
1160 FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1161 else if (ntohs(mpa_v2_params->ord) &
1162 MPA_V2_RDMA_READ_RTR)
1163 ep->mpa_attr.p2p_type =
1164 FW_RI_INIT_P2PTYPE_READ_REQ;
1165 }
1166 }
1167 } else if (mpa->revision == 1)
1168 if (peer2peer)
1169 ep->mpa_attr.p2p_type = p2p_type;
1170
1171 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " 944 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1172 "xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = " 945 "xmit_marker_enabled=%d, version=%d\n", __func__,
1173 "%d\n", __func__, ep->mpa_attr.crc_enabled, 946 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1174 ep->mpa_attr.recv_marker_enabled, 947 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version);
1175 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1176 ep->mpa_attr.p2p_type, p2p_type);
1177
1178 /*
1179 * If responder's RTR does not match with that of initiator, assign
1180 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1181 * generated when moving QP to RTS state.
1182 * A TERM message will be sent after QP has moved to RTS state
1183 */
1184 if ((ep->mpa_attr.version == 2) && peer2peer &&
1185 (ep->mpa_attr.p2p_type != p2p_type)) {
1186 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1187 rtr_mismatch = 1;
1188 }
1189 948
1190 attrs.mpa_attr = ep->mpa_attr; 949 attrs.mpa_attr = ep->mpa_attr;
1191 attrs.max_ird = ep->ird; 950 attrs.max_ird = ep->ird;
@@ -1202,39 +961,6 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1202 ep->com.qp, mask, &attrs, 1); 961 ep->com.qp, mask, &attrs, 1);
1203 if (err) 962 if (err)
1204 goto err; 963 goto err;
1205
1206 /*
1207 * If responder's RTR requirement did not match with what initiator
1208 * supports, generate TERM message
1209 */
1210 if (rtr_mismatch) {
1211 printk(KERN_ERR "%s: RTR mismatch, sending TERM\n", __func__);
1212 attrs.layer_etype = LAYER_MPA | DDP_LLP;
1213 attrs.ecode = MPA_NOMATCH_RTR;
1214 attrs.next_state = C4IW_QP_STATE_TERMINATE;
1215 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1216 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1217 err = -ENOMEM;
1218 goto out;
1219 }
1220
1221 /*
1222 * Generate TERM if initiator IRD is not sufficient for responder
1223 * provided ORD. Currently, we do the same behaviour even when
1224 * responder provided IRD is also not sufficient as regards to
1225 * initiator ORD.
1226 */
1227 if (insuff_ird) {
1228 printk(KERN_ERR "%s: Insufficient IRD, sending TERM\n",
1229 __func__);
1230 attrs.layer_etype = LAYER_MPA | DDP_LLP;
1231 attrs.ecode = MPA_INSUFF_IRD;
1232 attrs.next_state = C4IW_QP_STATE_TERMINATE;
1233 err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1234 C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1235 err = -ENOMEM;
1236 goto out;
1237 }
1238 goto out; 964 goto out;
1239err: 965err:
1240 state_set(&ep->com, ABORTING); 966 state_set(&ep->com, ABORTING);
@@ -1247,7 +973,6 @@ out:
1247static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) 973static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1248{ 974{
1249 struct mpa_message *mpa; 975 struct mpa_message *mpa;
1250 struct mpa_v2_conn_params *mpa_v2_params;
1251 u16 plen; 976 u16 plen;
1252 977
1253 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 978 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1288,9 +1013,7 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1288 /* 1013 /*
1289 * Validate MPA Header. 1014 * Validate MPA Header.
1290 */ 1015 */
1291 if (mpa->revision > mpa_rev) { 1016 if (mpa->revision != mpa_rev) {
1292 printk(KERN_ERR MOD "%s MPA version mismatch. Local = %d,"
1293 " Received = %d\n", __func__, mpa_rev, mpa->revision);
1294 abort_connection(ep, skb, GFP_KERNEL); 1017 abort_connection(ep, skb, GFP_KERNEL);
1295 return; 1018 return;
1296 } 1019 }
@@ -1333,37 +1056,9 @@ static void process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1333 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0; 1056 ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
1334 ep->mpa_attr.recv_marker_enabled = markers_enabled; 1057 ep->mpa_attr.recv_marker_enabled = markers_enabled;
1335 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0; 1058 ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1336 ep->mpa_attr.version = mpa->revision; 1059 ep->mpa_attr.version = mpa_rev;
1337 if (mpa->revision == 1) 1060 ep->mpa_attr.p2p_type = peer2peer ? p2p_type :
1338 ep->tried_with_mpa_v1 = 1; 1061 FW_RI_INIT_P2PTYPE_DISABLED;
1339 ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1340
1341 if (mpa->revision == 2) {
1342 ep->mpa_attr.enhanced_rdma_conn =
1343 mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1344 if (ep->mpa_attr.enhanced_rdma_conn) {
1345 mpa_v2_params = (struct mpa_v2_conn_params *)
1346 (ep->mpa_pkt + sizeof(*mpa));
1347 ep->ird = ntohs(mpa_v2_params->ird) &
1348 MPA_V2_IRD_ORD_MASK;
1349 ep->ord = ntohs(mpa_v2_params->ord) &
1350 MPA_V2_IRD_ORD_MASK;
1351 if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1352 if (peer2peer) {
1353 if (ntohs(mpa_v2_params->ord) &
1354 MPA_V2_RDMA_WRITE_RTR)
1355 ep->mpa_attr.p2p_type =
1356 FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1357 else if (ntohs(mpa_v2_params->ord) &
1358 MPA_V2_RDMA_READ_RTR)
1359 ep->mpa_attr.p2p_type =
1360 FW_RI_INIT_P2PTYPE_READ_REQ;
1361 }
1362 }
1363 } else if (mpa->revision == 1)
1364 if (peer2peer)
1365 ep->mpa_attr.p2p_type = p2p_type;
1366
1367 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, " 1062 PDBG("%s - crc_enabled=%d, recv_marker_enabled=%d, "
1368 "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__, 1063 "xmit_marker_enabled=%d, version=%d p2p_type=%d\n", __func__,
1369 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled, 1064 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
@@ -1384,7 +1079,6 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1384 unsigned int dlen = ntohs(hdr->len); 1079 unsigned int dlen = ntohs(hdr->len);
1385 unsigned int tid = GET_TID(hdr); 1080 unsigned int tid = GET_TID(hdr);
1386 struct tid_info *t = dev->rdev.lldi.tids; 1081 struct tid_info *t = dev->rdev.lldi.tids;
1387 __u8 status = hdr->status;
1388 1082
1389 ep = lookup_tid(t, tid); 1083 ep = lookup_tid(t, tid);
1390 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen); 1084 PDBG("%s ep %p tid %u dlen %u\n", __func__, ep, ep->hwtid, dlen);
@@ -1407,9 +1101,9 @@ static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1407 case MPA_REP_SENT: 1101 case MPA_REP_SENT:
1408 break; 1102 break;
1409 default: 1103 default:
1410 pr_err("%s Unexpected streaming data." \ 1104 printk(KERN_ERR MOD "%s Unexpected streaming data."
1411 " ep %p state %d tid %u status %d\n", 1105 " ep %p state %d tid %u\n",
1412 __func__, ep, state_read(&ep->com), ep->hwtid, status); 1106 __func__, ep, state_read(&ep->com), ep->hwtid);
1413 1107
1414 /* 1108 /*
1415 * The ep will timeout and inform the ULP of the failure. 1109 * The ep will timeout and inform the ULP of the failure.
@@ -1429,11 +1123,8 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1429 struct tid_info *t = dev->rdev.lldi.tids; 1123 struct tid_info *t = dev->rdev.lldi.tids;
1430 1124
1431 ep = lookup_tid(t, tid); 1125 ep = lookup_tid(t, tid);
1432 if (!ep) {
1433 printk(KERN_WARNING MOD "Abort rpl to freed endpoint\n");
1434 return 0;
1435 }
1436 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1126 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
1127 BUG_ON(!ep);
1437 mutex_lock(&ep->com.mutex); 1128 mutex_lock(&ep->com.mutex);
1438 switch (ep->com.state) { 1129 switch (ep->com.state) {
1439 case ABORTING: 1130 case ABORTING:
@@ -1452,63 +1143,6 @@ static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1452 return 0; 1143 return 0;
1453} 1144}
1454 1145
1455static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1456{
1457 struct sk_buff *skb;
1458 struct fw_ofld_connection_wr *req;
1459 unsigned int mtu_idx;
1460 int wscale;
1461
1462 skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1463 req = (struct fw_ofld_connection_wr *)__skb_put(skb, sizeof(*req));
1464 memset(req, 0, sizeof(*req));
1465 req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR));
1466 req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
1467 req->le.filter = cpu_to_be32(select_ntuple(ep->com.dev, ep->dst,
1468 ep->l2t));
1469 req->le.lport = ep->com.local_addr.sin_port;
1470 req->le.pport = ep->com.remote_addr.sin_port;
1471 req->le.u.ipv4.lip = ep->com.local_addr.sin_addr.s_addr;
1472 req->le.u.ipv4.pip = ep->com.remote_addr.sin_addr.s_addr;
1473 req->tcb.t_state_to_astid =
1474 htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_SENT) |
1475 V_FW_OFLD_CONNECTION_WR_ASTID(atid));
1476 req->tcb.cplrxdataack_cplpassacceptrpl =
1477 htons(F_FW_OFLD_CONNECTION_WR_CPLRXDATAACK);
1478 req->tcb.tx_max = jiffies;
1479 req->tcb.rcv_adv = htons(1);
1480 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
1481 wscale = compute_wscale(rcv_win);
1482 req->tcb.opt0 = TCAM_BYPASS(1) |
1483 (nocong ? NO_CONG(1) : 0) |
1484 KEEP_ALIVE(1) |
1485 DELACK(1) |
1486 WND_SCALE(wscale) |
1487 MSS_IDX(mtu_idx) |
1488 L2T_IDX(ep->l2t->idx) |
1489 TX_CHAN(ep->tx_chan) |
1490 SMAC_SEL(ep->smac_idx) |
1491 DSCP(ep->tos) |
1492 ULP_MODE(ULP_MODE_TCPDDP) |
1493 RCV_BUFSIZ(rcv_win >> 10);
1494 req->tcb.opt2 = PACE(1) |
1495 TX_QUEUE(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
1496 RX_CHANNEL(0) |
1497 CCTRL_ECN(enable_ecn) |
1498 RSS_QUEUE_VALID | RSS_QUEUE(ep->rss_qid);
1499 if (enable_tcp_timestamps)
1500 req->tcb.opt2 |= TSTAMPS_EN(1);
1501 if (enable_tcp_sack)
1502 req->tcb.opt2 |= SACK_EN(1);
1503 if (wscale && enable_tcp_window_scaling)
1504 req->tcb.opt2 |= WND_SCALE_EN(1);
1505 req->tcb.opt0 = cpu_to_be64(req->tcb.opt0);
1506 req->tcb.opt2 = cpu_to_be32(req->tcb.opt2);
1507 set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
1508 set_bit(ACT_OFLD_CONN, &ep->com.history);
1509 c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1510}
1511
1512/* 1146/*
1513 * Return whether a failed active open has allocated a TID 1147 * Return whether a failed active open has allocated a TID
1514 */ 1148 */
@@ -1518,111 +1152,6 @@ static inline int act_open_has_tid(int status)
1518 status != CPL_ERR_ARP_MISS; 1152 status != CPL_ERR_ARP_MISS;
1519} 1153}
1520 1154
1521#define ACT_OPEN_RETRY_COUNT 2
1522
1523static int c4iw_reconnect(struct c4iw_ep *ep)
1524{
1525 int err = 0;
1526 struct rtable *rt;
1527 struct port_info *pi;
1528 struct net_device *pdev;
1529 int step;
1530 struct neighbour *neigh;
1531
1532 PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id);
1533 init_timer(&ep->timer);
1534
1535 /*
1536 * Allocate an active TID to initiate a TCP connection.
1537 */
1538 ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
1539 if (ep->atid == -1) {
1540 pr_err("%s - cannot alloc atid.\n", __func__);
1541 err = -ENOMEM;
1542 goto fail2;
1543 }
1544 insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
1545
1546 /* find a route */
1547 rt = find_route(ep->com.dev,
1548 ep->com.cm_id->local_addr.sin_addr.s_addr,
1549 ep->com.cm_id->remote_addr.sin_addr.s_addr,
1550 ep->com.cm_id->local_addr.sin_port,
1551 ep->com.cm_id->remote_addr.sin_port, 0);
1552 if (!rt) {
1553 pr_err("%s - cannot find route.\n", __func__);
1554 err = -EHOSTUNREACH;
1555 goto fail3;
1556 }
1557 ep->dst = &rt->dst;
1558
1559 neigh = dst_neigh_lookup(ep->dst,
1560 &ep->com.cm_id->remote_addr.sin_addr.s_addr);
1561 /* get a l2t entry */
1562 if (neigh->dev->flags & IFF_LOOPBACK) {
1563 PDBG("%s LOOPBACK\n", __func__);
1564 pdev = ip_dev_find(&init_net,
1565 ep->com.cm_id->remote_addr.sin_addr.s_addr);
1566 ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
1567 neigh, pdev, 0);
1568 pi = (struct port_info *)netdev_priv(pdev);
1569 ep->mtu = pdev->mtu;
1570 ep->tx_chan = cxgb4_port_chan(pdev);
1571 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1572 dev_put(pdev);
1573 } else {
1574 ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
1575 neigh, neigh->dev, 0);
1576 pi = (struct port_info *)netdev_priv(neigh->dev);
1577 ep->mtu = dst_mtu(ep->dst);
1578 ep->tx_chan = cxgb4_port_chan(neigh->dev);
1579 ep->smac_idx = (cxgb4_port_viid(neigh->dev) &
1580 0x7F) << 1;
1581 }
1582
1583 step = ep->com.dev->rdev.lldi.ntxq / ep->com.dev->rdev.lldi.nchan;
1584 ep->txq_idx = pi->port_id * step;
1585 ep->ctrlq_idx = pi->port_id;
1586 step = ep->com.dev->rdev.lldi.nrxq / ep->com.dev->rdev.lldi.nchan;
1587 ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[pi->port_id * step];
1588
1589 if (!ep->l2t) {
1590 pr_err("%s - cannot alloc l2e.\n", __func__);
1591 err = -ENOMEM;
1592 goto fail4;
1593 }
1594
1595 PDBG("%s txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
1596 __func__, ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
1597 ep->l2t->idx);
1598
1599 state_set(&ep->com, CONNECTING);
1600 ep->tos = 0;
1601
1602 /* send connect request to rnic */
1603 err = send_connect(ep);
1604 if (!err)
1605 goto out;
1606
1607 cxgb4_l2t_release(ep->l2t);
1608fail4:
1609 dst_release(ep->dst);
1610fail3:
1611 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
1612 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
1613fail2:
1614 /*
1615 * remember to send notification to upper layer.
1616 * We are in here so the upper layer is not aware that this is
1617 * re-connect attempt and so, upper layer is still waiting for
1618 * response of 1st connect request.
1619 */
1620 connect_reply_upcall(ep, -ECONNRESET);
1621 c4iw_put_ep(&ep->com);
1622out:
1623 return err;
1624}
1625
1626static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) 1155static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1627{ 1156{
1628 struct c4iw_ep *ep; 1157 struct c4iw_ep *ep;
@@ -1643,56 +1172,12 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1643 return 0; 1172 return 0;
1644 } 1173 }
1645 1174
1646 set_bit(ACT_OPEN_RPL, &ep->com.history);
1647
1648 /*
1649 * Log interesting failures.
1650 */
1651 switch (status) {
1652 case CPL_ERR_CONN_RESET:
1653 case CPL_ERR_CONN_TIMEDOUT:
1654 break;
1655 case CPL_ERR_TCAM_FULL:
1656 if (dev->rdev.lldi.enable_fw_ofld_conn) {
1657 mutex_lock(&dev->rdev.stats.lock);
1658 dev->rdev.stats.tcam_full++;
1659 mutex_unlock(&dev->rdev.stats.lock);
1660 send_fw_act_open_req(ep,
1661 GET_TID_TID(GET_AOPEN_ATID(
1662 ntohl(rpl->atid_status))));
1663 return 0;
1664 }
1665 break;
1666 case CPL_ERR_CONN_EXIST:
1667 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
1668 set_bit(ACT_RETRY_INUSE, &ep->com.history);
1669 remove_handle(ep->com.dev, &ep->com.dev->atid_idr,
1670 atid);
1671 cxgb4_free_atid(t, atid);
1672 dst_release(ep->dst);
1673 cxgb4_l2t_release(ep->l2t);
1674 c4iw_reconnect(ep);
1675 return 0;
1676 }
1677 break;
1678 default:
1679 printk(KERN_INFO MOD "Active open failure - "
1680 "atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
1681 atid, status, status2errno(status),
1682 &ep->com.local_addr.sin_addr.s_addr,
1683 ntohs(ep->com.local_addr.sin_port),
1684 &ep->com.remote_addr.sin_addr.s_addr,
1685 ntohs(ep->com.remote_addr.sin_port));
1686 break;
1687 }
1688
1689 connect_reply_upcall(ep, status2errno(status)); 1175 connect_reply_upcall(ep, status2errno(status));
1690 state_set(&ep->com, DEAD); 1176 state_set(&ep->com, DEAD);
1691 1177
1692 if (status && act_open_has_tid(status)) 1178 if (status && act_open_has_tid(status))
1693 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl)); 1179 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl));
1694 1180
1695 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, atid);
1696 cxgb4_free_atid(t, atid); 1181 cxgb4_free_atid(t, atid);
1697 dst_release(ep->dst); 1182 dst_release(ep->dst);
1698 cxgb4_l2t_release(ep->l2t); 1183 cxgb4_l2t_release(ep->l2t);
@@ -1709,14 +1194,13 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1709 struct c4iw_listen_ep *ep = lookup_stid(t, stid); 1194 struct c4iw_listen_ep *ep = lookup_stid(t, stid);
1710 1195
1711 if (!ep) { 1196 if (!ep) {
1712 PDBG("%s stid %d lookup failure!\n", __func__, stid); 1197 printk(KERN_ERR MOD "stid %d lookup failure!\n", stid);
1713 goto out; 1198 return 0;
1714 } 1199 }
1715 PDBG("%s ep %p status %d error %d\n", __func__, ep, 1200 PDBG("%s ep %p status %d error %d\n", __func__, ep,
1716 rpl->status, status2errno(rpl->status)); 1201 rpl->status, status2errno(rpl->status));
1717 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status)); 1202 c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
1718 1203
1719out:
1720 return 0; 1204 return 0;
1721} 1205}
1722 1206
@@ -1768,15 +1252,14 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
1768 skb_get(skb); 1252 skb_get(skb);
1769 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx); 1253 cxgb4_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx);
1770 wscale = compute_wscale(rcv_win); 1254 wscale = compute_wscale(rcv_win);
1771 opt0 = (nocong ? NO_CONG(1) : 0) | 1255 opt0 = KEEP_ALIVE(1) |
1772 KEEP_ALIVE(1) |
1773 DELACK(1) | 1256 DELACK(1) |
1774 WND_SCALE(wscale) | 1257 WND_SCALE(wscale) |
1775 MSS_IDX(mtu_idx) | 1258 MSS_IDX(mtu_idx) |
1776 L2T_IDX(ep->l2t->idx) | 1259 L2T_IDX(ep->l2t->idx) |
1777 TX_CHAN(ep->tx_chan) | 1260 TX_CHAN(ep->tx_chan) |
1778 SMAC_SEL(ep->smac_idx) | 1261 SMAC_SEL(ep->smac_idx) |
1779 DSCP(ep->tos >> 2) | 1262 DSCP(ep->tos) |
1780 ULP_MODE(ULP_MODE_TCPDDP) | 1263 ULP_MODE(ULP_MODE_TCPDDP) |
1781 RCV_BUFSIZ(rcv_win>>10); 1264 RCV_BUFSIZ(rcv_win>>10);
1782 opt2 = RX_CHANNEL(0) | 1265 opt2 = RX_CHANNEL(0) |
@@ -1788,15 +1271,6 @@ static void accept_cr(struct c4iw_ep *ep, __be32 peer_ip, struct sk_buff *skb,
1788 opt2 |= SACK_EN(1); 1271 opt2 |= SACK_EN(1);
1789 if (wscale && enable_tcp_window_scaling) 1272 if (wscale && enable_tcp_window_scaling)
1790 opt2 |= WND_SCALE_EN(1); 1273 opt2 |= WND_SCALE_EN(1);
1791 if (enable_ecn) {
1792 const struct tcphdr *tcph;
1793 u32 hlen = ntohl(req->hdr_len);
1794
1795 tcph = (const void *)(req + 1) + G_ETH_HDR_LEN(hlen) +
1796 G_IP_HDR_LEN(hlen);
1797 if (tcph->ece && tcph->cwr)
1798 opt2 |= CCTRL_ECN(1);
1799 }
1800 1274
1801 rpl = cplhdr(skb); 1275 rpl = cplhdr(skb);
1802 INIT_TP_WR(rpl, ep->hwtid); 1276 INIT_TP_WR(rpl, ep->hwtid);
@@ -1844,98 +1318,30 @@ static void get_4tuple(struct cpl_pass_accept_req *req,
1844 return; 1318 return;
1845} 1319}
1846 1320
1847static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst,
1848 struct c4iw_dev *cdev, bool clear_mpa_v1)
1849{
1850 struct neighbour *n;
1851 int err, step;
1852
1853 n = dst_neigh_lookup(dst, &peer_ip);
1854 if (!n)
1855 return -ENODEV;
1856
1857 rcu_read_lock();
1858 err = -ENOMEM;
1859 if (n->dev->flags & IFF_LOOPBACK) {
1860 struct net_device *pdev;
1861
1862 pdev = ip_dev_find(&init_net, peer_ip);
1863 if (!pdev) {
1864 err = -ENODEV;
1865 goto out;
1866 }
1867 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1868 n, pdev, 0);
1869 if (!ep->l2t)
1870 goto out;
1871 ep->mtu = pdev->mtu;
1872 ep->tx_chan = cxgb4_port_chan(pdev);
1873 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1874 step = cdev->rdev.lldi.ntxq /
1875 cdev->rdev.lldi.nchan;
1876 ep->txq_idx = cxgb4_port_idx(pdev) * step;
1877 step = cdev->rdev.lldi.nrxq /
1878 cdev->rdev.lldi.nchan;
1879 ep->ctrlq_idx = cxgb4_port_idx(pdev);
1880 ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1881 cxgb4_port_idx(pdev) * step];
1882 dev_put(pdev);
1883 } else {
1884 ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
1885 n, n->dev, 0);
1886 if (!ep->l2t)
1887 goto out;
1888 ep->mtu = dst_mtu(dst);
1889 ep->tx_chan = cxgb4_port_chan(n->dev);
1890 ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1;
1891 step = cdev->rdev.lldi.ntxq /
1892 cdev->rdev.lldi.nchan;
1893 ep->txq_idx = cxgb4_port_idx(n->dev) * step;
1894 ep->ctrlq_idx = cxgb4_port_idx(n->dev);
1895 step = cdev->rdev.lldi.nrxq /
1896 cdev->rdev.lldi.nchan;
1897 ep->rss_qid = cdev->rdev.lldi.rxq_ids[
1898 cxgb4_port_idx(n->dev) * step];
1899
1900 if (clear_mpa_v1) {
1901 ep->retry_with_mpa_v1 = 0;
1902 ep->tried_with_mpa_v1 = 0;
1903 }
1904 }
1905 err = 0;
1906out:
1907 rcu_read_unlock();
1908
1909 neigh_release(n);
1910
1911 return err;
1912}
1913
1914static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) 1321static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
1915{ 1322{
1916 struct c4iw_ep *child_ep = NULL, *parent_ep; 1323 struct c4iw_ep *child_ep, *parent_ep;
1917 struct cpl_pass_accept_req *req = cplhdr(skb); 1324 struct cpl_pass_accept_req *req = cplhdr(skb);
1918 unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); 1325 unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid));
1919 struct tid_info *t = dev->rdev.lldi.tids; 1326 struct tid_info *t = dev->rdev.lldi.tids;
1920 unsigned int hwtid = GET_TID(req); 1327 unsigned int hwtid = GET_TID(req);
1328 struct neighbour *neigh;
1921 struct dst_entry *dst; 1329 struct dst_entry *dst;
1330 struct l2t_entry *l2t;
1922 struct rtable *rt; 1331 struct rtable *rt;
1923 __be32 local_ip, peer_ip = 0; 1332 __be32 local_ip, peer_ip;
1924 __be16 local_port, peer_port; 1333 __be16 local_port, peer_port;
1925 int err; 1334 struct net_device *pdev;
1926 u16 peer_mss = ntohs(req->tcpopt.mss); 1335 u32 tx_chan, smac_idx;
1336 u16 rss_qid;
1337 u32 mtu;
1338 int step;
1339 int txq_idx, ctrlq_idx;
1927 1340
1928 parent_ep = lookup_stid(t, stid); 1341 parent_ep = lookup_stid(t, stid);
1929 if (!parent_ep) { 1342 PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid);
1930 PDBG("%s connect request on invalid stid %d\n", __func__, stid);
1931 goto reject;
1932 }
1933 get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
1934 1343
1935 PDBG("%s parent ep %p hwtid %u laddr 0x%x raddr 0x%x lport %d " \ 1344 get_4tuple(req, &local_ip, &peer_ip, &local_port, &peer_port);
1936 "rport %d peer_mss %d\n", __func__, parent_ep, hwtid,
1937 ntohl(local_ip), ntohl(peer_ip), ntohs(local_port),
1938 ntohs(peer_port), peer_mss);
1939 1345
1940 if (state_read(&parent_ep->com) != LISTEN) { 1346 if (state_read(&parent_ep->com) != LISTEN) {
1941 printk(KERN_ERR "%s - listening ep not in LISTEN\n", 1347 printk(KERN_ERR "%s - listening ep not in LISTEN\n",
@@ -1952,27 +1358,49 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
1952 goto reject; 1358 goto reject;
1953 } 1359 }
1954 dst = &rt->dst; 1360 dst = &rt->dst;
1955 1361 rcu_read_lock();
1956 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); 1362 neigh = dst_get_neighbour(dst);
1957 if (!child_ep) { 1363 if (neigh->dev->flags & IFF_LOOPBACK) {
1958 printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", 1364 pdev = ip_dev_find(&init_net, peer_ip);
1365 BUG_ON(!pdev);
1366 l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0);
1367 mtu = pdev->mtu;
1368 tx_chan = cxgb4_port_chan(pdev);
1369 smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1370 step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan;
1371 txq_idx = cxgb4_port_idx(pdev) * step;
1372 ctrlq_idx = cxgb4_port_idx(pdev);
1373 step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
1374 rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step];
1375 dev_put(pdev);
1376 } else {
1377 l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, neigh->dev, 0);
1378 mtu = dst_mtu(dst);
1379 tx_chan = cxgb4_port_chan(neigh->dev);
1380 smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
1381 step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan;
1382 txq_idx = cxgb4_port_idx(neigh->dev) * step;
1383 ctrlq_idx = cxgb4_port_idx(neigh->dev);
1384 step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
1385 rss_qid = dev->rdev.lldi.rxq_ids[
1386 cxgb4_port_idx(neigh->dev) * step];
1387 }
1388 rcu_read_unlock();
1389 if (!l2t) {
1390 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
1959 __func__); 1391 __func__);
1960 dst_release(dst); 1392 dst_release(dst);
1961 goto reject; 1393 goto reject;
1962 } 1394 }
1963 1395
1964 err = import_ep(child_ep, peer_ip, dst, dev, false); 1396 child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
1965 if (err) { 1397 if (!child_ep) {
1966 printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", 1398 printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n",
1967 __func__); 1399 __func__);
1400 cxgb4_l2t_release(l2t);
1968 dst_release(dst); 1401 dst_release(dst);
1969 kfree(child_ep);
1970 goto reject; 1402 goto reject;
1971 } 1403 }
1972
1973 if (peer_mss && child_ep->mtu > (peer_mss + 40))
1974 child_ep->mtu = peer_mss + 40;
1975
1976 state_set(&child_ep->com, CONNECTING); 1404 state_set(&child_ep->com, CONNECTING);
1977 child_ep->com.dev = dev; 1405 child_ep->com.dev = dev;
1978 child_ep->com.cm_id = NULL; 1406 child_ep->com.cm_id = NULL;
@@ -1985,16 +1413,22 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
1985 c4iw_get_ep(&parent_ep->com); 1413 c4iw_get_ep(&parent_ep->com);
1986 child_ep->parent_ep = parent_ep; 1414 child_ep->parent_ep = parent_ep;
1987 child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid)); 1415 child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid));
1416 child_ep->l2t = l2t;
1988 child_ep->dst = dst; 1417 child_ep->dst = dst;
1989 child_ep->hwtid = hwtid; 1418 child_ep->hwtid = hwtid;
1419 child_ep->tx_chan = tx_chan;
1420 child_ep->smac_idx = smac_idx;
1421 child_ep->rss_qid = rss_qid;
1422 child_ep->mtu = mtu;
1423 child_ep->txq_idx = txq_idx;
1424 child_ep->ctrlq_idx = ctrlq_idx;
1990 1425
1991 PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, 1426 PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__,
1992 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); 1427 tx_chan, smac_idx, rss_qid);
1993 1428
1994 init_timer(&child_ep->timer); 1429 init_timer(&child_ep->timer);
1995 cxgb4_insert_tid(t, child_ep, hwtid); 1430 cxgb4_insert_tid(t, child_ep, hwtid);
1996 accept_cr(child_ep, peer_ip, skb, req); 1431 accept_cr(child_ep, peer_ip, skb, req);
1997 set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
1998 goto out; 1432 goto out;
1999reject: 1433reject:
2000 reject_cr(dev, hwtid, peer_ip, skb); 1434 reject_cr(dev, hwtid, peer_ip, skb);
@@ -2014,17 +1448,12 @@ static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2014 ep->snd_seq = be32_to_cpu(req->snd_isn); 1448 ep->snd_seq = be32_to_cpu(req->snd_isn);
2015 ep->rcv_seq = be32_to_cpu(req->rcv_isn); 1449 ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2016 1450
2017 PDBG("%s ep %p hwtid %u tcp_opt 0x%02x\n", __func__, ep, tid,
2018 ntohs(req->tcp_opt));
2019
2020 set_emss(ep, ntohs(req->tcp_opt)); 1451 set_emss(ep, ntohs(req->tcp_opt));
2021 insert_handle(dev, &dev->hwtid_idr, ep, ep->hwtid);
2022 1452
2023 dst_confirm(ep->dst); 1453 dst_confirm(ep->dst);
2024 state_set(&ep->com, MPA_REQ_WAIT); 1454 state_set(&ep->com, MPA_REQ_WAIT);
2025 start_ep_timer(ep); 1455 start_ep_timer(ep);
2026 send_flowc(ep, skb); 1456 send_flowc(ep, skb);
2027 set_bit(PASS_ESTAB, &ep->com.history);
2028 1457
2029 return 0; 1458 return 0;
2030} 1459}
@@ -2044,7 +1473,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2044 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); 1473 PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
2045 dst_confirm(ep->dst); 1474 dst_confirm(ep->dst);
2046 1475
2047 set_bit(PEER_CLOSE, &ep->com.history);
2048 mutex_lock(&ep->com.mutex); 1476 mutex_lock(&ep->com.mutex);
2049 switch (ep->com.state) { 1477 switch (ep->com.state) {
2050 case MPA_REQ_WAIT: 1478 case MPA_REQ_WAIT:
@@ -2144,15 +1572,11 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2144 } 1572 }
2145 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid, 1573 PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
2146 ep->com.state); 1574 ep->com.state);
2147 set_bit(PEER_ABORT, &ep->com.history);
2148 1575
2149 /* 1576 /*
2150 * Wake up any threads in rdma_init() or rdma_fini(). 1577 * Wake up any threads in rdma_init() or rdma_fini().
2151 * However, this is not needed if com state is just
2152 * MPA_REQ_SENT
2153 */ 1578 */
2154 if (ep->com.state != MPA_REQ_SENT) 1579 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2155 c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
2156 1580
2157 mutex_lock(&ep->com.mutex); 1581 mutex_lock(&ep->com.mutex);
2158 switch (ep->com.state) { 1582 switch (ep->com.state) {
@@ -2163,21 +1587,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2163 break; 1587 break;
2164 case MPA_REQ_SENT: 1588 case MPA_REQ_SENT:
2165 stop_ep_timer(ep); 1589 stop_ep_timer(ep);
2166 if (mpa_rev == 2 && ep->tried_with_mpa_v1) 1590 connect_reply_upcall(ep, -ECONNRESET);
2167 connect_reply_upcall(ep, -ECONNRESET);
2168 else {
2169 /*
2170 * we just don't send notification upwards because we
2171 * want to retry with mpa_v1 without upper layers even
2172 * knowing it.
2173 *
2174 * do some housekeeping so as to re-initiate the
2175 * connection
2176 */
2177 PDBG("%s: mpa_rev=%d. Retrying with mpav1\n", __func__,
2178 mpa_rev);
2179 ep->retry_with_mpa_v1 = 1;
2180 }
2181 break; 1591 break;
2182 case MPA_REP_SENT: 1592 case MPA_REP_SENT:
2183 break; 1593 break;
@@ -2213,9 +1623,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2213 dst_confirm(ep->dst); 1623 dst_confirm(ep->dst);
2214 if (ep->com.state != ABORTING) { 1624 if (ep->com.state != ABORTING) {
2215 __state_set(&ep->com, DEAD); 1625 __state_set(&ep->com, DEAD);
2216 /* we don't release if we want to retry with mpa_v1 */ 1626 release = 1;
2217 if (!ep->retry_with_mpa_v1)
2218 release = 1;
2219 } 1627 }
2220 mutex_unlock(&ep->com.mutex); 1628 mutex_unlock(&ep->com.mutex);
2221 1629
@@ -2235,15 +1643,6 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2235out: 1643out:
2236 if (release) 1644 if (release)
2237 release_ep_resources(ep); 1645 release_ep_resources(ep);
2238
2239 /* retry with mpa-v1 */
2240 if (ep && ep->retry_with_mpa_v1) {
2241 cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid);
2242 dst_release(ep->dst);
2243 cxgb4_l2t_release(ep->l2t);
2244 c4iw_reconnect(ep);
2245 }
2246
2247 return 0; 1646 return 0;
2248} 1647}
2249 1648
@@ -2359,7 +1758,6 @@ int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
2359 c4iw_put_ep(&ep->com); 1758 c4iw_put_ep(&ep->com);
2360 return -ECONNRESET; 1759 return -ECONNRESET;
2361 } 1760 }
2362 set_bit(ULP_REJECT, &ep->com.history);
2363 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1761 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2364 if (mpa_rev == 0) 1762 if (mpa_rev == 0)
2365 abort_connection(ep, NULL, GFP_KERNEL); 1763 abort_connection(ep, NULL, GFP_KERNEL);
@@ -2389,7 +1787,6 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2389 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD); 1787 BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
2390 BUG_ON(!qp); 1788 BUG_ON(!qp);
2391 1789
2392 set_bit(ULP_ACCEPT, &ep->com.history);
2393 if ((conn_param->ord > c4iw_max_read_depth) || 1790 if ((conn_param->ord > c4iw_max_read_depth) ||
2394 (conn_param->ird > c4iw_max_read_depth)) { 1791 (conn_param->ird > c4iw_max_read_depth)) {
2395 abort_connection(ep, NULL, GFP_KERNEL); 1792 abort_connection(ep, NULL, GFP_KERNEL);
@@ -2397,40 +1794,18 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2397 goto err; 1794 goto err;
2398 } 1795 }
2399 1796
2400 if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { 1797 cm_id->add_ref(cm_id);
2401 if (conn_param->ord > ep->ird) { 1798 ep->com.cm_id = cm_id;
2402 ep->ird = conn_param->ird; 1799 ep->com.qp = qp;
2403 ep->ord = conn_param->ord;
2404 send_mpa_reject(ep, conn_param->private_data,
2405 conn_param->private_data_len);
2406 abort_connection(ep, NULL, GFP_KERNEL);
2407 err = -ENOMEM;
2408 goto err;
2409 }
2410 if (conn_param->ird > ep->ord) {
2411 if (!ep->ord)
2412 conn_param->ird = 1;
2413 else {
2414 abort_connection(ep, NULL, GFP_KERNEL);
2415 err = -ENOMEM;
2416 goto err;
2417 }
2418 }
2419 1800
2420 }
2421 ep->ird = conn_param->ird; 1801 ep->ird = conn_param->ird;
2422 ep->ord = conn_param->ord; 1802 ep->ord = conn_param->ord;
2423 1803
2424 if (ep->mpa_attr.version != 2) 1804 if (peer2peer && ep->ird == 0)
2425 if (peer2peer && ep->ird == 0) 1805 ep->ird = 1;
2426 ep->ird = 1;
2427 1806
2428 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord); 1807 PDBG("%s %d ird %d ord %d\n", __func__, __LINE__, ep->ird, ep->ord);
2429 1808
2430 cm_id->add_ref(cm_id);
2431 ep->com.cm_id = cm_id;
2432 ep->com.qp = qp;
2433
2434 /* bind QP to EP and move to RTS */ 1809 /* bind QP to EP and move to RTS */
2435 attrs.mpa_attr = ep->mpa_attr; 1810 attrs.mpa_attr = ep->mpa_attr;
2436 attrs.max_ird = ep->ird; 1811 attrs.max_ird = ep->ird;
@@ -2469,10 +1844,13 @@ err:
2469 1844
2470int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) 1845int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2471{ 1846{
1847 int err = 0;
2472 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 1848 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2473 struct c4iw_ep *ep; 1849 struct c4iw_ep *ep;
2474 struct rtable *rt; 1850 struct rtable *rt;
2475 int err = 0; 1851 struct net_device *pdev;
1852 struct neighbour *neigh;
1853 int step;
2476 1854
2477 if ((conn_param->ord > c4iw_max_read_depth) || 1855 if ((conn_param->ord > c4iw_max_read_depth) ||
2478 (conn_param->ird > c4iw_max_read_depth)) { 1856 (conn_param->ird > c4iw_max_read_depth)) {
@@ -2513,7 +1891,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2513 err = -ENOMEM; 1891 err = -ENOMEM;
2514 goto fail2; 1892 goto fail2;
2515 } 1893 }
2516 insert_handle(dev, &dev->atid_idr, ep, ep->atid);
2517 1894
2518 PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__, 1895 PDBG("%s saddr 0x%x sport 0x%x raddr 0x%x rport 0x%x\n", __func__,
2519 ntohl(cm_id->local_addr.sin_addr.s_addr), 1896 ntohl(cm_id->local_addr.sin_addr.s_addr),
@@ -2534,10 +1911,47 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2534 } 1911 }
2535 ep->dst = &rt->dst; 1912 ep->dst = &rt->dst;
2536 1913
2537 err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr, 1914 rcu_read_lock();
2538 ep->dst, ep->com.dev, true); 1915 neigh = dst_get_neighbour(ep->dst);
2539 if (err) { 1916
1917 /* get a l2t entry */
1918 if (neigh->dev->flags & IFF_LOOPBACK) {
1919 PDBG("%s LOOPBACK\n", __func__);
1920 pdev = ip_dev_find(&init_net,
1921 cm_id->remote_addr.sin_addr.s_addr);
1922 ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
1923 neigh, pdev, 0);
1924 ep->mtu = pdev->mtu;
1925 ep->tx_chan = cxgb4_port_chan(pdev);
1926 ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1;
1927 step = ep->com.dev->rdev.lldi.ntxq /
1928 ep->com.dev->rdev.lldi.nchan;
1929 ep->txq_idx = cxgb4_port_idx(pdev) * step;
1930 step = ep->com.dev->rdev.lldi.nrxq /
1931 ep->com.dev->rdev.lldi.nchan;
1932 ep->ctrlq_idx = cxgb4_port_idx(pdev);
1933 ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
1934 cxgb4_port_idx(pdev) * step];
1935 dev_put(pdev);
1936 } else {
1937 ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t,
1938 neigh, neigh->dev, 0);
1939 ep->mtu = dst_mtu(ep->dst);
1940 ep->tx_chan = cxgb4_port_chan(neigh->dev);
1941 ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1;
1942 step = ep->com.dev->rdev.lldi.ntxq /
1943 ep->com.dev->rdev.lldi.nchan;
1944 ep->txq_idx = cxgb4_port_idx(neigh->dev) * step;
1945 ep->ctrlq_idx = cxgb4_port_idx(neigh->dev);
1946 step = ep->com.dev->rdev.lldi.nrxq /
1947 ep->com.dev->rdev.lldi.nchan;
1948 ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[
1949 cxgb4_port_idx(neigh->dev) * step];
1950 }
1951 rcu_read_unlock();
1952 if (!ep->l2t) {
2540 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); 1953 printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
1954 err = -ENOMEM;
2541 goto fail4; 1955 goto fail4;
2542 } 1956 }
2543 1957
@@ -2559,7 +1973,6 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2559fail4: 1973fail4:
2560 dst_release(ep->dst); 1974 dst_release(ep->dst);
2561fail3: 1975fail3:
2562 remove_handle(ep->com.dev, &ep->com.dev->atid_idr, ep->atid);
2563 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid); 1976 cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2564fail2: 1977fail2:
2565 cm_id->rem_ref(cm_id); 1978 cm_id->rem_ref(cm_id);
@@ -2574,6 +1987,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2574 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); 1987 struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
2575 struct c4iw_listen_ep *ep; 1988 struct c4iw_listen_ep *ep;
2576 1989
1990
2577 might_sleep(); 1991 might_sleep();
2578 1992
2579 ep = alloc_ep(sizeof(*ep), GFP_KERNEL); 1993 ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
@@ -2592,54 +2006,30 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
2592 /* 2006 /*
2593 * Allocate a server TID. 2007 * Allocate a server TID.
2594 */ 2008 */
2595 if (dev->rdev.lldi.enable_fw_ofld_conn) 2009 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
2596 ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids, PF_INET, ep);
2597 else
2598 ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids, PF_INET, ep);
2599
2600 if (ep->stid == -1) { 2010 if (ep->stid == -1) {
2601 printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__); 2011 printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
2602 err = -ENOMEM; 2012 err = -ENOMEM;
2603 goto fail2; 2013 goto fail2;
2604 } 2014 }
2605 insert_handle(dev, &dev->stid_idr, ep, ep->stid); 2015
2606 state_set(&ep->com, LISTEN); 2016 state_set(&ep->com, LISTEN);
2607 if (dev->rdev.lldi.enable_fw_ofld_conn) { 2017 c4iw_init_wr_wait(&ep->com.wr_wait);
2608 do { 2018 err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0], ep->stid,
2609 err = cxgb4_create_server_filter( 2019 ep->com.local_addr.sin_addr.s_addr,
2610 ep->com.dev->rdev.lldi.ports[0], ep->stid, 2020 ep->com.local_addr.sin_port,
2611 ep->com.local_addr.sin_addr.s_addr, 2021 ep->com.dev->rdev.lldi.rxq_ids[0]);
2612 ep->com.local_addr.sin_port, 2022 if (err)
2613 0, 2023 goto fail3;
2614 ep->com.dev->rdev.lldi.rxq_ids[0], 2024
2615 0, 2025 /* wait for pass_open_rpl */
2616 0); 2026 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2617 if (err == -EBUSY) { 2027 __func__);
2618 set_current_state(TASK_UNINTERRUPTIBLE);
2619 schedule_timeout(usecs_to_jiffies(100));
2620 }
2621 } while (err == -EBUSY);
2622 } else {
2623 c4iw_init_wr_wait(&ep->com.wr_wait);
2624 err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
2625 ep->stid, ep->com.local_addr.sin_addr.s_addr,
2626 ep->com.local_addr.sin_port,
2627 0,
2628 ep->com.dev->rdev.lldi.rxq_ids[0]);
2629 if (!err)
2630 err = c4iw_wait_for_reply(&ep->com.dev->rdev,
2631 &ep->com.wr_wait,
2632 0, 0, __func__);
2633 }
2634 if (!err) { 2028 if (!err) {
2635 cm_id->provider_data = ep; 2029 cm_id->provider_data = ep;
2636 goto out; 2030 goto out;
2637 } 2031 }
2638 pr_err("%s cxgb4_create_server/filter failed err %d " \ 2032fail3:
2639 "stid %d laddr %08x lport %d\n", \
2640 __func__, err, ep->stid,
2641 ntohl(ep->com.local_addr.sin_addr.s_addr),
2642 ntohs(ep->com.local_addr.sin_port));
2643 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); 2033 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2644fail2: 2034fail2:
2645 cm_id->rem_ref(cm_id); 2035 cm_id->rem_ref(cm_id);
@@ -2658,19 +2048,12 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
2658 2048
2659 might_sleep(); 2049 might_sleep();
2660 state_set(&ep->com, DEAD); 2050 state_set(&ep->com, DEAD);
2661 if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn) { 2051 c4iw_init_wr_wait(&ep->com.wr_wait);
2662 err = cxgb4_remove_server_filter( 2052 err = listen_stop(ep);
2663 ep->com.dev->rdev.lldi.ports[0], ep->stid, 2053 if (err)
2664 ep->com.dev->rdev.lldi.rxq_ids[0], 0); 2054 goto done;
2665 } else { 2055 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait, 0, 0,
2666 c4iw_init_wr_wait(&ep->com.wr_wait); 2056 __func__);
2667 err = listen_stop(ep);
2668 if (err)
2669 goto done;
2670 err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
2671 0, 0, __func__);
2672 }
2673 remove_handle(ep->com.dev, &ep->com.dev->stid_idr, ep->stid);
2674 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET); 2057 cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid, PF_INET);
2675done: 2058done:
2676 cm_id->rem_ref(cm_id); 2059 cm_id->rem_ref(cm_id);
@@ -2734,13 +2117,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2734 2117
2735 if (close) { 2118 if (close) {
2736 if (abrupt) { 2119 if (abrupt) {
2737 set_bit(EP_DISC_ABORT, &ep->com.history);
2738 close_complete_upcall(ep); 2120 close_complete_upcall(ep);
2739 ret = send_abort(ep, NULL, gfp); 2121 ret = send_abort(ep, NULL, gfp);
2740 } else { 2122 } else
2741 set_bit(EP_DISC_CLOSE, &ep->com.history);
2742 ret = send_halfclose(ep, gfp); 2123 ret = send_halfclose(ep, gfp);
2743 }
2744 if (ret) 2124 if (ret)
2745 fatal = 1; 2125 fatal = 1;
2746 } 2126 }
@@ -2750,323 +2130,10 @@ int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
2750 return ret; 2130 return ret;
2751} 2131}
2752 2132
2753static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb, 2133static int async_event(struct c4iw_dev *dev, struct sk_buff *skb)
2754 struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
2755{
2756 struct c4iw_ep *ep;
2757 int atid = be32_to_cpu(req->tid);
2758
2759 ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids, req->tid);
2760 if (!ep)
2761 return;
2762
2763 switch (req->retval) {
2764 case FW_ENOMEM:
2765 set_bit(ACT_RETRY_NOMEM, &ep->com.history);
2766 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2767 send_fw_act_open_req(ep, atid);
2768 return;
2769 }
2770 case FW_EADDRINUSE:
2771 set_bit(ACT_RETRY_INUSE, &ep->com.history);
2772 if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2773 send_fw_act_open_req(ep, atid);
2774 return;
2775 }
2776 break;
2777 default:
2778 pr_info("%s unexpected ofld conn wr retval %d\n",
2779 __func__, req->retval);
2780 break;
2781 }
2782 pr_err("active ofld_connect_wr failure %d atid %d\n",
2783 req->retval, atid);
2784 mutex_lock(&dev->rdev.stats.lock);
2785 dev->rdev.stats.act_ofld_conn_fails++;
2786 mutex_unlock(&dev->rdev.stats.lock);
2787 connect_reply_upcall(ep, status2errno(req->retval));
2788 state_set(&ep->com, DEAD);
2789 remove_handle(dev, &dev->atid_idr, atid);
2790 cxgb4_free_atid(dev->rdev.lldi.tids, atid);
2791 dst_release(ep->dst);
2792 cxgb4_l2t_release(ep->l2t);
2793 c4iw_put_ep(&ep->com);
2794}
2795
2796static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
2797 struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
2798{
2799 struct sk_buff *rpl_skb;
2800 struct cpl_pass_accept_req *cpl;
2801 int ret;
2802
2803 rpl_skb = (struct sk_buff *)cpu_to_be64(req->cookie);
2804 BUG_ON(!rpl_skb);
2805 if (req->retval) {
2806 PDBG("%s passive open failure %d\n", __func__, req->retval);
2807 mutex_lock(&dev->rdev.stats.lock);
2808 dev->rdev.stats.pas_ofld_conn_fails++;
2809 mutex_unlock(&dev->rdev.stats.lock);
2810 kfree_skb(rpl_skb);
2811 } else {
2812 cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
2813 OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
2814 htonl(req->tid)));
2815 ret = pass_accept_req(dev, rpl_skb);
2816 if (!ret)
2817 kfree_skb(rpl_skb);
2818 }
2819 return;
2820}
2821
2822static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
2823{ 2134{
2824 struct cpl_fw6_msg *rpl = cplhdr(skb); 2135 struct cpl_fw6_msg *rpl = cplhdr(skb);
2825 struct cpl_fw6_msg_ofld_connection_wr_rpl *req; 2136 c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
2826
2827 switch (rpl->type) {
2828 case FW6_TYPE_CQE:
2829 c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
2830 break;
2831 case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
2832 req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
2833 switch (req->t_state) {
2834 case TCP_SYN_SENT:
2835 active_ofld_conn_reply(dev, skb, req);
2836 break;
2837 case TCP_SYN_RECV:
2838 passive_ofld_conn_reply(dev, skb, req);
2839 break;
2840 default:
2841 pr_err("%s unexpected ofld conn wr state %d\n",
2842 __func__, req->t_state);
2843 break;
2844 }
2845 break;
2846 }
2847 return 0;
2848}
2849
2850static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid , u8 tos)
2851{
2852 u32 l2info;
2853 u16 vlantag, len, hdr_len;
2854 u8 intf;
2855 struct cpl_rx_pkt *cpl = cplhdr(skb);
2856 struct cpl_pass_accept_req *req;
2857 struct tcp_options_received tmp_opt;
2858
2859 /* Store values from cpl_rx_pkt in temporary location. */
2860 vlantag = cpl->vlan;
2861 len = cpl->len;
2862 l2info = cpl->l2info;
2863 hdr_len = cpl->hdr_len;
2864 intf = cpl->iff;
2865
2866 __skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
2867
2868 /*
2869 * We need to parse the TCP options from SYN packet.
2870 * to generate cpl_pass_accept_req.
2871 */
2872 memset(&tmp_opt, 0, sizeof(tmp_opt));
2873 tcp_clear_options(&tmp_opt);
2874 tcp_parse_options(skb, &tmp_opt, 0, 0, NULL);
2875
2876 req = (struct cpl_pass_accept_req *)__skb_push(skb, sizeof(*req));
2877 memset(req, 0, sizeof(*req));
2878 req->l2info = cpu_to_be16(V_SYN_INTF(intf) |
2879 V_SYN_MAC_IDX(G_RX_MACIDX(htonl(l2info))) |
2880 F_SYN_XACT_MATCH);
2881 req->hdr_len = cpu_to_be32(V_SYN_RX_CHAN(G_RX_CHAN(htonl(l2info))) |
2882 V_TCP_HDR_LEN(G_RX_TCPHDR_LEN(htons(hdr_len))) |
2883 V_IP_HDR_LEN(G_RX_IPHDR_LEN(htons(hdr_len))) |
2884 V_ETH_HDR_LEN(G_RX_ETHHDR_LEN(htonl(l2info))));
2885 req->vlan = vlantag;
2886 req->len = len;
2887 req->tos_stid = cpu_to_be32(PASS_OPEN_TID(stid) |
2888 PASS_OPEN_TOS(tos));
2889 req->tcpopt.mss = htons(tmp_opt.mss_clamp);
2890 if (tmp_opt.wscale_ok)
2891 req->tcpopt.wsf = tmp_opt.snd_wscale;
2892 req->tcpopt.tstamp = tmp_opt.saw_tstamp;
2893 if (tmp_opt.sack_ok)
2894 req->tcpopt.sack = 1;
2895 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
2896 return;
2897}
2898
2899static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
2900 __be32 laddr, __be16 lport,
2901 __be32 raddr, __be16 rport,
2902 u32 rcv_isn, u32 filter, u16 window,
2903 u32 rss_qid, u8 port_id)
2904{
2905 struct sk_buff *req_skb;
2906 struct fw_ofld_connection_wr *req;
2907 struct cpl_pass_accept_req *cpl = cplhdr(skb);
2908
2909 req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
2910 req = (struct fw_ofld_connection_wr *)__skb_put(req_skb, sizeof(*req));
2911 memset(req, 0, sizeof(*req));
2912 req->op_compl = htonl(V_WR_OP(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL(1));
2913 req->len16_pkd = htonl(FW_WR_LEN16(DIV_ROUND_UP(sizeof(*req), 16)));
2914 req->le.version_cpl = htonl(F_FW_OFLD_CONNECTION_WR_CPL);
2915 req->le.filter = filter;
2916 req->le.lport = lport;
2917 req->le.pport = rport;
2918 req->le.u.ipv4.lip = laddr;
2919 req->le.u.ipv4.pip = raddr;
2920 req->tcb.rcv_nxt = htonl(rcv_isn + 1);
2921 req->tcb.rcv_adv = htons(window);
2922 req->tcb.t_state_to_astid =
2923 htonl(V_FW_OFLD_CONNECTION_WR_T_STATE(TCP_SYN_RECV) |
2924 V_FW_OFLD_CONNECTION_WR_RCV_SCALE(cpl->tcpopt.wsf) |
2925 V_FW_OFLD_CONNECTION_WR_ASTID(
2926 GET_PASS_OPEN_TID(ntohl(cpl->tos_stid))));
2927
2928 /*
2929 * We store the qid in opt2 which will be used by the firmware
2930 * to send us the wr response.
2931 */
2932 req->tcb.opt2 = htonl(V_RSS_QUEUE(rss_qid));
2933
2934 /*
2935 * We initialize the MSS index in TCB to 0xF.
2936 * So that when driver sends cpl_pass_accept_rpl
2937 * TCB picks up the correct value. If this was 0
2938 * TP will ignore any value > 0 for MSS index.
2939 */
2940 req->tcb.opt0 = cpu_to_be64(V_MSS_IDX(0xF));
2941 req->cookie = cpu_to_be64((u64)skb);
2942
2943 set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
2944 cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
2945}
2946
2947/*
2948 * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt
2949 * messages when a filter is being used instead of server to
2950 * redirect a syn packet. When packets hit filter they are redirected
2951 * to the offload queue and driver tries to establish the connection
2952 * using firmware work request.
2953 */
2954static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
2955{
2956 int stid;
2957 unsigned int filter;
2958 struct ethhdr *eh = NULL;
2959 struct vlan_ethhdr *vlan_eh = NULL;
2960 struct iphdr *iph;
2961 struct tcphdr *tcph;
2962 struct rss_header *rss = (void *)skb->data;
2963 struct cpl_rx_pkt *cpl = (void *)skb->data;
2964 struct cpl_pass_accept_req *req = (void *)(rss + 1);
2965 struct l2t_entry *e;
2966 struct dst_entry *dst;
2967 struct rtable *rt;
2968 struct c4iw_ep *lep;
2969 u16 window;
2970 struct port_info *pi;
2971 struct net_device *pdev;
2972 u16 rss_qid;
2973 int step;
2974 u32 tx_chan;
2975 struct neighbour *neigh;
2976
2977 /* Drop all non-SYN packets */
2978 if (!(cpl->l2info & cpu_to_be32(F_RXF_SYN)))
2979 goto reject;
2980
2981 /*
2982 * Drop all packets which did not hit the filter.
2983 * Unlikely to happen.
2984 */
2985 if (!(rss->filter_hit && rss->filter_tid))
2986 goto reject;
2987
2988 /*
2989 * Calculate the server tid from filter hit index from cpl_rx_pkt.
2990 */
2991 stid = cpu_to_be32(rss->hash_val) - dev->rdev.lldi.tids->sftid_base
2992 + dev->rdev.lldi.tids->nstids;
2993
2994 lep = (struct c4iw_ep *)lookup_stid(dev->rdev.lldi.tids, stid);
2995 if (!lep) {
2996 PDBG("%s connect request on invalid stid %d\n", __func__, stid);
2997 goto reject;
2998 }
2999
3000 if (G_RX_ETHHDR_LEN(ntohl(cpl->l2info)) == ETH_HLEN) {
3001 eh = (struct ethhdr *)(req + 1);
3002 iph = (struct iphdr *)(eh + 1);
3003 } else {
3004 vlan_eh = (struct vlan_ethhdr *)(req + 1);
3005 iph = (struct iphdr *)(vlan_eh + 1);
3006 skb->vlan_tci = ntohs(cpl->vlan);
3007 }
3008
3009 if (iph->version != 0x4)
3010 goto reject;
3011
3012 tcph = (struct tcphdr *)(iph + 1);
3013 skb_set_network_header(skb, (void *)iph - (void *)rss);
3014 skb_set_transport_header(skb, (void *)tcph - (void *)rss);
3015 skb_get(skb);
3016
3017 PDBG("%s lip 0x%x lport %u pip 0x%x pport %u tos %d\n", __func__,
3018 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
3019 ntohs(tcph->source), iph->tos);
3020
3021 rt = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
3022 iph->tos);
3023 if (!rt) {
3024 pr_err("%s - failed to find dst entry!\n",
3025 __func__);
3026 goto reject;
3027 }
3028 dst = &rt->dst;
3029 neigh = dst_neigh_lookup_skb(dst, skb);
3030
3031 if (neigh->dev->flags & IFF_LOOPBACK) {
3032 pdev = ip_dev_find(&init_net, iph->daddr);
3033 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
3034 pdev, 0);
3035 pi = (struct port_info *)netdev_priv(pdev);
3036 tx_chan = cxgb4_port_chan(pdev);
3037 dev_put(pdev);
3038 } else {
3039 e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
3040 neigh->dev, 0);
3041 pi = (struct port_info *)netdev_priv(neigh->dev);
3042 tx_chan = cxgb4_port_chan(neigh->dev);
3043 }
3044 if (!e) {
3045 pr_err("%s - failed to allocate l2t entry!\n",
3046 __func__);
3047 goto free_dst;
3048 }
3049
3050 step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
3051 rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
3052 window = htons(tcph->window);
3053
3054 /* Calcuate filter portion for LE region. */
3055 filter = cpu_to_be32(select_ntuple(dev, dst, e));
3056
3057 /*
3058 * Synthesize the cpl_pass_accept_req. We have everything except the
3059 * TID. Once firmware sends a reply with TID we update the TID field
3060 * in cpl and pass it through the regular cpl_pass_accept_req path.
3061 */
3062 build_cpl_pass_accept_req(skb, stid, iph->tos);
3063 send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
3064 tcph->source, ntohl(tcph->seq), filter, window,
3065 rss_qid, pi->port_id);
3066 cxgb4_l2t_release(e);
3067free_dst:
3068 dst_release(dst);
3069reject:
3070 return 0; 2137 return 0;
3071} 2138}
3072 2139
@@ -3089,8 +2156,7 @@ static c4iw_handler_func work_handlers[NUM_CPL_CMDS] = {
3089 [CPL_CLOSE_CON_RPL] = close_con_rpl, 2156 [CPL_CLOSE_CON_RPL] = close_con_rpl,
3090 [CPL_RDMA_TERMINATE] = terminate, 2157 [CPL_RDMA_TERMINATE] = terminate,
3091 [CPL_FW4_ACK] = fw4_ack, 2158 [CPL_FW4_ACK] = fw4_ack,
3092 [CPL_FW6_MSG] = deferred_fw6_msg, 2159 [CPL_FW6_MSG] = async_event
3093 [CPL_RX_PKT] = rx_pkt
3094}; 2160};
3095 2161
3096static void process_timeout(struct c4iw_ep *ep) 2162static void process_timeout(struct c4iw_ep *ep)
@@ -3101,7 +2167,6 @@ static void process_timeout(struct c4iw_ep *ep)
3101 mutex_lock(&ep->com.mutex); 2167 mutex_lock(&ep->com.mutex);
3102 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid, 2168 PDBG("%s ep %p tid %u state %d\n", __func__, ep, ep->hwtid,
3103 ep->com.state); 2169 ep->com.state);
3104 set_bit(TIMEDOUT, &ep->com.history);
3105 switch (ep->com.state) { 2170 switch (ep->com.state) {
3106 case MPA_REQ_SENT: 2171 case MPA_REQ_SENT:
3107 __state_set(&ep->com, ABORTING); 2172 __state_set(&ep->com, ABORTING);
@@ -3121,8 +2186,9 @@ static void process_timeout(struct c4iw_ep *ep)
3121 __state_set(&ep->com, ABORTING); 2186 __state_set(&ep->com, ABORTING);
3122 break; 2187 break;
3123 default: 2188 default:
3124 WARN(1, "%s unexpected state ep %p tid %u state %u\n", 2189 printk(KERN_ERR "%s unexpected state ep %p tid %u state %u\n",
3125 __func__, ep, ep->hwtid, ep->com.state); 2190 __func__, ep, ep->hwtid, ep->com.state);
2191 WARN_ON(1);
3126 abort = 0; 2192 abort = 0;
3127 } 2193 }
3128 mutex_unlock(&ep->com.mutex); 2194 mutex_unlock(&ep->com.mutex);
@@ -3222,7 +2288,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3222 PDBG("%s type %u\n", __func__, rpl->type); 2288 PDBG("%s type %u\n", __func__, rpl->type);
3223 2289
3224 switch (rpl->type) { 2290 switch (rpl->type) {
3225 case FW6_TYPE_WR_RPL: 2291 case 1:
3226 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff); 2292 ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
3227 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1]; 2293 wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
3228 PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret); 2294 PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
@@ -3230,8 +2296,7 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3230 c4iw_wake_up(wr_waitp, ret ? -ret : 0); 2296 c4iw_wake_up(wr_waitp, ret ? -ret : 0);
3231 kfree_skb(skb); 2297 kfree_skb(skb);
3232 break; 2298 break;
3233 case FW6_TYPE_CQE: 2299 case 2:
3234 case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3235 sched(dev, skb); 2300 sched(dev, skb);
3236 break; 2301 break;
3237 default: 2302 default:
@@ -3251,12 +2316,6 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
3251 unsigned int tid = GET_TID(req); 2316 unsigned int tid = GET_TID(req);
3252 2317
3253 ep = lookup_tid(t, tid); 2318 ep = lookup_tid(t, tid);
3254 if (!ep) {
3255 printk(KERN_WARNING MOD
3256 "Abort on non-existent endpoint, tid %d\n", tid);
3257 kfree_skb(skb);
3258 return 0;
3259 }
3260 if (is_neg_adv_abort(req->status)) { 2319 if (is_neg_adv_abort(req->status)) {
3261 PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep, 2320 PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
3262 ep->hwtid); 2321 ep->hwtid);
@@ -3294,8 +2353,7 @@ c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
3294 [CPL_RDMA_TERMINATE] = sched, 2353 [CPL_RDMA_TERMINATE] = sched,
3295 [CPL_FW4_ACK] = sched, 2354 [CPL_FW4_ACK] = sched,
3296 [CPL_SET_TCB_RPL] = set_tcb_rpl, 2355 [CPL_SET_TCB_RPL] = set_tcb_rpl,
3297 [CPL_FW6_MSG] = fw6_msg, 2356 [CPL_FW6_MSG] = fw6_msg
3298 [CPL_RX_PKT] = sched
3299}; 2357};
3300 2358
3301int __init c4iw_cm_init(void) 2359int __init c4iw_cm_init(void)
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 0f1607c8325..1720dc790d1 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -185,7 +185,7 @@ static void insert_recv_cqe(struct t4_wq *wq, struct t4_cq *cq)
185 V_CQE_OPCODE(FW_RI_SEND) | 185 V_CQE_OPCODE(FW_RI_SEND) |
186 V_CQE_TYPE(0) | 186 V_CQE_TYPE(0) |
187 V_CQE_SWCQE(1) | 187 V_CQE_SWCQE(1) |
188 V_CQE_QPID(wq->sq.qid)); 188 V_CQE_QPID(wq->rq.qid));
189 cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen)); 189 cqe.bits_type_ts = cpu_to_be64(V_CQE_GENBIT((u64)cq->gen));
190 cq->sw_queue[cq->sw_pidx] = cqe; 190 cq->sw_queue[cq->sw_pidx] = cqe;
191 t4_swcq_produce(cq); 191 t4_swcq_produce(cq);
@@ -311,7 +311,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count)
311 while (ptr != cq->sw_pidx) { 311 while (ptr != cq->sw_pidx) {
312 cqe = &cq->sw_queue[ptr]; 312 cqe = &cq->sw_queue[ptr];
313 if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && 313 if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) &&
314 (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) 314 (CQE_QPID(cqe) == wq->rq.qid) && cqe_completes_wr(cqe, wq))
315 (*count)++; 315 (*count)++;
316 if (++ptr == cq->size) 316 if (++ptr == cq->size)
317 ptr = 0; 317 ptr = 0;
@@ -818,7 +818,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev, int entries,
818 chp->cq.size--; /* status page */ 818 chp->cq.size--; /* status page */
819 chp->ibcq.cqe = entries - 2; 819 chp->ibcq.cqe = entries - 2;
820 spin_lock_init(&chp->lock); 820 spin_lock_init(&chp->lock);
821 spin_lock_init(&chp->comp_handler_lock);
822 atomic_set(&chp->refcnt, 1); 821 atomic_set(&chp->refcnt, 1);
823 init_waitqueue_head(&chp->wait); 822 init_waitqueue_head(&chp->wait);
824 ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid); 823 ret = insert_handle(rhp, &rhp->cqidr, chp, chp->cq.cqid);
diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index ba11c76c0b5..40a13cc633a 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@ -32,7 +32,6 @@
32#include <linux/module.h> 32#include <linux/module.h>
33#include <linux/moduleparam.h> 33#include <linux/moduleparam.h>
34#include <linux/debugfs.h> 34#include <linux/debugfs.h>
35#include <linux/vmalloc.h>
36 35
37#include <rdma/ib_verbs.h> 36#include <rdma/ib_verbs.h>
38 37
@@ -45,12 +44,6 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
45MODULE_LICENSE("Dual BSD/GPL"); 44MODULE_LICENSE("Dual BSD/GPL");
46MODULE_VERSION(DRV_VERSION); 45MODULE_VERSION(DRV_VERSION);
47 46
48struct uld_ctx {
49 struct list_head entry;
50 struct cxgb4_lld_info lldi;
51 struct c4iw_dev *dev;
52};
53
54static LIST_HEAD(uld_ctx_list); 47static LIST_HEAD(uld_ctx_list);
55static DEFINE_MUTEX(dev_mutex); 48static DEFINE_MUTEX(dev_mutex);
56 49
@@ -122,7 +115,7 @@ static int qp_release(struct inode *inode, struct file *file)
122 printk(KERN_INFO "%s null qpd?\n", __func__); 115 printk(KERN_INFO "%s null qpd?\n", __func__);
123 return 0; 116 return 0;
124 } 117 }
125 vfree(qpd->buf); 118 kfree(qpd->buf);
126 kfree(qpd); 119 kfree(qpd);
127 return 0; 120 return 0;
128} 121}
@@ -146,7 +139,7 @@ static int qp_open(struct inode *inode, struct file *file)
146 spin_unlock_irq(&qpd->devp->lock); 139 spin_unlock_irq(&qpd->devp->lock);
147 140
148 qpd->bufsize = count * 128; 141 qpd->bufsize = count * 128;
149 qpd->buf = vmalloc(qpd->bufsize); 142 qpd->buf = kmalloc(qpd->bufsize, GFP_KERNEL);
150 if (!qpd->buf) { 143 if (!qpd->buf) {
151 ret = -ENOMEM; 144 ret = -ENOMEM;
152 goto err1; 145 goto err1;
@@ -247,196 +240,6 @@ static const struct file_operations stag_debugfs_fops = {
247 .llseek = default_llseek, 240 .llseek = default_llseek,
248}; 241};
249 242
250static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
251
252static int stats_show(struct seq_file *seq, void *v)
253{
254 struct c4iw_dev *dev = seq->private;
255
256 seq_printf(seq, " Object: %10s %10s %10s %10s\n", "Total", "Current",
257 "Max", "Fail");
258 seq_printf(seq, " PDID: %10llu %10llu %10llu %10llu\n",
259 dev->rdev.stats.pd.total, dev->rdev.stats.pd.cur,
260 dev->rdev.stats.pd.max, dev->rdev.stats.pd.fail);
261 seq_printf(seq, " QID: %10llu %10llu %10llu %10llu\n",
262 dev->rdev.stats.qid.total, dev->rdev.stats.qid.cur,
263 dev->rdev.stats.qid.max, dev->rdev.stats.qid.fail);
264 seq_printf(seq, " TPTMEM: %10llu %10llu %10llu %10llu\n",
265 dev->rdev.stats.stag.total, dev->rdev.stats.stag.cur,
266 dev->rdev.stats.stag.max, dev->rdev.stats.stag.fail);
267 seq_printf(seq, " PBLMEM: %10llu %10llu %10llu %10llu\n",
268 dev->rdev.stats.pbl.total, dev->rdev.stats.pbl.cur,
269 dev->rdev.stats.pbl.max, dev->rdev.stats.pbl.fail);
270 seq_printf(seq, " RQTMEM: %10llu %10llu %10llu %10llu\n",
271 dev->rdev.stats.rqt.total, dev->rdev.stats.rqt.cur,
272 dev->rdev.stats.rqt.max, dev->rdev.stats.rqt.fail);
273 seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu %10llu\n",
274 dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
275 dev->rdev.stats.ocqp.max, dev->rdev.stats.ocqp.fail);
276 seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
277 seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
278 seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
279 seq_printf(seq, " DB State: %s Transitions %llu\n",
280 db_state_str[dev->db_state],
281 dev->rdev.stats.db_state_transitions);
282 seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
283 seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
284 dev->rdev.stats.act_ofld_conn_fails);
285 seq_printf(seq, "PAS_OFLD_CONN_FAILS: %10llu\n",
286 dev->rdev.stats.pas_ofld_conn_fails);
287 return 0;
288}
289
290static int stats_open(struct inode *inode, struct file *file)
291{
292 return single_open(file, stats_show, inode->i_private);
293}
294
295static ssize_t stats_clear(struct file *file, const char __user *buf,
296 size_t count, loff_t *pos)
297{
298 struct c4iw_dev *dev = ((struct seq_file *)file->private_data)->private;
299
300 mutex_lock(&dev->rdev.stats.lock);
301 dev->rdev.stats.pd.max = 0;
302 dev->rdev.stats.pd.fail = 0;
303 dev->rdev.stats.qid.max = 0;
304 dev->rdev.stats.qid.fail = 0;
305 dev->rdev.stats.stag.max = 0;
306 dev->rdev.stats.stag.fail = 0;
307 dev->rdev.stats.pbl.max = 0;
308 dev->rdev.stats.pbl.fail = 0;
309 dev->rdev.stats.rqt.max = 0;
310 dev->rdev.stats.rqt.fail = 0;
311 dev->rdev.stats.ocqp.max = 0;
312 dev->rdev.stats.ocqp.fail = 0;
313 dev->rdev.stats.db_full = 0;
314 dev->rdev.stats.db_empty = 0;
315 dev->rdev.stats.db_drop = 0;
316 dev->rdev.stats.db_state_transitions = 0;
317 dev->rdev.stats.tcam_full = 0;
318 dev->rdev.stats.act_ofld_conn_fails = 0;
319 dev->rdev.stats.pas_ofld_conn_fails = 0;
320 mutex_unlock(&dev->rdev.stats.lock);
321 return count;
322}
323
324static const struct file_operations stats_debugfs_fops = {
325 .owner = THIS_MODULE,
326 .open = stats_open,
327 .release = single_release,
328 .read = seq_read,
329 .llseek = seq_lseek,
330 .write = stats_clear,
331};
332
333static int dump_ep(int id, void *p, void *data)
334{
335 struct c4iw_ep *ep = p;
336 struct c4iw_debugfs_data *epd = data;
337 int space;
338 int cc;
339
340 space = epd->bufsize - epd->pos - 1;
341 if (space == 0)
342 return 1;
343
344 cc = snprintf(epd->buf + epd->pos, space,
345 "ep %p cm_id %p qp %p state %d flags 0x%lx history 0x%lx "
346 "hwtid %d atid %d %pI4:%d <-> %pI4:%d\n",
347 ep, ep->com.cm_id, ep->com.qp, (int)ep->com.state,
348 ep->com.flags, ep->com.history, ep->hwtid, ep->atid,
349 &ep->com.local_addr.sin_addr.s_addr,
350 ntohs(ep->com.local_addr.sin_port),
351 &ep->com.remote_addr.sin_addr.s_addr,
352 ntohs(ep->com.remote_addr.sin_port));
353 if (cc < space)
354 epd->pos += cc;
355 return 0;
356}
357
358static int dump_listen_ep(int id, void *p, void *data)
359{
360 struct c4iw_listen_ep *ep = p;
361 struct c4iw_debugfs_data *epd = data;
362 int space;
363 int cc;
364
365 space = epd->bufsize - epd->pos - 1;
366 if (space == 0)
367 return 1;
368
369 cc = snprintf(epd->buf + epd->pos, space,
370 "ep %p cm_id %p state %d flags 0x%lx stid %d backlog %d "
371 "%pI4:%d\n", ep, ep->com.cm_id, (int)ep->com.state,
372 ep->com.flags, ep->stid, ep->backlog,
373 &ep->com.local_addr.sin_addr.s_addr,
374 ntohs(ep->com.local_addr.sin_port));
375 if (cc < space)
376 epd->pos += cc;
377 return 0;
378}
379
380static int ep_release(struct inode *inode, struct file *file)
381{
382 struct c4iw_debugfs_data *epd = file->private_data;
383 if (!epd) {
384 pr_info("%s null qpd?\n", __func__);
385 return 0;
386 }
387 vfree(epd->buf);
388 kfree(epd);
389 return 0;
390}
391
392static int ep_open(struct inode *inode, struct file *file)
393{
394 struct c4iw_debugfs_data *epd;
395 int ret = 0;
396 int count = 1;
397
398 epd = kmalloc(sizeof(*epd), GFP_KERNEL);
399 if (!epd) {
400 ret = -ENOMEM;
401 goto out;
402 }
403 epd->devp = inode->i_private;
404 epd->pos = 0;
405
406 spin_lock_irq(&epd->devp->lock);
407 idr_for_each(&epd->devp->hwtid_idr, count_idrs, &count);
408 idr_for_each(&epd->devp->atid_idr, count_idrs, &count);
409 idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
410 spin_unlock_irq(&epd->devp->lock);
411
412 epd->bufsize = count * 160;
413 epd->buf = vmalloc(epd->bufsize);
414 if (!epd->buf) {
415 ret = -ENOMEM;
416 goto err1;
417 }
418
419 spin_lock_irq(&epd->devp->lock);
420 idr_for_each(&epd->devp->hwtid_idr, dump_ep, epd);
421 idr_for_each(&epd->devp->atid_idr, dump_ep, epd);
422 idr_for_each(&epd->devp->stid_idr, dump_listen_ep, epd);
423 spin_unlock_irq(&epd->devp->lock);
424
425 file->private_data = epd;
426 goto out;
427err1:
428 kfree(epd);
429out:
430 return ret;
431}
432
433static const struct file_operations ep_debugfs_fops = {
434 .owner = THIS_MODULE,
435 .open = ep_open,
436 .release = ep_release,
437 .read = debugfs_read,
438};
439
440static int setup_debugfs(struct c4iw_dev *devp) 243static int setup_debugfs(struct c4iw_dev *devp)
441{ 244{
442 struct dentry *de; 245 struct dentry *de;
@@ -453,17 +256,6 @@ static int setup_debugfs(struct c4iw_dev *devp)
453 (void *)devp, &stag_debugfs_fops); 256 (void *)devp, &stag_debugfs_fops);
454 if (de && de->d_inode) 257 if (de && de->d_inode)
455 de->d_inode->i_size = 4096; 258 de->d_inode->i_size = 4096;
456
457 de = debugfs_create_file("stats", S_IWUSR, devp->debugfs_root,
458 (void *)devp, &stats_debugfs_fops);
459 if (de && de->d_inode)
460 de->d_inode->i_size = 4096;
461
462 de = debugfs_create_file("eps", S_IWUSR, devp->debugfs_root,
463 (void *)devp, &ep_debugfs_fops);
464 if (de && de->d_inode)
465 de->d_inode->i_size = 4096;
466
467 return 0; 259 return 0;
468} 260}
469 261
@@ -477,13 +269,9 @@ void c4iw_release_dev_ucontext(struct c4iw_rdev *rdev,
477 list_for_each_safe(pos, nxt, &uctx->qpids) { 269 list_for_each_safe(pos, nxt, &uctx->qpids) {
478 entry = list_entry(pos, struct c4iw_qid_list, entry); 270 entry = list_entry(pos, struct c4iw_qid_list, entry);
479 list_del_init(&entry->entry); 271 list_del_init(&entry->entry);
480 if (!(entry->qid & rdev->qpmask)) { 272 if (!(entry->qid & rdev->qpmask))
481 c4iw_put_resource(&rdev->resource.qid_table, 273 c4iw_put_resource(&rdev->resource.qid_fifo, entry->qid,
482 entry->qid); 274 &rdev->resource.qid_fifo_lock);
483 mutex_lock(&rdev->stats.lock);
484 rdev->stats.qid.cur -= rdev->qpmask + 1;
485 mutex_unlock(&rdev->stats.lock);
486 }
487 kfree(entry); 275 kfree(entry);
488 } 276 }
489 277
@@ -544,13 +332,6 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
544 goto err1; 332 goto err1;
545 } 333 }
546 334
547 rdev->stats.pd.total = T4_MAX_NUM_PD;
548 rdev->stats.stag.total = rdev->lldi.vr->stag.size;
549 rdev->stats.pbl.total = rdev->lldi.vr->pbl.size;
550 rdev->stats.rqt.total = rdev->lldi.vr->rq.size;
551 rdev->stats.ocqp.total = rdev->lldi.vr->ocq.size;
552 rdev->stats.qid.total = rdev->lldi.vr->qp.size;
553
554 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD); 335 err = c4iw_init_resource(rdev, c4iw_num_stags(rdev), T4_MAX_NUM_PD);
555 if (err) { 336 if (err) {
556 printk(KERN_ERR MOD "error %d initializing resources\n", err); 337 printk(KERN_ERR MOD "error %d initializing resources\n", err);
@@ -589,44 +370,30 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
589 c4iw_destroy_resource(&rdev->resource); 370 c4iw_destroy_resource(&rdev->resource);
590} 371}
591 372
592static void c4iw_dealloc(struct uld_ctx *ctx) 373struct uld_ctx {
374 struct list_head entry;
375 struct cxgb4_lld_info lldi;
376 struct c4iw_dev *dev;
377};
378
379static void c4iw_remove(struct uld_ctx *ctx)
593{ 380{
381 PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
382 c4iw_unregister_device(ctx->dev);
594 c4iw_rdev_close(&ctx->dev->rdev); 383 c4iw_rdev_close(&ctx->dev->rdev);
595 idr_destroy(&ctx->dev->cqidr); 384 idr_destroy(&ctx->dev->cqidr);
596 idr_destroy(&ctx->dev->qpidr); 385 idr_destroy(&ctx->dev->qpidr);
597 idr_destroy(&ctx->dev->mmidr); 386 idr_destroy(&ctx->dev->mmidr);
598 idr_destroy(&ctx->dev->hwtid_idr);
599 idr_destroy(&ctx->dev->stid_idr);
600 idr_destroy(&ctx->dev->atid_idr);
601 iounmap(ctx->dev->rdev.oc_mw_kva); 387 iounmap(ctx->dev->rdev.oc_mw_kva);
602 ib_dealloc_device(&ctx->dev->ibdev); 388 ib_dealloc_device(&ctx->dev->ibdev);
603 ctx->dev = NULL; 389 ctx->dev = NULL;
604} 390}
605 391
606static void c4iw_remove(struct uld_ctx *ctx)
607{
608 PDBG("%s c4iw_dev %p\n", __func__, ctx->dev);
609 c4iw_unregister_device(ctx->dev);
610 c4iw_dealloc(ctx);
611}
612
613static int rdma_supported(const struct cxgb4_lld_info *infop)
614{
615 return infop->vr->stag.size > 0 && infop->vr->pbl.size > 0 &&
616 infop->vr->rq.size > 0 && infop->vr->qp.size > 0 &&
617 infop->vr->cq.size > 0 && infop->vr->ocq.size > 0;
618}
619
620static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) 392static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
621{ 393{
622 struct c4iw_dev *devp; 394 struct c4iw_dev *devp;
623 int ret; 395 int ret;
624 396
625 if (!rdma_supported(infop)) {
626 printk(KERN_INFO MOD "%s: RDMA not supported on this device.\n",
627 pci_name(infop->pdev));
628 return ERR_PTR(-ENOSYS);
629 }
630 devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp)); 397 devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
631 if (!devp) { 398 if (!devp) {
632 printk(KERN_ERR MOD "Cannot allocate ib device\n"); 399 printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -647,6 +414,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
647 414
648 ret = c4iw_rdev_open(&devp->rdev); 415 ret = c4iw_rdev_open(&devp->rdev);
649 if (ret) { 416 if (ret) {
417 mutex_unlock(&dev_mutex);
650 printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret); 418 printk(KERN_ERR MOD "Unable to open CXIO rdev err %d\n", ret);
651 ib_dealloc_device(&devp->ibdev); 419 ib_dealloc_device(&devp->ibdev);
652 return ERR_PTR(ret); 420 return ERR_PTR(ret);
@@ -655,12 +423,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
655 idr_init(&devp->cqidr); 423 idr_init(&devp->cqidr);
656 idr_init(&devp->qpidr); 424 idr_init(&devp->qpidr);
657 idr_init(&devp->mmidr); 425 idr_init(&devp->mmidr);
658 idr_init(&devp->hwtid_idr);
659 idr_init(&devp->stid_idr);
660 idr_init(&devp->atid_idr);
661 spin_lock_init(&devp->lock); 426 spin_lock_init(&devp->lock);
662 mutex_init(&devp->rdev.stats.lock);
663 mutex_init(&devp->db_mutex);
664 427
665 if (c4iw_debugfs_root) { 428 if (c4iw_debugfs_root) {
666 devp->debugfs_root = debugfs_create_dir( 429 devp->debugfs_root = debugfs_create_dir(
@@ -703,76 +466,14 @@ out:
703 return ctx; 466 return ctx;
704} 467}
705 468
706static inline struct sk_buff *copy_gl_to_skb_pkt(const struct pkt_gl *gl,
707 const __be64 *rsp,
708 u32 pktshift)
709{
710 struct sk_buff *skb;
711
712 /*
713 * Allocate space for cpl_pass_accept_req which will be synthesized by
714 * driver. Once the driver synthesizes the request the skb will go
715 * through the regular cpl_pass_accept_req processing.
716 * The math here assumes sizeof cpl_pass_accept_req >= sizeof
717 * cpl_rx_pkt.
718 */
719 skb = alloc_skb(gl->tot_len + sizeof(struct cpl_pass_accept_req) +
720 sizeof(struct rss_header) - pktshift, GFP_ATOMIC);
721 if (unlikely(!skb))
722 return NULL;
723
724 __skb_put(skb, gl->tot_len + sizeof(struct cpl_pass_accept_req) +
725 sizeof(struct rss_header) - pktshift);
726
727 /*
728 * This skb will contain:
729 * rss_header from the rspq descriptor (1 flit)
730 * cpl_rx_pkt struct from the rspq descriptor (2 flits)
731 * space for the difference between the size of an
732 * rx_pkt and pass_accept_req cpl (1 flit)
733 * the packet data from the gl
734 */
735 skb_copy_to_linear_data(skb, rsp, sizeof(struct cpl_pass_accept_req) +
736 sizeof(struct rss_header));
737 skb_copy_to_linear_data_offset(skb, sizeof(struct rss_header) +
738 sizeof(struct cpl_pass_accept_req),
739 gl->va + pktshift,
740 gl->tot_len - pktshift);
741 return skb;
742}
743
744static inline int recv_rx_pkt(struct c4iw_dev *dev, const struct pkt_gl *gl,
745 const __be64 *rsp)
746{
747 unsigned int opcode = *(u8 *)rsp;
748 struct sk_buff *skb;
749
750 if (opcode != CPL_RX_PKT)
751 goto out;
752
753 skb = copy_gl_to_skb_pkt(gl , rsp, dev->rdev.lldi.sge_pktshift);
754 if (skb == NULL)
755 goto out;
756
757 if (c4iw_handlers[opcode] == NULL) {
758 pr_info("%s no handler opcode 0x%x...\n", __func__,
759 opcode);
760 kfree_skb(skb);
761 goto out;
762 }
763 c4iw_handlers[opcode](dev, skb);
764 return 1;
765out:
766 return 0;
767}
768
769static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp, 469static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
770 const struct pkt_gl *gl) 470 const struct pkt_gl *gl)
771{ 471{
772 struct uld_ctx *ctx = handle; 472 struct uld_ctx *ctx = handle;
773 struct c4iw_dev *dev = ctx->dev; 473 struct c4iw_dev *dev = ctx->dev;
774 struct sk_buff *skb; 474 struct sk_buff *skb;
775 u8 opcode; 475 const struct cpl_act_establish *rpl;
476 unsigned int opcode;
776 477
777 if (gl == NULL) { 478 if (gl == NULL) {
778 /* omit RSS and rsp_ctrl at end of descriptor */ 479 /* omit RSS and rsp_ctrl at end of descriptor */
@@ -789,29 +490,19 @@ static int c4iw_uld_rx_handler(void *handle, const __be64 *rsp,
789 u32 qid = be32_to_cpu(rc->pldbuflen_qid); 490 u32 qid = be32_to_cpu(rc->pldbuflen_qid);
790 c4iw_ev_handler(dev, qid); 491 c4iw_ev_handler(dev, qid);
791 return 0; 492 return 0;
792 } else if (unlikely(*(u8 *)rsp != *(u8 *)gl->va)) {
793 if (recv_rx_pkt(dev, gl, rsp))
794 return 0;
795
796 pr_info("%s: unexpected FL contents at %p, " \
797 "RSS %#llx, FL %#llx, len %u\n",
798 pci_name(ctx->lldi.pdev), gl->va,
799 (unsigned long long)be64_to_cpu(*rsp),
800 (unsigned long long)be64_to_cpu(*(u64 *)gl->va),
801 gl->tot_len);
802
803 return 0;
804 } else { 493 } else {
805 skb = cxgb4_pktgl_to_skb(gl, 128, 128); 494 skb = cxgb4_pktgl_to_skb(gl, 128, 128);
806 if (unlikely(!skb)) 495 if (unlikely(!skb))
807 goto nomem; 496 goto nomem;
808 } 497 }
809 498
810 opcode = *(u8 *)rsp; 499 rpl = cplhdr(skb);
500 opcode = rpl->ot.opcode;
501
811 if (c4iw_handlers[opcode]) 502 if (c4iw_handlers[opcode])
812 c4iw_handlers[opcode](dev, skb); 503 c4iw_handlers[opcode](dev, skb);
813 else 504 else
814 pr_info("%s no handler opcode 0x%x...\n", __func__, 505 printk(KERN_INFO "%s no handler opcode 0x%x...\n", __func__,
815 opcode); 506 opcode);
816 507
817 return 0; 508 return 0;
@@ -828,24 +519,15 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
828 case CXGB4_STATE_UP: 519 case CXGB4_STATE_UP:
829 printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev)); 520 printk(KERN_INFO MOD "%s: Up\n", pci_name(ctx->lldi.pdev));
830 if (!ctx->dev) { 521 if (!ctx->dev) {
831 int ret; 522 int ret = 0;
832 523
833 ctx->dev = c4iw_alloc(&ctx->lldi); 524 ctx->dev = c4iw_alloc(&ctx->lldi);
834 if (IS_ERR(ctx->dev)) { 525 if (!IS_ERR(ctx->dev))
835 printk(KERN_ERR MOD 526 ret = c4iw_register_device(ctx->dev);
836 "%s: initialization failed: %ld\n", 527 if (IS_ERR(ctx->dev) || ret)
837 pci_name(ctx->lldi.pdev),
838 PTR_ERR(ctx->dev));
839 ctx->dev = NULL;
840 break;
841 }
842 ret = c4iw_register_device(ctx->dev);
843 if (ret) {
844 printk(KERN_ERR MOD 528 printk(KERN_ERR MOD
845 "%s: RDMA registration failed: %d\n", 529 "%s: RDMA registration failed: %d\n",
846 pci_name(ctx->lldi.pdev), ret); 530 pci_name(ctx->lldi.pdev), ret);
847 c4iw_dealloc(ctx);
848 }
849 } 531 }
850 break; 532 break;
851 case CXGB4_STATE_DOWN: 533 case CXGB4_STATE_DOWN:
@@ -878,234 +560,11 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
878 return 0; 560 return 0;
879} 561}
880 562
881static int disable_qp_db(int id, void *p, void *data)
882{
883 struct c4iw_qp *qp = p;
884
885 t4_disable_wq_db(&qp->wq);
886 return 0;
887}
888
889static void stop_queues(struct uld_ctx *ctx)
890{
891 spin_lock_irq(&ctx->dev->lock);
892 if (ctx->dev->db_state == NORMAL) {
893 ctx->dev->rdev.stats.db_state_transitions++;
894 ctx->dev->db_state = FLOW_CONTROL;
895 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
896 }
897 spin_unlock_irq(&ctx->dev->lock);
898}
899
900static int enable_qp_db(int id, void *p, void *data)
901{
902 struct c4iw_qp *qp = p;
903
904 t4_enable_wq_db(&qp->wq);
905 return 0;
906}
907
908static void resume_queues(struct uld_ctx *ctx)
909{
910 spin_lock_irq(&ctx->dev->lock);
911 if (ctx->dev->qpcnt <= db_fc_threshold &&
912 ctx->dev->db_state == FLOW_CONTROL) {
913 ctx->dev->db_state = NORMAL;
914 ctx->dev->rdev.stats.db_state_transitions++;
915 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
916 }
917 spin_unlock_irq(&ctx->dev->lock);
918}
919
920struct qp_list {
921 unsigned idx;
922 struct c4iw_qp **qps;
923};
924
925static int add_and_ref_qp(int id, void *p, void *data)
926{
927 struct qp_list *qp_listp = data;
928 struct c4iw_qp *qp = p;
929
930 c4iw_qp_add_ref(&qp->ibqp);
931 qp_listp->qps[qp_listp->idx++] = qp;
932 return 0;
933}
934
935static int count_qps(int id, void *p, void *data)
936{
937 unsigned *countp = data;
938 (*countp)++;
939 return 0;
940}
941
942static void deref_qps(struct qp_list qp_list)
943{
944 int idx;
945
946 for (idx = 0; idx < qp_list.idx; idx++)
947 c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
948}
949
950static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
951{
952 int idx;
953 int ret;
954
955 for (idx = 0; idx < qp_list->idx; idx++) {
956 struct c4iw_qp *qp = qp_list->qps[idx];
957
958 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
959 qp->wq.sq.qid,
960 t4_sq_host_wq_pidx(&qp->wq),
961 t4_sq_wq_size(&qp->wq));
962 if (ret) {
963 printk(KERN_ERR MOD "%s: Fatal error - "
964 "DB overflow recovery failed - "
965 "error syncing SQ qid %u\n",
966 pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
967 return;
968 }
969
970 ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
971 qp->wq.rq.qid,
972 t4_rq_host_wq_pidx(&qp->wq),
973 t4_rq_wq_size(&qp->wq));
974
975 if (ret) {
976 printk(KERN_ERR MOD "%s: Fatal error - "
977 "DB overflow recovery failed - "
978 "error syncing RQ qid %u\n",
979 pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
980 return;
981 }
982
983 /* Wait for the dbfifo to drain */
984 while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
985 set_current_state(TASK_UNINTERRUPTIBLE);
986 schedule_timeout(usecs_to_jiffies(10));
987 }
988 }
989}
990
991static void recover_queues(struct uld_ctx *ctx)
992{
993 int count = 0;
994 struct qp_list qp_list;
995 int ret;
996
997 /* lock out kernel db ringers */
998 mutex_lock(&ctx->dev->db_mutex);
999
1000 /* put all queues in to recovery mode */
1001 spin_lock_irq(&ctx->dev->lock);
1002 ctx->dev->db_state = RECOVERY;
1003 ctx->dev->rdev.stats.db_state_transitions++;
1004 idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
1005 spin_unlock_irq(&ctx->dev->lock);
1006
1007 /* slow everybody down */
1008 set_current_state(TASK_UNINTERRUPTIBLE);
1009 schedule_timeout(usecs_to_jiffies(1000));
1010
1011 /* Wait for the dbfifo to completely drain. */
1012 while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
1013 set_current_state(TASK_UNINTERRUPTIBLE);
1014 schedule_timeout(usecs_to_jiffies(10));
1015 }
1016
1017 /* flush the SGE contexts */
1018 ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
1019 if (ret) {
1020 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1021 pci_name(ctx->lldi.pdev));
1022 goto out;
1023 }
1024
1025 /* Count active queues so we can build a list of queues to recover */
1026 spin_lock_irq(&ctx->dev->lock);
1027 idr_for_each(&ctx->dev->qpidr, count_qps, &count);
1028
1029 qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
1030 if (!qp_list.qps) {
1031 printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
1032 pci_name(ctx->lldi.pdev));
1033 spin_unlock_irq(&ctx->dev->lock);
1034 goto out;
1035 }
1036 qp_list.idx = 0;
1037
1038 /* add and ref each qp so it doesn't get freed */
1039 idr_for_each(&ctx->dev->qpidr, add_and_ref_qp, &qp_list);
1040
1041 spin_unlock_irq(&ctx->dev->lock);
1042
1043 /* now traverse the list in a safe context to recover the db state*/
1044 recover_lost_dbs(ctx, &qp_list);
1045
1046 /* we're almost done! deref the qps and clean up */
1047 deref_qps(qp_list);
1048 kfree(qp_list.qps);
1049
1050 /* Wait for the dbfifo to completely drain again */
1051 while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
1052 set_current_state(TASK_UNINTERRUPTIBLE);
1053 schedule_timeout(usecs_to_jiffies(10));
1054 }
1055
1056 /* resume the queues */
1057 spin_lock_irq(&ctx->dev->lock);
1058 if (ctx->dev->qpcnt > db_fc_threshold)
1059 ctx->dev->db_state = FLOW_CONTROL;
1060 else {
1061 ctx->dev->db_state = NORMAL;
1062 idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
1063 }
1064 ctx->dev->rdev.stats.db_state_transitions++;
1065 spin_unlock_irq(&ctx->dev->lock);
1066
1067out:
1068 /* start up kernel db ringers again */
1069 mutex_unlock(&ctx->dev->db_mutex);
1070}
1071
1072static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
1073{
1074 struct uld_ctx *ctx = handle;
1075
1076 switch (control) {
1077 case CXGB4_CONTROL_DB_FULL:
1078 stop_queues(ctx);
1079 mutex_lock(&ctx->dev->rdev.stats.lock);
1080 ctx->dev->rdev.stats.db_full++;
1081 mutex_unlock(&ctx->dev->rdev.stats.lock);
1082 break;
1083 case CXGB4_CONTROL_DB_EMPTY:
1084 resume_queues(ctx);
1085 mutex_lock(&ctx->dev->rdev.stats.lock);
1086 ctx->dev->rdev.stats.db_empty++;
1087 mutex_unlock(&ctx->dev->rdev.stats.lock);
1088 break;
1089 case CXGB4_CONTROL_DB_DROP:
1090 recover_queues(ctx);
1091 mutex_lock(&ctx->dev->rdev.stats.lock);
1092 ctx->dev->rdev.stats.db_drop++;
1093 mutex_unlock(&ctx->dev->rdev.stats.lock);
1094 break;
1095 default:
1096 printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
1097 pci_name(ctx->lldi.pdev), control);
1098 break;
1099 }
1100 return 0;
1101}
1102
1103static struct cxgb4_uld_info c4iw_uld_info = { 563static struct cxgb4_uld_info c4iw_uld_info = {
1104 .name = DRV_NAME, 564 .name = DRV_NAME,
1105 .add = c4iw_uld_add, 565 .add = c4iw_uld_add,
1106 .rx_handler = c4iw_uld_rx_handler, 566 .rx_handler = c4iw_uld_rx_handler,
1107 .state_change = c4iw_uld_state_change, 567 .state_change = c4iw_uld_state_change,
1108 .control = c4iw_uld_control,
1109}; 568};
1110 569
1111static int __init c4iw_init_module(void) 570static int __init c4iw_init_module(void)
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index cf2f6b47617..c13041a0aeb 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -42,7 +42,6 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
42{ 42{
43 struct ib_event event; 43 struct ib_event event;
44 struct c4iw_qp_attributes attrs; 44 struct c4iw_qp_attributes attrs;
45 unsigned long flag;
46 45
47 if ((qhp->attr.state == C4IW_QP_STATE_ERROR) || 46 if ((qhp->attr.state == C4IW_QP_STATE_ERROR) ||
48 (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) { 47 (qhp->attr.state == C4IW_QP_STATE_TERMINATE)) {
@@ -73,9 +72,7 @@ static void post_qp_event(struct c4iw_dev *dev, struct c4iw_cq *chp,
73 if (qhp->ibqp.event_handler) 72 if (qhp->ibqp.event_handler)
74 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context); 73 (*qhp->ibqp.event_handler)(&event, qhp->ibqp.qp_context);
75 74
76 spin_lock_irqsave(&chp->comp_handler_lock, flag);
77 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); 75 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
78 spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
79} 76}
80 77
81void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe) 78void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
@@ -84,7 +81,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
84 struct c4iw_qp *qhp; 81 struct c4iw_qp *qhp;
85 u32 cqid; 82 u32 cqid;
86 83
87 spin_lock_irq(&dev->lock); 84 spin_lock(&dev->lock);
88 qhp = get_qhp(dev, CQE_QPID(err_cqe)); 85 qhp = get_qhp(dev, CQE_QPID(err_cqe));
89 if (!qhp) { 86 if (!qhp) {
90 printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d " 87 printk(KERN_ERR MOD "BAD AE qpid 0x%x opcode %d "
@@ -93,7 +90,7 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
93 CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), 90 CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
94 CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), 91 CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
95 CQE_WRID_LOW(err_cqe)); 92 CQE_WRID_LOW(err_cqe));
96 spin_unlock_irq(&dev->lock); 93 spin_unlock(&dev->lock);
97 goto out; 94 goto out;
98 } 95 }
99 96
@@ -109,13 +106,13 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
109 CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe), 106 CQE_OPCODE(err_cqe), CQE_STATUS(err_cqe),
110 CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe), 107 CQE_TYPE(err_cqe), CQE_WRID_HI(err_cqe),
111 CQE_WRID_LOW(err_cqe)); 108 CQE_WRID_LOW(err_cqe));
112 spin_unlock_irq(&dev->lock); 109 spin_unlock(&dev->lock);
113 goto out; 110 goto out;
114 } 111 }
115 112
116 c4iw_qp_add_ref(&qhp->ibqp); 113 c4iw_qp_add_ref(&qhp->ibqp);
117 atomic_inc(&chp->refcnt); 114 atomic_inc(&chp->refcnt);
118 spin_unlock_irq(&dev->lock); 115 spin_unlock(&dev->lock);
119 116
120 /* Bad incoming write */ 117 /* Bad incoming write */
121 if (RQ_TYPE(err_cqe) && 118 if (RQ_TYPE(err_cqe) &&
@@ -186,14 +183,11 @@ out:
186int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) 183int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
187{ 184{
188 struct c4iw_cq *chp; 185 struct c4iw_cq *chp;
189 unsigned long flag;
190 186
191 chp = get_chp(dev, qid); 187 chp = get_chp(dev, qid);
192 if (chp) { 188 if (chp)
193 spin_lock_irqsave(&chp->comp_handler_lock, flag);
194 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); 189 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
195 spin_unlock_irqrestore(&chp->comp_handler_lock, flag); 190 else
196 } else
197 PDBG("%s unknown cqid 0x%x\n", __func__, qid); 191 PDBG("%s unknown cqid 0x%x\n", __func__, qid);
198 return 0; 192 return 0;
199} 193}
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
deleted file mode 100644
index f95e5df30db..00000000000
--- a/drivers/infiniband/hw/cxgb4/id_table.c
+++ /dev/null
@@ -1,112 +0,0 @@
1/*
2 * Copyright (c) 2011 Chelsio Communications. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/kernel.h>
33#include <linux/random.h>
34#include "iw_cxgb4.h"
35
36#define RANDOM_SKIP 16
37
38/*
39 * Trivial bitmap-based allocator. If the random flag is set, the
40 * allocator is designed to:
41 * - pseudo-randomize the id returned such that it is not trivially predictable.
42 * - avoid reuse of recently used id (at the expense of predictability)
43 */
44u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
45{
46 unsigned long flags;
47 u32 obj;
48
49 spin_lock_irqsave(&alloc->lock, flags);
50
51 obj = find_next_zero_bit(alloc->table, alloc->max, alloc->last);
52 if (obj >= alloc->max)
53 obj = find_first_zero_bit(alloc->table, alloc->max);
54
55 if (obj < alloc->max) {
56 if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
57 alloc->last += random32() % RANDOM_SKIP;
58 else
59 alloc->last = obj + 1;
60 if (alloc->last >= alloc->max)
61 alloc->last = 0;
62 set_bit(obj, alloc->table);
63 obj += alloc->start;
64 } else
65 obj = -1;
66
67 spin_unlock_irqrestore(&alloc->lock, flags);
68 return obj;
69}
70
71void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj)
72{
73 unsigned long flags;
74
75 obj -= alloc->start;
76 BUG_ON((int)obj < 0);
77
78 spin_lock_irqsave(&alloc->lock, flags);
79 clear_bit(obj, alloc->table);
80 spin_unlock_irqrestore(&alloc->lock, flags);
81}
82
83int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
84 u32 reserved, u32 flags)
85{
86 int i;
87
88 alloc->start = start;
89 alloc->flags = flags;
90 if (flags & C4IW_ID_TABLE_F_RANDOM)
91 alloc->last = random32() % RANDOM_SKIP;
92 else
93 alloc->last = 0;
94 alloc->max = num;
95 spin_lock_init(&alloc->lock);
96 alloc->table = kmalloc(BITS_TO_LONGS(num) * sizeof(long),
97 GFP_KERNEL);
98 if (!alloc->table)
99 return -ENOMEM;
100
101 bitmap_zero(alloc->table, num);
102 if (!(alloc->flags & C4IW_ID_TABLE_F_EMPTY))
103 for (i = 0; i < reserved; ++i)
104 set_bit(i, alloc->table);
105
106 return 0;
107}
108
109void c4iw_id_table_free(struct c4iw_id_table *alloc)
110{
111 kfree(alloc->table);
112}
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 9c1644fb025..4f045375c8e 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -45,6 +45,7 @@
45#include <linux/kref.h> 45#include <linux/kref.h>
46#include <linux/timer.h> 46#include <linux/timer.h>
47#include <linux/io.h> 47#include <linux/io.h>
48#include <linux/kfifo.h>
48 49
49#include <asm/byteorder.h> 50#include <asm/byteorder.h>
50 51
@@ -78,22 +79,13 @@ static inline void *cplhdr(struct sk_buff *skb)
78 return skb->data; 79 return skb->data;
79} 80}
80 81
81#define C4IW_ID_TABLE_F_RANDOM 1 /* Pseudo-randomize the id's returned */
82#define C4IW_ID_TABLE_F_EMPTY 2 /* Table is initially empty */
83
84struct c4iw_id_table {
85 u32 flags;
86 u32 start; /* logical minimal id */
87 u32 last; /* hint for find */
88 u32 max;
89 spinlock_t lock;
90 unsigned long *table;
91};
92
93struct c4iw_resource { 82struct c4iw_resource {
94 struct c4iw_id_table tpt_table; 83 struct kfifo tpt_fifo;
95 struct c4iw_id_table qid_table; 84 spinlock_t tpt_fifo_lock;
96 struct c4iw_id_table pdid_table; 85 struct kfifo qid_fifo;
86 spinlock_t qid_fifo_lock;
87 struct kfifo pdid_fifo;
88 spinlock_t pdid_fifo_lock;
97}; 89};
98 90
99struct c4iw_qid_list { 91struct c4iw_qid_list {
@@ -111,30 +103,6 @@ enum c4iw_rdev_flags {
111 T4_FATAL_ERROR = (1<<0), 103 T4_FATAL_ERROR = (1<<0),
112}; 104};
113 105
114struct c4iw_stat {
115 u64 total;
116 u64 cur;
117 u64 max;
118 u64 fail;
119};
120
121struct c4iw_stats {
122 struct mutex lock;
123 struct c4iw_stat qid;
124 struct c4iw_stat pd;
125 struct c4iw_stat stag;
126 struct c4iw_stat pbl;
127 struct c4iw_stat rqt;
128 struct c4iw_stat ocqp;
129 u64 db_full;
130 u64 db_empty;
131 u64 db_drop;
132 u64 db_state_transitions;
133 u64 tcam_full;
134 u64 act_ofld_conn_fails;
135 u64 pas_ofld_conn_fails;
136};
137
138struct c4iw_rdev { 106struct c4iw_rdev {
139 struct c4iw_resource resource; 107 struct c4iw_resource resource;
140 unsigned long qpshift; 108 unsigned long qpshift;
@@ -149,7 +117,6 @@ struct c4iw_rdev {
149 struct cxgb4_lld_info lldi; 117 struct cxgb4_lld_info lldi;
150 unsigned long oc_mw_pa; 118 unsigned long oc_mw_pa;
151 void __iomem *oc_mw_kva; 119 void __iomem *oc_mw_kva;
152 struct c4iw_stats stats;
153}; 120};
154 121
155static inline int c4iw_fatal_error(struct c4iw_rdev *rdev) 122static inline int c4iw_fatal_error(struct c4iw_rdev *rdev)
@@ -208,12 +175,6 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
208 return wr_waitp->ret; 175 return wr_waitp->ret;
209} 176}
210 177
211enum db_state {
212 NORMAL = 0,
213 FLOW_CONTROL = 1,
214 RECOVERY = 2
215};
216
217struct c4iw_dev { 178struct c4iw_dev {
218 struct ib_device ibdev; 179 struct ib_device ibdev;
219 struct c4iw_rdev rdev; 180 struct c4iw_rdev rdev;
@@ -222,13 +183,7 @@ struct c4iw_dev {
222 struct idr qpidr; 183 struct idr qpidr;
223 struct idr mmidr; 184 struct idr mmidr;
224 spinlock_t lock; 185 spinlock_t lock;
225 struct mutex db_mutex;
226 struct dentry *debugfs_root; 186 struct dentry *debugfs_root;
227 enum db_state db_state;
228 int qpcnt;
229 struct idr hwtid_idr;
230 struct idr atid_idr;
231 struct idr stid_idr;
232}; 187};
233 188
234static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) 189static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@ -256,57 +211,29 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
256 return idr_find(&rhp->mmidr, mmid); 211 return idr_find(&rhp->mmidr, mmid);
257} 212}
258 213
259static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr, 214static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
260 void *handle, u32 id, int lock) 215 void *handle, u32 id)
261{ 216{
262 int ret; 217 int ret;
263 int newid; 218 int newid;
264 219
265 do { 220 do {
266 if (!idr_pre_get(idr, lock ? GFP_KERNEL : GFP_ATOMIC)) 221 if (!idr_pre_get(idr, GFP_KERNEL))
267 return -ENOMEM; 222 return -ENOMEM;
268 if (lock) 223 spin_lock_irq(&rhp->lock);
269 spin_lock_irq(&rhp->lock);
270 ret = idr_get_new_above(idr, handle, id, &newid); 224 ret = idr_get_new_above(idr, handle, id, &newid);
271 BUG_ON(!ret && newid != id); 225 BUG_ON(newid != id);
272 if (lock) 226 spin_unlock_irq(&rhp->lock);
273 spin_unlock_irq(&rhp->lock);
274 } while (ret == -EAGAIN); 227 } while (ret == -EAGAIN);
275 228
276 return ret; 229 return ret;
277} 230}
278 231
279static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
280 void *handle, u32 id)
281{
282 return _insert_handle(rhp, idr, handle, id, 1);
283}
284
285static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
286 void *handle, u32 id)
287{
288 return _insert_handle(rhp, idr, handle, id, 0);
289}
290
291static inline void _remove_handle(struct c4iw_dev *rhp, struct idr *idr,
292 u32 id, int lock)
293{
294 if (lock)
295 spin_lock_irq(&rhp->lock);
296 idr_remove(idr, id);
297 if (lock)
298 spin_unlock_irq(&rhp->lock);
299}
300
301static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id) 232static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
302{ 233{
303 _remove_handle(rhp, idr, id, 1); 234 spin_lock_irq(&rhp->lock);
304} 235 idr_remove(idr, id);
305 236 spin_unlock_irq(&rhp->lock);
306static inline void remove_handle_nolock(struct c4iw_dev *rhp,
307 struct idr *idr, u32 id)
308{
309 _remove_handle(rhp, idr, id, 0);
310} 237}
311 238
312struct c4iw_pd { 239struct c4iw_pd {
@@ -382,7 +309,6 @@ struct c4iw_cq {
382 struct c4iw_dev *rhp; 309 struct c4iw_dev *rhp;
383 struct t4_cq cq; 310 struct t4_cq cq;
384 spinlock_t lock; 311 spinlock_t lock;
385 spinlock_t comp_handler_lock;
386 atomic_t refcnt; 312 atomic_t refcnt;
387 wait_queue_head_t wait; 313 wait_queue_head_t wait;
388}; 314};
@@ -397,7 +323,6 @@ struct c4iw_mpa_attributes {
397 u8 recv_marker_enabled; 323 u8 recv_marker_enabled;
398 u8 xmit_marker_enabled; 324 u8 xmit_marker_enabled;
399 u8 crc_enabled; 325 u8 crc_enabled;
400 u8 enhanced_rdma_conn;
401 u8 version; 326 u8 version;
402 u8 p2p_type; 327 u8 p2p_type;
403}; 328};
@@ -424,10 +349,6 @@ struct c4iw_qp_attributes {
424 u8 is_terminate_local; 349 u8 is_terminate_local;
425 struct c4iw_mpa_attributes mpa_attr; 350 struct c4iw_mpa_attributes mpa_attr;
426 struct c4iw_ep *llp_stream_handle; 351 struct c4iw_ep *llp_stream_handle;
427 u8 layer_etype;
428 u8 ecode;
429 u16 sq_db_inc;
430 u16 rq_db_inc;
431}; 352};
432 353
433struct c4iw_qp { 354struct c4iw_qp {
@@ -502,8 +423,6 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,
502 423
503enum c4iw_qp_attr_mask { 424enum c4iw_qp_attr_mask {
504 C4IW_QP_ATTR_NEXT_STATE = 1 << 0, 425 C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
505 C4IW_QP_ATTR_SQ_DB = 1<<1,
506 C4IW_QP_ATTR_RQ_DB = 1<<2,
507 C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7, 426 C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
508 C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8, 427 C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
509 C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9, 428 C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
@@ -557,23 +476,6 @@ static inline int c4iw_convert_state(enum ib_qp_state ib_state)
557 } 476 }
558} 477}
559 478
560static inline int to_ib_qp_state(int c4iw_qp_state)
561{
562 switch (c4iw_qp_state) {
563 case C4IW_QP_STATE_IDLE:
564 return IB_QPS_INIT;
565 case C4IW_QP_STATE_RTS:
566 return IB_QPS_RTS;
567 case C4IW_QP_STATE_CLOSING:
568 return IB_QPS_SQD;
569 case C4IW_QP_STATE_TERMINATE:
570 return IB_QPS_SQE;
571 case C4IW_QP_STATE_ERROR:
572 return IB_QPS_ERR;
573 }
574 return IB_QPS_ERR;
575}
576
577static inline u32 c4iw_ib_to_tpt_access(int a) 479static inline u32 c4iw_ib_to_tpt_access(int a)
578{ 480{
579 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) | 481 return (a & IB_ACCESS_REMOTE_WRITE ? FW_RI_MEM_ACCESS_REM_WRITE : 0) |
@@ -599,18 +501,11 @@ enum c4iw_mmid_state {
599#define MPA_KEY_REP "MPA ID Rep Frame" 501#define MPA_KEY_REP "MPA ID Rep Frame"
600 502
601#define MPA_MAX_PRIVATE_DATA 256 503#define MPA_MAX_PRIVATE_DATA 256
602#define MPA_ENHANCED_RDMA_CONN 0x10
603#define MPA_REJECT 0x20 504#define MPA_REJECT 0x20
604#define MPA_CRC 0x40 505#define MPA_CRC 0x40
605#define MPA_MARKERS 0x80 506#define MPA_MARKERS 0x80
606#define MPA_FLAGS_MASK 0xE0 507#define MPA_FLAGS_MASK 0xE0
607 508
608#define MPA_V2_PEER2PEER_MODEL 0x8000
609#define MPA_V2_ZERO_LEN_FPDU_RTR 0x4000
610#define MPA_V2_RDMA_WRITE_RTR 0x8000
611#define MPA_V2_RDMA_READ_RTR 0x4000
612#define MPA_V2_IRD_ORD_MASK 0x3FFF
613
614#define c4iw_put_ep(ep) { \ 509#define c4iw_put_ep(ep) { \
615 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \ 510 PDBG("put_ep (via %s:%u) ep %p refcnt %d\n", __func__, __LINE__, \
616 ep, atomic_read(&((ep)->kref.refcount))); \ 511 ep, atomic_read(&((ep)->kref.refcount))); \
@@ -633,11 +528,6 @@ struct mpa_message {
633 u8 private_data[0]; 528 u8 private_data[0];
634}; 529};
635 530
636struct mpa_v2_conn_params {
637 __be16 ird;
638 __be16 ord;
639};
640
641struct terminate_message { 531struct terminate_message {
642 u8 layer_etype; 532 u8 layer_etype;
643 u8 ecode; 533 u8 ecode;
@@ -690,10 +580,7 @@ enum c4iw_ddp_ecodes {
690 580
691enum c4iw_mpa_ecodes { 581enum c4iw_mpa_ecodes {
692 MPA_CRC_ERR = 0x02, 582 MPA_CRC_ERR = 0x02,
693 MPA_MARKER_ERR = 0x03, 583 MPA_MARKER_ERR = 0x03
694 MPA_LOCAL_CATA = 0x05,
695 MPA_INSUFF_IRD = 0x06,
696 MPA_NOMATCH_RTR = 0x07,
697}; 584};
698 585
699enum c4iw_ep_state { 586enum c4iw_ep_state {
@@ -718,31 +605,6 @@ enum c4iw_ep_flags {
718 CLOSE_SENT = 3, 605 CLOSE_SENT = 3,
719}; 606};
720 607
721enum c4iw_ep_history {
722 ACT_OPEN_REQ = 0,
723 ACT_OFLD_CONN = 1,
724 ACT_OPEN_RPL = 2,
725 ACT_ESTAB = 3,
726 PASS_ACCEPT_REQ = 4,
727 PASS_ESTAB = 5,
728 ABORT_UPCALL = 6,
729 ESTAB_UPCALL = 7,
730 CLOSE_UPCALL = 8,
731 ULP_ACCEPT = 9,
732 ULP_REJECT = 10,
733 TIMEDOUT = 11,
734 PEER_ABORT = 12,
735 PEER_CLOSE = 13,
736 CONNREQ_UPCALL = 14,
737 ABORT_CONN = 15,
738 DISCONN_UPCALL = 16,
739 EP_DISC_CLOSE = 17,
740 EP_DISC_ABORT = 18,
741 CONN_RPL_UPCALL = 19,
742 ACT_RETRY_NOMEM = 20,
743 ACT_RETRY_INUSE = 21
744};
745
746struct c4iw_ep_common { 608struct c4iw_ep_common {
747 struct iw_cm_id *cm_id; 609 struct iw_cm_id *cm_id;
748 struct c4iw_qp *qp; 610 struct c4iw_qp *qp;
@@ -754,7 +616,6 @@ struct c4iw_ep_common {
754 struct sockaddr_in remote_addr; 616 struct sockaddr_in remote_addr;
755 struct c4iw_wr_wait wr_wait; 617 struct c4iw_wr_wait wr_wait;
756 unsigned long flags; 618 unsigned long flags;
757 unsigned long history;
758}; 619};
759 620
760struct c4iw_listen_ep { 621struct c4iw_listen_ep {
@@ -790,9 +651,6 @@ struct c4iw_ep {
790 u16 txq_idx; 651 u16 txq_idx;
791 u16 ctrlq_idx; 652 u16 ctrlq_idx;
792 u8 tos; 653 u8 tos;
793 u8 retry_with_mpa_v1;
794 u8 tried_with_mpa_v1;
795 unsigned int retry_count;
796}; 654};
797 655
798static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id) 656static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
@@ -814,20 +672,14 @@ static inline int compute_wscale(int win)
814 return wscale; 672 return wscale;
815} 673}
816 674
817u32 c4iw_id_alloc(struct c4iw_id_table *alloc);
818void c4iw_id_free(struct c4iw_id_table *alloc, u32 obj);
819int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
820 u32 reserved, u32 flags);
821void c4iw_id_table_free(struct c4iw_id_table *alloc);
822
823typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb); 675typedef int (*c4iw_handler_func)(struct c4iw_dev *dev, struct sk_buff *skb);
824 676
825int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new, 677int c4iw_ep_redirect(void *ctx, struct dst_entry *old, struct dst_entry *new,
826 struct l2t_entry *l2t); 678 struct l2t_entry *l2t);
827void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid, 679void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qpid,
828 struct c4iw_dev_ucontext *uctx); 680 struct c4iw_dev_ucontext *uctx);
829u32 c4iw_get_resource(struct c4iw_id_table *id_table); 681u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock);
830void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry); 682void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock);
831int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid); 683int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid);
832int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev); 684int c4iw_init_ctrl_qp(struct c4iw_rdev *rdev);
833int c4iw_pblpool_create(struct c4iw_rdev *rdev); 685int c4iw_pblpool_create(struct c4iw_rdev *rdev);
@@ -896,8 +748,6 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd,
896 struct ib_udata *udata); 748 struct ib_udata *udata);
897int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, 749int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
898 int attr_mask, struct ib_udata *udata); 750 int attr_mask, struct ib_udata *udata);
899int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
900 int attr_mask, struct ib_qp_init_attr *init_attr);
901struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn); 751struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn);
902u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size); 752u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size);
903void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size); 753void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size);
@@ -926,7 +776,5 @@ void c4iw_ev_dispatch(struct c4iw_dev *dev, struct t4_cqe *err_cqe);
926extern struct cxgb4_client t4c_client; 776extern struct cxgb4_client t4c_client;
927extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS]; 777extern c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS];
928extern int c4iw_max_read_depth; 778extern int c4iw_max_read_depth;
929extern int db_fc_threshold;
930
931 779
932#endif 780#endif
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index afd81790ab3..40c835309e4 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -131,14 +131,10 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
131 stag_idx = (*stag) >> 8; 131 stag_idx = (*stag) >> 8;
132 132
133 if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) { 133 if ((!reset_tpt_entry) && (*stag == T4_STAG_UNSET)) {
134 stag_idx = c4iw_get_resource(&rdev->resource.tpt_table); 134 stag_idx = c4iw_get_resource(&rdev->resource.tpt_fifo,
135 &rdev->resource.tpt_fifo_lock);
135 if (!stag_idx) 136 if (!stag_idx)
136 return -ENOMEM; 137 return -ENOMEM;
137 mutex_lock(&rdev->stats.lock);
138 rdev->stats.stag.cur += 32;
139 if (rdev->stats.stag.cur > rdev->stats.stag.max)
140 rdev->stats.stag.max = rdev->stats.stag.cur;
141 mutex_unlock(&rdev->stats.lock);
142 *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff); 138 *stag = (stag_idx << 8) | (atomic_inc_return(&key) & 0xff);
143 } 139 }
144 PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n", 140 PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
@@ -169,12 +165,9 @@ static int write_tpt_entry(struct c4iw_rdev *rdev, u32 reset_tpt_entry,
169 (rdev->lldi.vr->stag.start >> 5), 165 (rdev->lldi.vr->stag.start >> 5),
170 sizeof(tpt), &tpt); 166 sizeof(tpt), &tpt);
171 167
172 if (reset_tpt_entry) { 168 if (reset_tpt_entry)
173 c4iw_put_resource(&rdev->resource.tpt_table, stag_idx); 169 c4iw_put_resource(&rdev->resource.tpt_fifo, stag_idx,
174 mutex_lock(&rdev->stats.lock); 170 &rdev->resource.tpt_fifo_lock);
175 rdev->stats.stag.cur -= 32;
176 mutex_unlock(&rdev->stats.lock);
177 }
178 return err; 171 return err;
179} 172}
180 173
@@ -468,7 +461,7 @@ struct ib_mr *c4iw_register_phys_mem(struct ib_pd *pd,
468 ret = alloc_pbl(mhp, npages); 461 ret = alloc_pbl(mhp, npages);
469 if (ret) { 462 if (ret) {
470 kfree(page_list); 463 kfree(page_list);
471 goto err; 464 goto err_pbl;
472 } 465 }
473 466
474 ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr, 467 ret = write_pbl(&mhp->rhp->rdev, page_list, mhp->attr.pbl_addr,
@@ -693,8 +686,8 @@ int c4iw_dealloc_mw(struct ib_mw *mw)
693 mhp = to_c4iw_mw(mw); 686 mhp = to_c4iw_mw(mw);
694 rhp = mhp->rhp; 687 rhp = mhp->rhp;
695 mmid = (mw->rkey) >> 8; 688 mmid = (mw->rkey) >> 8;
696 remove_handle(rhp, &rhp->mmidr, mmid);
697 deallocate_window(&rhp->rdev, mhp->attr.stag); 689 deallocate_window(&rhp->rdev, mhp->attr.stag);
690 remove_handle(rhp, &rhp->mmidr, mmid);
698 kfree(mhp); 691 kfree(mhp);
699 PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp); 692 PDBG("%s ib_mw %p mmid 0x%x ptr %p\n", __func__, mw, mmid, mhp);
700 return 0; 693 return 0;
@@ -796,12 +789,12 @@ int c4iw_dereg_mr(struct ib_mr *ib_mr)
796 mhp = to_c4iw_mr(ib_mr); 789 mhp = to_c4iw_mr(ib_mr);
797 rhp = mhp->rhp; 790 rhp = mhp->rhp;
798 mmid = mhp->attr.stag >> 8; 791 mmid = mhp->attr.stag >> 8;
799 remove_handle(rhp, &rhp->mmidr, mmid);
800 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size, 792 dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
801 mhp->attr.pbl_addr); 793 mhp->attr.pbl_addr);
802 if (mhp->attr.pbl_size) 794 if (mhp->attr.pbl_size)
803 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr, 795 c4iw_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
804 mhp->attr.pbl_size << 3); 796 mhp->attr.pbl_size << 3);
797 remove_handle(rhp, &rhp->mmidr, mmid);
805 if (mhp->kva) 798 if (mhp->kva)
806 kfree((void *) (unsigned long) mhp->kva); 799 kfree((void *) (unsigned long) mhp->kva);
807 if (mhp->umem) 800 if (mhp->umem)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index e084fdc6da7..247fe706e7f 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -188,10 +188,8 @@ static int c4iw_deallocate_pd(struct ib_pd *pd)
188 php = to_c4iw_pd(pd); 188 php = to_c4iw_pd(pd);
189 rhp = php->rhp; 189 rhp = php->rhp;
190 PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid); 190 PDBG("%s ibpd %p pdid 0x%x\n", __func__, pd, php->pdid);
191 c4iw_put_resource(&rhp->rdev.resource.pdid_table, php->pdid); 191 c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, php->pdid,
192 mutex_lock(&rhp->rdev.stats.lock); 192 &rhp->rdev.resource.pdid_fifo_lock);
193 rhp->rdev.stats.pd.cur--;
194 mutex_unlock(&rhp->rdev.stats.lock);
195 kfree(php); 193 kfree(php);
196 return 0; 194 return 0;
197} 195}
@@ -206,12 +204,14 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
206 204
207 PDBG("%s ibdev %p\n", __func__, ibdev); 205 PDBG("%s ibdev %p\n", __func__, ibdev);
208 rhp = (struct c4iw_dev *) ibdev; 206 rhp = (struct c4iw_dev *) ibdev;
209 pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_table); 207 pdid = c4iw_get_resource(&rhp->rdev.resource.pdid_fifo,
208 &rhp->rdev.resource.pdid_fifo_lock);
210 if (!pdid) 209 if (!pdid)
211 return ERR_PTR(-EINVAL); 210 return ERR_PTR(-EINVAL);
212 php = kzalloc(sizeof(*php), GFP_KERNEL); 211 php = kzalloc(sizeof(*php), GFP_KERNEL);
213 if (!php) { 212 if (!php) {
214 c4iw_put_resource(&rhp->rdev.resource.pdid_table, pdid); 213 c4iw_put_resource(&rhp->rdev.resource.pdid_fifo, pdid,
214 &rhp->rdev.resource.pdid_fifo_lock);
215 return ERR_PTR(-ENOMEM); 215 return ERR_PTR(-ENOMEM);
216 } 216 }
217 php->pdid = pdid; 217 php->pdid = pdid;
@@ -222,11 +222,6 @@ static struct ib_pd *c4iw_allocate_pd(struct ib_device *ibdev,
222 return ERR_PTR(-EFAULT); 222 return ERR_PTR(-EFAULT);
223 } 223 }
224 } 224 }
225 mutex_lock(&rhp->rdev.stats.lock);
226 rhp->rdev.stats.pd.cur++;
227 if (rhp->rdev.stats.pd.cur > rhp->rdev.stats.pd.max)
228 rhp->rdev.stats.pd.max = rhp->rdev.stats.pd.cur;
229 mutex_unlock(&rhp->rdev.stats.lock);
230 PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php); 225 PDBG("%s pdid 0x%0x ptr 0x%p\n", __func__, pdid, php);
231 return &php->ibpd; 226 return &php->ibpd;
232} 227}
@@ -334,7 +329,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
334 props->gid_tbl_len = 1; 329 props->gid_tbl_len = 1;
335 props->pkey_tbl_len = 1; 330 props->pkey_tbl_len = 1;
336 props->active_width = 2; 331 props->active_width = 2;
337 props->active_speed = IB_SPEED_DDR; 332 props->active_speed = 2;
338 props->max_msg_sz = -1; 333 props->max_msg_sz = -1;
339 334
340 return 0; 335 return 0;
@@ -443,7 +438,6 @@ int c4iw_register_device(struct c4iw_dev *dev)
443 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | 438 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
444 (1ull << IB_USER_VERBS_CMD_CREATE_QP) | 439 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
445 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) | 440 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
446 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
447 (1ull << IB_USER_VERBS_CMD_POLL_CQ) | 441 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
448 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) | 442 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
449 (1ull << IB_USER_VERBS_CMD_POST_SEND) | 443 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
@@ -466,7 +460,6 @@ int c4iw_register_device(struct c4iw_dev *dev)
466 dev->ibdev.destroy_ah = c4iw_ah_destroy; 460 dev->ibdev.destroy_ah = c4iw_ah_destroy;
467 dev->ibdev.create_qp = c4iw_create_qp; 461 dev->ibdev.create_qp = c4iw_create_qp;
468 dev->ibdev.modify_qp = c4iw_ib_modify_qp; 462 dev->ibdev.modify_qp = c4iw_ib_modify_qp;
469 dev->ibdev.query_qp = c4iw_ib_query_qp;
470 dev->ibdev.destroy_qp = c4iw_destroy_qp; 463 dev->ibdev.destroy_qp = c4iw_destroy_qp;
471 dev->ibdev.create_cq = c4iw_create_cq; 464 dev->ibdev.create_cq = c4iw_create_cq;
472 dev->ibdev.destroy_cq = c4iw_destroy_cq; 465 dev->ibdev.destroy_cq = c4iw_destroy_cq;
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 05bfe53bff6..a41578e48c7 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -29,24 +29,12 @@
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32
33#include <linux/module.h>
34
35#include "iw_cxgb4.h" 32#include "iw_cxgb4.h"
36 33
37static int db_delay_usecs = 1;
38module_param(db_delay_usecs, int, 0644);
39MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");
40
41static int ocqp_support = 1; 34static int ocqp_support = 1;
42module_param(ocqp_support, int, 0644); 35module_param(ocqp_support, int, 0644);
43MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)"); 36MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
44 37
45int db_fc_threshold = 2000;
46module_param(db_fc_threshold, int, 0644);
47MODULE_PARM_DESC(db_fc_threshold, "QP count/threshold that triggers automatic "
48 "db flow control mode (default = 2000)");
49
50static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state) 38static void set_state(struct c4iw_qp *qhp, enum c4iw_qp_state state)
51{ 39{
52 unsigned long flag; 40 unsigned long flag;
@@ -137,25 +125,19 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
137 return -ENOMEM; 125 return -ENOMEM;
138 126
139 wq->rq.qid = c4iw_get_qpid(rdev, uctx); 127 wq->rq.qid = c4iw_get_qpid(rdev, uctx);
140 if (!wq->rq.qid) { 128 if (!wq->rq.qid)
141 ret = -ENOMEM; 129 goto err1;
142 goto free_sq_qid;
143 }
144 130
145 if (!user) { 131 if (!user) {
146 wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq, 132 wq->sq.sw_sq = kzalloc(wq->sq.size * sizeof *wq->sq.sw_sq,
147 GFP_KERNEL); 133 GFP_KERNEL);
148 if (!wq->sq.sw_sq) { 134 if (!wq->sq.sw_sq)
149 ret = -ENOMEM; 135 goto err2;
150 goto free_rq_qid;
151 }
152 136
153 wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq, 137 wq->rq.sw_rq = kzalloc(wq->rq.size * sizeof *wq->rq.sw_rq,
154 GFP_KERNEL); 138 GFP_KERNEL);
155 if (!wq->rq.sw_rq) { 139 if (!wq->rq.sw_rq)
156 ret = -ENOMEM; 140 goto err3;
157 goto free_sw_sq;
158 }
159 } 141 }
160 142
161 /* 143 /*
@@ -163,23 +145,15 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
163 */ 145 */
164 wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size); 146 wq->rq.rqt_size = roundup_pow_of_two(wq->rq.size);
165 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size); 147 wq->rq.rqt_hwaddr = c4iw_rqtpool_alloc(rdev, wq->rq.rqt_size);
166 if (!wq->rq.rqt_hwaddr) { 148 if (!wq->rq.rqt_hwaddr)
167 ret = -ENOMEM; 149 goto err4;
168 goto free_sw_rq;
169 }
170 150
171 if (user) { 151 if (user) {
172 ret = alloc_oc_sq(rdev, &wq->sq); 152 if (alloc_oc_sq(rdev, &wq->sq) && alloc_host_sq(rdev, &wq->sq))
173 if (ret) 153 goto err5;
174 goto free_hwaddr;
175
176 ret = alloc_host_sq(rdev, &wq->sq);
177 if (ret)
178 goto free_sq;
179 } else 154 } else
180 ret = alloc_host_sq(rdev, &wq->sq); 155 if (alloc_host_sq(rdev, &wq->sq))
181 if (ret) 156 goto err5;
182 goto free_hwaddr;
183 memset(wq->sq.queue, 0, wq->sq.memsize); 157 memset(wq->sq.queue, 0, wq->sq.memsize);
184 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr); 158 dma_unmap_addr_set(&wq->sq, mapping, wq->sq.dma_addr);
185 159
@@ -187,7 +161,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
187 wq->rq.memsize, &(wq->rq.dma_addr), 161 wq->rq.memsize, &(wq->rq.dma_addr),
188 GFP_KERNEL); 162 GFP_KERNEL);
189 if (!wq->rq.queue) 163 if (!wq->rq.queue)
190 goto free_sq; 164 goto err6;
191 PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n", 165 PDBG("%s sq base va 0x%p pa 0x%llx rq base va 0x%p pa 0x%llx\n",
192 __func__, wq->sq.queue, 166 __func__, wq->sq.queue,
193 (unsigned long long)virt_to_phys(wq->sq.queue), 167 (unsigned long long)virt_to_phys(wq->sq.queue),
@@ -215,7 +189,7 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
215 skb = alloc_skb(wr_len, GFP_KERNEL); 189 skb = alloc_skb(wr_len, GFP_KERNEL);
216 if (!skb) { 190 if (!skb) {
217 ret = -ENOMEM; 191 ret = -ENOMEM;
218 goto free_dma; 192 goto err7;
219 } 193 }
220 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0); 194 set_wr_txq(skb, CPL_PRIORITY_CONTROL, 0);
221 195
@@ -280,33 +254,33 @@ static int create_qp(struct c4iw_rdev *rdev, struct t4_wq *wq,
280 254
281 ret = c4iw_ofld_send(rdev, skb); 255 ret = c4iw_ofld_send(rdev, skb);
282 if (ret) 256 if (ret)
283 goto free_dma; 257 goto err7;
284 ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__); 258 ret = c4iw_wait_for_reply(rdev, &wr_wait, 0, wq->sq.qid, __func__);
285 if (ret) 259 if (ret)
286 goto free_dma; 260 goto err7;
287 261
288 PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n", 262 PDBG("%s sqid 0x%x rqid 0x%x kdb 0x%p squdb 0x%llx rqudb 0x%llx\n",
289 __func__, wq->sq.qid, wq->rq.qid, wq->db, 263 __func__, wq->sq.qid, wq->rq.qid, wq->db,
290 (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb); 264 (unsigned long long)wq->sq.udb, (unsigned long long)wq->rq.udb);
291 265
292 return 0; 266 return 0;
293free_dma: 267err7:
294 dma_free_coherent(&(rdev->lldi.pdev->dev), 268 dma_free_coherent(&(rdev->lldi.pdev->dev),
295 wq->rq.memsize, wq->rq.queue, 269 wq->rq.memsize, wq->rq.queue,
296 dma_unmap_addr(&wq->rq, mapping)); 270 dma_unmap_addr(&wq->rq, mapping));
297free_sq: 271err6:
298 dealloc_sq(rdev, &wq->sq); 272 dealloc_sq(rdev, &wq->sq);
299free_hwaddr: 273err5:
300 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size); 274 c4iw_rqtpool_free(rdev, wq->rq.rqt_hwaddr, wq->rq.rqt_size);
301free_sw_rq: 275err4:
302 kfree(wq->rq.sw_rq); 276 kfree(wq->rq.sw_rq);
303free_sw_sq: 277err3:
304 kfree(wq->sq.sw_sq); 278 kfree(wq->sq.sw_sq);
305free_rq_qid: 279err2:
306 c4iw_put_qpid(rdev, wq->rq.qid, uctx); 280 c4iw_put_qpid(rdev, wq->rq.qid, uctx);
307free_sq_qid: 281err1:
308 c4iw_put_qpid(rdev, wq->sq.qid, uctx); 282 c4iw_put_qpid(rdev, wq->sq.qid, uctx);
309 return ret; 283 return -ENOMEM;
310} 284}
311 285
312static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp, 286static int build_immd(struct t4_sq *sq, struct fw_ri_immd *immdp,
@@ -943,11 +917,7 @@ static void post_terminate(struct c4iw_qp *qhp, struct t4_cqe *err_cqe,
943 wqe->u.terminate.type = FW_RI_TYPE_TERMINATE; 917 wqe->u.terminate.type = FW_RI_TYPE_TERMINATE;
944 wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term); 918 wqe->u.terminate.immdlen = cpu_to_be32(sizeof *term);
945 term = (struct terminate_message *)wqe->u.terminate.termmsg; 919 term = (struct terminate_message *)wqe->u.terminate.termmsg;
946 if (qhp->attr.layer_etype == (LAYER_MPA|DDP_LLP)) { 920 build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
947 term->layer_etype = qhp->attr.layer_etype;
948 term->ecode = qhp->attr.ecode;
949 } else
950 build_term_codes(err_cqe, &term->layer_etype, &term->ecode);
951 c4iw_ofld_send(&qhp->rhp->rdev, skb); 921 c4iw_ofld_send(&qhp->rhp->rdev, skb);
952} 922}
953 923
@@ -971,11 +941,8 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
971 flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); 941 flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
972 spin_unlock(&qhp->lock); 942 spin_unlock(&qhp->lock);
973 spin_unlock_irqrestore(&rchp->lock, flag); 943 spin_unlock_irqrestore(&rchp->lock, flag);
974 if (flushed) { 944 if (flushed)
975 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
976 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); 945 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
977 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
978 }
979 946
980 /* locking hierarchy: cq lock first, then qp lock. */ 947 /* locking hierarchy: cq lock first, then qp lock. */
981 spin_lock_irqsave(&schp->lock, flag); 948 spin_lock_irqsave(&schp->lock, flag);
@@ -985,17 +952,13 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
985 flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count); 952 flushed = c4iw_flush_sq(&qhp->wq, &schp->cq, count);
986 spin_unlock(&qhp->lock); 953 spin_unlock(&qhp->lock);
987 spin_unlock_irqrestore(&schp->lock, flag); 954 spin_unlock_irqrestore(&schp->lock, flag);
988 if (flushed) { 955 if (flushed)
989 spin_lock_irqsave(&schp->comp_handler_lock, flag);
990 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); 956 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
991 spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
992 }
993} 957}
994 958
995static void flush_qp(struct c4iw_qp *qhp) 959static void flush_qp(struct c4iw_qp *qhp)
996{ 960{
997 struct c4iw_cq *rchp, *schp; 961 struct c4iw_cq *rchp, *schp;
998 unsigned long flag;
999 962
1000 rchp = get_chp(qhp->rhp, qhp->attr.rcq); 963 rchp = get_chp(qhp->rhp, qhp->attr.rcq);
1001 schp = get_chp(qhp->rhp, qhp->attr.scq); 964 schp = get_chp(qhp->rhp, qhp->attr.scq);
@@ -1003,16 +966,8 @@ static void flush_qp(struct c4iw_qp *qhp)
1003 if (qhp->ibqp.uobject) { 966 if (qhp->ibqp.uobject) {
1004 t4_set_wq_in_error(&qhp->wq); 967 t4_set_wq_in_error(&qhp->wq);
1005 t4_set_cq_in_error(&rchp->cq); 968 t4_set_cq_in_error(&rchp->cq);
1006 spin_lock_irqsave(&rchp->comp_handler_lock, flag); 969 if (schp != rchp)
1007 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
1008 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1009 if (schp != rchp) {
1010 t4_set_cq_in_error(&schp->cq); 970 t4_set_cq_in_error(&schp->cq);
1011 spin_lock_irqsave(&schp->comp_handler_lock, flag);
1012 (*schp->ibcq.comp_handler)(&schp->ibcq,
1013 schp->ibcq.cq_context);
1014 spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1015 }
1016 return; 971 return;
1017 } 972 }
1018 __flush_qp(qhp, rchp, schp); 973 __flush_qp(qhp, rchp, schp);
@@ -1057,7 +1012,6 @@ out:
1057 1012
1058static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init) 1013static void build_rtr_msg(u8 p2p_type, struct fw_ri_init *init)
1059{ 1014{
1060 PDBG("%s p2p_type = %d\n", __func__, p2p_type);
1061 memset(&init->u, 0, sizeof init->u); 1015 memset(&init->u, 0, sizeof init->u);
1062 switch (p2p_type) { 1016 switch (p2p_type) {
1063 case FW_RI_INIT_P2PTYPE_RDMA_WRITE: 1017 case FW_RI_INIT_P2PTYPE_RDMA_WRITE:
@@ -1151,35 +1105,6 @@ out:
1151 return ret; 1105 return ret;
1152} 1106}
1153 1107
1154/*
1155 * Called by the library when the qp has user dbs disabled due to
1156 * a DB_FULL condition. This function will single-thread all user
1157 * DB rings to avoid overflowing the hw db-fifo.
1158 */
1159static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
1160{
1161 int delay = db_delay_usecs;
1162
1163 mutex_lock(&qhp->rhp->db_mutex);
1164 do {
1165
1166 /*
1167 * The interrupt threshold is dbfifo_int_thresh << 6. So
1168 * make sure we don't cross that and generate an interrupt.
1169 */
1170 if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) <
1171 (qhp->rhp->rdev.lldi.dbfifo_int_thresh << 5)) {
1172 writel(QID(qid) | PIDX(inc), qhp->wq.db);
1173 break;
1174 }
1175 set_current_state(TASK_UNINTERRUPTIBLE);
1176 schedule_timeout(usecs_to_jiffies(delay));
1177 delay = min(delay << 1, 2000);
1178 } while (1);
1179 mutex_unlock(&qhp->rhp->db_mutex);
1180 return 0;
1181}
1182
1183int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp, 1108int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1184 enum c4iw_qp_attr_mask mask, 1109 enum c4iw_qp_attr_mask mask,
1185 struct c4iw_qp_attributes *attrs, 1110 struct c4iw_qp_attributes *attrs,
@@ -1228,15 +1153,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1228 qhp->attr = newattr; 1153 qhp->attr = newattr;
1229 } 1154 }
1230 1155
1231 if (mask & C4IW_QP_ATTR_SQ_DB) {
1232 ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
1233 goto out;
1234 }
1235 if (mask & C4IW_QP_ATTR_RQ_DB) {
1236 ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
1237 goto out;
1238 }
1239
1240 if (!(mask & C4IW_QP_ATTR_NEXT_STATE)) 1156 if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
1241 goto out; 1157 goto out;
1242 if (qhp->attr.state == attrs->next_state) 1158 if (qhp->attr.state == attrs->next_state)
@@ -1290,16 +1206,12 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1290 disconnect = 1; 1206 disconnect = 1;
1291 c4iw_get_ep(&qhp->ep->com); 1207 c4iw_get_ep(&qhp->ep->com);
1292 } 1208 }
1293 if (qhp->ibqp.uobject)
1294 t4_set_wq_in_error(&qhp->wq);
1295 ret = rdma_fini(rhp, qhp, ep); 1209 ret = rdma_fini(rhp, qhp, ep);
1296 if (ret) 1210 if (ret)
1297 goto err; 1211 goto err;
1298 break; 1212 break;
1299 case C4IW_QP_STATE_TERMINATE: 1213 case C4IW_QP_STATE_TERMINATE:
1300 set_state(qhp, C4IW_QP_STATE_TERMINATE); 1214 set_state(qhp, C4IW_QP_STATE_TERMINATE);
1301 qhp->attr.layer_etype = attrs->layer_etype;
1302 qhp->attr.ecode = attrs->ecode;
1303 if (qhp->ibqp.uobject) 1215 if (qhp->ibqp.uobject)
1304 t4_set_wq_in_error(&qhp->wq); 1216 t4_set_wq_in_error(&qhp->wq);
1305 ep = qhp->ep; 1217 ep = qhp->ep;
@@ -1310,8 +1222,6 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
1310 break; 1222 break;
1311 case C4IW_QP_STATE_ERROR: 1223 case C4IW_QP_STATE_ERROR:
1312 set_state(qhp, C4IW_QP_STATE_ERROR); 1224 set_state(qhp, C4IW_QP_STATE_ERROR);
1313 if (qhp->ibqp.uobject)
1314 t4_set_wq_in_error(&qhp->wq);
1315 if (!internal) { 1225 if (!internal) {
1316 abort = 1; 1226 abort = 1;
1317 disconnect = 1; 1227 disconnect = 1;
@@ -1413,14 +1323,6 @@ out:
1413 return ret; 1323 return ret;
1414} 1324}
1415 1325
1416static int enable_qp_db(int id, void *p, void *data)
1417{
1418 struct c4iw_qp *qp = p;
1419
1420 t4_enable_wq_db(&qp->wq);
1421 return 0;
1422}
1423
1424int c4iw_destroy_qp(struct ib_qp *ib_qp) 1326int c4iw_destroy_qp(struct ib_qp *ib_qp)
1425{ 1327{
1426 struct c4iw_dev *rhp; 1328 struct c4iw_dev *rhp;
@@ -1432,22 +1334,10 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1432 rhp = qhp->rhp; 1334 rhp = qhp->rhp;
1433 1335
1434 attrs.next_state = C4IW_QP_STATE_ERROR; 1336 attrs.next_state = C4IW_QP_STATE_ERROR;
1435 if (qhp->attr.state == C4IW_QP_STATE_TERMINATE) 1337 c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1436 c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1437 else
1438 c4iw_modify_qp(rhp, qhp, C4IW_QP_ATTR_NEXT_STATE, &attrs, 0);
1439 wait_event(qhp->wait, !qhp->ep); 1338 wait_event(qhp->wait, !qhp->ep);
1440 1339
1441 spin_lock_irq(&rhp->lock); 1340 remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
1442 remove_handle_nolock(rhp, &rhp->qpidr, qhp->wq.sq.qid);
1443 rhp->qpcnt--;
1444 BUG_ON(rhp->qpcnt < 0);
1445 if (rhp->qpcnt <= db_fc_threshold && rhp->db_state == FLOW_CONTROL) {
1446 rhp->rdev.stats.db_state_transitions++;
1447 rhp->db_state = NORMAL;
1448 idr_for_each(&rhp->qpidr, enable_qp_db, NULL);
1449 }
1450 spin_unlock_irq(&rhp->lock);
1451 atomic_dec(&qhp->refcnt); 1341 atomic_dec(&qhp->refcnt);
1452 wait_event(qhp->wait, !atomic_read(&qhp->refcnt)); 1342 wait_event(qhp->wait, !atomic_read(&qhp->refcnt));
1453 1343
@@ -1461,14 +1351,6 @@ int c4iw_destroy_qp(struct ib_qp *ib_qp)
1461 return 0; 1351 return 0;
1462} 1352}
1463 1353
1464static int disable_qp_db(int id, void *p, void *data)
1465{
1466 struct c4iw_qp *qp = p;
1467
1468 t4_disable_wq_db(&qp->wq);
1469 return 0;
1470}
1471
1472struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs, 1354struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1473 struct ib_udata *udata) 1355 struct ib_udata *udata)
1474{ 1356{
@@ -1555,16 +1437,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
1555 init_waitqueue_head(&qhp->wait); 1437 init_waitqueue_head(&qhp->wait);
1556 atomic_set(&qhp->refcnt, 1); 1438 atomic_set(&qhp->refcnt, 1);
1557 1439
1558 spin_lock_irq(&rhp->lock); 1440 ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
1559 if (rhp->db_state != NORMAL)
1560 t4_disable_wq_db(&qhp->wq);
1561 if (++rhp->qpcnt > db_fc_threshold && rhp->db_state == NORMAL) {
1562 rhp->rdev.stats.db_state_transitions++;
1563 rhp->db_state = FLOW_CONTROL;
1564 idr_for_each(&rhp->qpidr, disable_qp_db, NULL);
1565 }
1566 ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
1567 spin_unlock_irq(&rhp->lock);
1568 if (ret) 1441 if (ret)
1569 goto err2; 1442 goto err2;
1570 1443
@@ -1708,15 +1581,6 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1708 C4IW_QP_ATTR_ENABLE_RDMA_WRITE | 1581 C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
1709 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0; 1582 C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;
1710 1583
1711 /*
1712 * Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
1713 * ringing the queue db when we're in DB_FULL mode.
1714 */
1715 attrs.sq_db_inc = attr->sq_psn;
1716 attrs.rq_db_inc = attr->rq_psn;
1717 mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
1718 mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;
1719
1720 return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0); 1584 return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
1721} 1585}
1722 1586
@@ -1725,14 +1589,3 @@ struct ib_qp *c4iw_get_qp(struct ib_device *dev, int qpn)
1725 PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn); 1589 PDBG("%s ib_dev %p qpn 0x%x\n", __func__, dev, qpn);
1726 return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn); 1590 return (struct ib_qp *)get_qhp(to_c4iw_dev(dev), qpn);
1727} 1591}
1728
1729int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1730 int attr_mask, struct ib_qp_init_attr *init_attr)
1731{
1732 struct c4iw_qp *qhp = to_c4iw_qp(ibqp);
1733
1734 memset(attr, 0, sizeof *attr);
1735 memset(init_attr, 0, sizeof *init_attr);
1736 attr->qp_state = to_ib_qp_state(qhp->attr.state);
1737 return 0;
1738}
diff --git a/drivers/infiniband/hw/cxgb4/resource.c b/drivers/infiniband/hw/cxgb4/resource.c
index cdef4d7fb6d..407ff392415 100644
--- a/drivers/infiniband/hw/cxgb4/resource.c
+++ b/drivers/infiniband/hw/cxgb4/resource.c
@@ -30,25 +30,96 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32/* Crude resource management */ 32/* Crude resource management */
33#include <linux/kernel.h>
34#include <linux/random.h>
35#include <linux/slab.h>
36#include <linux/kfifo.h>
33#include <linux/spinlock.h> 37#include <linux/spinlock.h>
38#include <linux/errno.h>
34#include <linux/genalloc.h> 39#include <linux/genalloc.h>
35#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
36#include "iw_cxgb4.h" 41#include "iw_cxgb4.h"
37 42
38static int c4iw_init_qid_table(struct c4iw_rdev *rdev) 43#define RANDOM_SIZE 16
44
45static int __c4iw_init_resource_fifo(struct kfifo *fifo,
46 spinlock_t *fifo_lock,
47 u32 nr, u32 skip_low,
48 u32 skip_high,
49 int random)
50{
51 u32 i, j, entry = 0, idx;
52 u32 random_bytes;
53 u32 rarray[16];
54 spin_lock_init(fifo_lock);
55
56 if (kfifo_alloc(fifo, nr * sizeof(u32), GFP_KERNEL))
57 return -ENOMEM;
58
59 for (i = 0; i < skip_low + skip_high; i++)
60 kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
61 if (random) {
62 j = 0;
63 random_bytes = random32();
64 for (i = 0; i < RANDOM_SIZE; i++)
65 rarray[i] = i + skip_low;
66 for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
67 if (j >= RANDOM_SIZE) {
68 j = 0;
69 random_bytes = random32();
70 }
71 idx = (random_bytes >> (j * 2)) & 0xF;
72 kfifo_in(fifo,
73 (unsigned char *) &rarray[idx],
74 sizeof(u32));
75 rarray[idx] = i;
76 j++;
77 }
78 for (i = 0; i < RANDOM_SIZE; i++)
79 kfifo_in(fifo,
80 (unsigned char *) &rarray[i],
81 sizeof(u32));
82 } else
83 for (i = skip_low; i < nr - skip_high; i++)
84 kfifo_in(fifo, (unsigned char *) &i, sizeof(u32));
85
86 for (i = 0; i < skip_low + skip_high; i++)
87 if (kfifo_out_locked(fifo, (unsigned char *) &entry,
88 sizeof(u32), fifo_lock))
89 break;
90 return 0;
91}
92
93static int c4iw_init_resource_fifo(struct kfifo *fifo, spinlock_t * fifo_lock,
94 u32 nr, u32 skip_low, u32 skip_high)
95{
96 return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
97 skip_high, 0);
98}
99
100static int c4iw_init_resource_fifo_random(struct kfifo *fifo,
101 spinlock_t *fifo_lock,
102 u32 nr, u32 skip_low, u32 skip_high)
103{
104 return __c4iw_init_resource_fifo(fifo, fifo_lock, nr, skip_low,
105 skip_high, 1);
106}
107
108static int c4iw_init_qid_fifo(struct c4iw_rdev *rdev)
39{ 109{
40 u32 i; 110 u32 i;
41 111
42 if (c4iw_id_table_alloc(&rdev->resource.qid_table, 112 spin_lock_init(&rdev->resource.qid_fifo_lock);
43 rdev->lldi.vr->qp.start, 113
44 rdev->lldi.vr->qp.size, 114 if (kfifo_alloc(&rdev->resource.qid_fifo, rdev->lldi.vr->qp.size *
45 rdev->lldi.vr->qp.size, 0)) 115 sizeof(u32), GFP_KERNEL))
46 return -ENOMEM; 116 return -ENOMEM;
47 117
48 for (i = rdev->lldi.vr->qp.start; 118 for (i = rdev->lldi.vr->qp.start;
49 i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++) 119 i < rdev->lldi.vr->qp.start + rdev->lldi.vr->qp.size; i++)
50 if (!(i & rdev->qpmask)) 120 if (!(i & rdev->qpmask))
51 c4iw_id_free(&rdev->resource.qid_table, i); 121 kfifo_in(&rdev->resource.qid_fifo,
122 (unsigned char *) &i, sizeof(u32));
52 return 0; 123 return 0;
53} 124}
54 125
@@ -56,42 +127,44 @@ static int c4iw_init_qid_table(struct c4iw_rdev *rdev)
56int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid) 127int c4iw_init_resource(struct c4iw_rdev *rdev, u32 nr_tpt, u32 nr_pdid)
57{ 128{
58 int err = 0; 129 int err = 0;
59 err = c4iw_id_table_alloc(&rdev->resource.tpt_table, 0, nr_tpt, 1, 130 err = c4iw_init_resource_fifo_random(&rdev->resource.tpt_fifo,
60 C4IW_ID_TABLE_F_RANDOM); 131 &rdev->resource.tpt_fifo_lock,
132 nr_tpt, 1, 0);
61 if (err) 133 if (err)
62 goto tpt_err; 134 goto tpt_err;
63 err = c4iw_init_qid_table(rdev); 135 err = c4iw_init_qid_fifo(rdev);
64 if (err) 136 if (err)
65 goto qid_err; 137 goto qid_err;
66 err = c4iw_id_table_alloc(&rdev->resource.pdid_table, 0, 138 err = c4iw_init_resource_fifo(&rdev->resource.pdid_fifo,
67 nr_pdid, 1, 0); 139 &rdev->resource.pdid_fifo_lock,
140 nr_pdid, 1, 0);
68 if (err) 141 if (err)
69 goto pdid_err; 142 goto pdid_err;
70 return 0; 143 return 0;
71 pdid_err: 144pdid_err:
72 c4iw_id_table_free(&rdev->resource.qid_table); 145 kfifo_free(&rdev->resource.qid_fifo);
73 qid_err: 146qid_err:
74 c4iw_id_table_free(&rdev->resource.tpt_table); 147 kfifo_free(&rdev->resource.tpt_fifo);
75 tpt_err: 148tpt_err:
76 return -ENOMEM; 149 return -ENOMEM;
77} 150}
78 151
79/* 152/*
80 * returns 0 if no resource available 153 * returns 0 if no resource available
81 */ 154 */
82u32 c4iw_get_resource(struct c4iw_id_table *id_table) 155u32 c4iw_get_resource(struct kfifo *fifo, spinlock_t *lock)
83{ 156{
84 u32 entry; 157 u32 entry;
85 entry = c4iw_id_alloc(id_table); 158 if (kfifo_out_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock))
86 if (entry == (u32)(-1)) 159 return entry;
160 else
87 return 0; 161 return 0;
88 return entry;
89} 162}
90 163
91void c4iw_put_resource(struct c4iw_id_table *id_table, u32 entry) 164void c4iw_put_resource(struct kfifo *fifo, u32 entry, spinlock_t *lock)
92{ 165{
93 PDBG("%s entry 0x%x\n", __func__, entry); 166 PDBG("%s entry 0x%x\n", __func__, entry);
94 c4iw_id_free(id_table, entry); 167 kfifo_in_locked(fifo, (unsigned char *) &entry, sizeof(u32), lock);
95} 168}
96 169
97u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx) 170u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
@@ -108,12 +181,10 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
108 qid = entry->qid; 181 qid = entry->qid;
109 kfree(entry); 182 kfree(entry);
110 } else { 183 } else {
111 qid = c4iw_get_resource(&rdev->resource.qid_table); 184 qid = c4iw_get_resource(&rdev->resource.qid_fifo,
185 &rdev->resource.qid_fifo_lock);
112 if (!qid) 186 if (!qid)
113 goto out; 187 goto out;
114 mutex_lock(&rdev->stats.lock);
115 rdev->stats.qid.cur += rdev->qpmask + 1;
116 mutex_unlock(&rdev->stats.lock);
117 for (i = qid+1; i & rdev->qpmask; i++) { 188 for (i = qid+1; i & rdev->qpmask; i++) {
118 entry = kmalloc(sizeof *entry, GFP_KERNEL); 189 entry = kmalloc(sizeof *entry, GFP_KERNEL);
119 if (!entry) 190 if (!entry)
@@ -142,10 +213,6 @@ u32 c4iw_get_cqid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
142out: 213out:
143 mutex_unlock(&uctx->lock); 214 mutex_unlock(&uctx->lock);
144 PDBG("%s qid 0x%x\n", __func__, qid); 215 PDBG("%s qid 0x%x\n", __func__, qid);
145 mutex_lock(&rdev->stats.lock);
146 if (rdev->stats.qid.cur > rdev->stats.qid.max)
147 rdev->stats.qid.max = rdev->stats.qid.cur;
148 mutex_unlock(&rdev->stats.lock);
149 return qid; 216 return qid;
150} 217}
151 218
@@ -178,12 +245,10 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
178 qid = entry->qid; 245 qid = entry->qid;
179 kfree(entry); 246 kfree(entry);
180 } else { 247 } else {
181 qid = c4iw_get_resource(&rdev->resource.qid_table); 248 qid = c4iw_get_resource(&rdev->resource.qid_fifo,
249 &rdev->resource.qid_fifo_lock);
182 if (!qid) 250 if (!qid)
183 goto out; 251 goto out;
184 mutex_lock(&rdev->stats.lock);
185 rdev->stats.qid.cur += rdev->qpmask + 1;
186 mutex_unlock(&rdev->stats.lock);
187 for (i = qid+1; i & rdev->qpmask; i++) { 252 for (i = qid+1; i & rdev->qpmask; i++) {
188 entry = kmalloc(sizeof *entry, GFP_KERNEL); 253 entry = kmalloc(sizeof *entry, GFP_KERNEL);
189 if (!entry) 254 if (!entry)
@@ -212,10 +277,6 @@ u32 c4iw_get_qpid(struct c4iw_rdev *rdev, struct c4iw_dev_ucontext *uctx)
212out: 277out:
213 mutex_unlock(&uctx->lock); 278 mutex_unlock(&uctx->lock);
214 PDBG("%s qid 0x%x\n", __func__, qid); 279 PDBG("%s qid 0x%x\n", __func__, qid);
215 mutex_lock(&rdev->stats.lock);
216 if (rdev->stats.qid.cur > rdev->stats.qid.max)
217 rdev->stats.qid.max = rdev->stats.qid.cur;
218 mutex_unlock(&rdev->stats.lock);
219 return qid; 280 return qid;
220} 281}
221 282
@@ -236,9 +297,9 @@ void c4iw_put_qpid(struct c4iw_rdev *rdev, u32 qid,
236 297
237void c4iw_destroy_resource(struct c4iw_resource *rscp) 298void c4iw_destroy_resource(struct c4iw_resource *rscp)
238{ 299{
239 c4iw_id_table_free(&rscp->tpt_table); 300 kfifo_free(&rscp->tpt_fifo);
240 c4iw_id_table_free(&rscp->qid_table); 301 kfifo_free(&rscp->qid_fifo);
241 c4iw_id_table_free(&rscp->pdid_table); 302 kfifo_free(&rscp->pdid_fifo);
242} 303}
243 304
244/* 305/*
@@ -251,23 +312,15 @@ u32 c4iw_pblpool_alloc(struct c4iw_rdev *rdev, int size)
251{ 312{
252 unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size); 313 unsigned long addr = gen_pool_alloc(rdev->pbl_pool, size);
253 PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); 314 PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
254 mutex_lock(&rdev->stats.lock); 315 if (!addr)
255 if (addr) { 316 printk_ratelimited(KERN_WARNING MOD "%s: Out of PBL memory\n",
256 rdev->stats.pbl.cur += roundup(size, 1 << MIN_PBL_SHIFT); 317 pci_name(rdev->lldi.pdev));
257 if (rdev->stats.pbl.cur > rdev->stats.pbl.max)
258 rdev->stats.pbl.max = rdev->stats.pbl.cur;
259 } else
260 rdev->stats.pbl.fail++;
261 mutex_unlock(&rdev->stats.lock);
262 return (u32)addr; 318 return (u32)addr;
263} 319}
264 320
265void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size) 321void c4iw_pblpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
266{ 322{
267 PDBG("%s addr 0x%x size %d\n", __func__, addr, size); 323 PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
268 mutex_lock(&rdev->stats.lock);
269 rdev->stats.pbl.cur -= roundup(size, 1 << MIN_PBL_SHIFT);
270 mutex_unlock(&rdev->stats.lock);
271 gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size); 324 gen_pool_free(rdev->pbl_pool, (unsigned long)addr, size);
272} 325}
273 326
@@ -324,23 +377,12 @@ u32 c4iw_rqtpool_alloc(struct c4iw_rdev *rdev, int size)
324 if (!addr) 377 if (!addr)
325 printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n", 378 printk_ratelimited(KERN_WARNING MOD "%s: Out of RQT memory\n",
326 pci_name(rdev->lldi.pdev)); 379 pci_name(rdev->lldi.pdev));
327 mutex_lock(&rdev->stats.lock);
328 if (addr) {
329 rdev->stats.rqt.cur += roundup(size << 6, 1 << MIN_RQT_SHIFT);
330 if (rdev->stats.rqt.cur > rdev->stats.rqt.max)
331 rdev->stats.rqt.max = rdev->stats.rqt.cur;
332 } else
333 rdev->stats.rqt.fail++;
334 mutex_unlock(&rdev->stats.lock);
335 return (u32)addr; 380 return (u32)addr;
336} 381}
337 382
338void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size) 383void c4iw_rqtpool_free(struct c4iw_rdev *rdev, u32 addr, int size)
339{ 384{
340 PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6); 385 PDBG("%s addr 0x%x size %d\n", __func__, addr, size << 6);
341 mutex_lock(&rdev->stats.lock);
342 rdev->stats.rqt.cur -= roundup(size << 6, 1 << MIN_RQT_SHIFT);
343 mutex_unlock(&rdev->stats.lock);
344 gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6); 386 gen_pool_free(rdev->rqt_pool, (unsigned long)addr, size << 6);
345} 387}
346 388
@@ -391,22 +433,12 @@ u32 c4iw_ocqp_pool_alloc(struct c4iw_rdev *rdev, int size)
391{ 433{
392 unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size); 434 unsigned long addr = gen_pool_alloc(rdev->ocqp_pool, size);
393 PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size); 435 PDBG("%s addr 0x%x size %d\n", __func__, (u32)addr, size);
394 if (addr) {
395 mutex_lock(&rdev->stats.lock);
396 rdev->stats.ocqp.cur += roundup(size, 1 << MIN_OCQP_SHIFT);
397 if (rdev->stats.ocqp.cur > rdev->stats.ocqp.max)
398 rdev->stats.ocqp.max = rdev->stats.ocqp.cur;
399 mutex_unlock(&rdev->stats.lock);
400 }
401 return (u32)addr; 436 return (u32)addr;
402} 437}
403 438
404void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size) 439void c4iw_ocqp_pool_free(struct c4iw_rdev *rdev, u32 addr, int size)
405{ 440{
406 PDBG("%s addr 0x%x size %d\n", __func__, addr, size); 441 PDBG("%s addr 0x%x size %d\n", __func__, addr, size);
407 mutex_lock(&rdev->stats.lock);
408 rdev->stats.ocqp.cur -= roundup(size, 1 << MIN_OCQP_SHIFT);
409 mutex_unlock(&rdev->stats.lock);
410 gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size); 442 gen_pool_free(rdev->ocqp_pool, (unsigned long)addr, size);
411} 443}
412 444
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 16f26ab2930..c0221eec881 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -62,10 +62,6 @@ struct t4_status_page {
62 __be16 pidx; 62 __be16 pidx;
63 u8 qp_err; /* flit 1 - sw owns */ 63 u8 qp_err; /* flit 1 - sw owns */
64 u8 db_off; 64 u8 db_off;
65 u8 pad;
66 u16 host_wq_pidx;
67 u16 host_cidx;
68 u16 host_pidx;
69}; 65};
70 66
71#define T4_EQ_ENTRY_SIZE 64 67#define T4_EQ_ENTRY_SIZE 64
@@ -379,16 +375,6 @@ static inline void t4_rq_consume(struct t4_wq *wq)
379 wq->rq.cidx = 0; 375 wq->rq.cidx = 0;
380} 376}
381 377
382static inline u16 t4_rq_host_wq_pidx(struct t4_wq *wq)
383{
384 return wq->rq.queue[wq->rq.size].status.host_wq_pidx;
385}
386
387static inline u16 t4_rq_wq_size(struct t4_wq *wq)
388{
389 return wq->rq.size * T4_RQ_NUM_SLOTS;
390}
391
392static inline int t4_sq_onchip(struct t4_sq *sq) 378static inline int t4_sq_onchip(struct t4_sq *sq)
393{ 379{
394 return sq->flags & T4_SQ_ONCHIP; 380 return sq->flags & T4_SQ_ONCHIP;
@@ -426,16 +412,6 @@ static inline void t4_sq_consume(struct t4_wq *wq)
426 wq->sq.cidx = 0; 412 wq->sq.cidx = 0;
427} 413}
428 414
429static inline u16 t4_sq_host_wq_pidx(struct t4_wq *wq)
430{
431 return wq->sq.queue[wq->sq.size].status.host_wq_pidx;
432}
433
434static inline u16 t4_sq_wq_size(struct t4_wq *wq)
435{
436 return wq->sq.size * T4_SQ_NUM_SLOTS;
437}
438
439static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc) 415static inline void t4_ring_sq_db(struct t4_wq *wq, u16 inc)
440{ 416{
441 wmb(); 417 wmb();
diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/cxgb4/user.h
index 32b754c35ab..e6669d54770 100644
--- a/drivers/infiniband/hw/cxgb4/user.h
+++ b/drivers/infiniband/hw/cxgb4/user.h
@@ -32,7 +32,7 @@
32#ifndef __C4IW_USER_H__ 32#ifndef __C4IW_USER_H__
33#define __C4IW_USER_H__ 33#define __C4IW_USER_H__
34 34
35#define C4IW_UVERBS_ABI_VERSION 2 35#define C4IW_UVERBS_ABI_VERSION 1
36 36
37/* 37/*
38 * Make sure that all structs defined in this file remain laid out so 38 * Make sure that all structs defined in this file remain laid out so
diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
index f08f6eaf3fa..aaf6023a483 100644
--- a/drivers/infiniband/hw/ehca/ehca_classes.h
+++ b/drivers/infiniband/hw/ehca/ehca_classes.h
@@ -379,8 +379,8 @@ extern spinlock_t shca_list_lock;
379 379
380extern int ehca_static_rate; 380extern int ehca_static_rate;
381extern int ehca_port_act_time; 381extern int ehca_port_act_time;
382extern bool ehca_use_hp_mr; 382extern int ehca_use_hp_mr;
383extern bool ehca_scaling_code; 383extern int ehca_scaling_code;
384extern int ehca_lock_hcalls; 384extern int ehca_lock_hcalls;
385extern int ehca_nr_ports; 385extern int ehca_nr_ports;
386extern int ehca_max_cq; 386extern int ehca_max_cq;
diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
index 8f5290147e8..d9b0ebcb67d 100644
--- a/drivers/infiniband/hw/ehca/ehca_cq.c
+++ b/drivers/infiniband/hw/ehca/ehca_cq.c
@@ -220,7 +220,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
220 cq = ERR_PTR(-EAGAIN); 220 cq = ERR_PTR(-EAGAIN);
221 goto create_cq_exit4; 221 goto create_cq_exit4;
222 } 222 }
223 rpage = __pa(vpage); 223 rpage = virt_to_abs(vpage);
224 224
225 h_ret = hipz_h_register_rpage_cq(adapter_handle, 225 h_ret = hipz_h_register_rpage_cq(adapter_handle,
226 my_cq->ipz_cq_handle, 226 my_cq->ipz_cq_handle,
diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
index 90da6747d39..d9b1bb40f48 100644
--- a/drivers/infiniband/hw/ehca/ehca_eq.c
+++ b/drivers/infiniband/hw/ehca/ehca_eq.c
@@ -101,7 +101,7 @@ int ehca_create_eq(struct ehca_shca *shca,
101 if (!vpage) 101 if (!vpage)
102 goto create_eq_exit2; 102 goto create_eq_exit2;
103 103
104 rpage = __pa(vpage); 104 rpage = virt_to_abs(vpage);
105 h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle, 105 h_ret = hipz_h_register_rpage_eq(shca->ipz_hca_handle,
106 eq->ipz_eq_handle, 106 eq->ipz_eq_handle,
107 &eq->pf, 107 &eq->pf,
@@ -125,7 +125,7 @@ int ehca_create_eq(struct ehca_shca *shca,
125 tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca); 125 tasklet_init(&eq->interrupt_task, ehca_tasklet_eq, (long)shca);
126 126
127 ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq, 127 ret = ibmebus_request_irq(eq->ist, ehca_interrupt_eq,
128 0, "ehca_eq", 128 IRQF_DISABLED, "ehca_eq",
129 (void *)shca); 129 (void *)shca);
130 if (ret < 0) 130 if (ret < 0)
131 ehca_err(ib_dev, "Can't map interrupt handler."); 131 ehca_err(ib_dev, "Can't map interrupt handler.");
@@ -133,7 +133,7 @@ int ehca_create_eq(struct ehca_shca *shca,
133 tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca); 133 tasklet_init(&eq->interrupt_task, ehca_tasklet_neq, (long)shca);
134 134
135 ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq, 135 ret = ibmebus_request_irq(eq->ist, ehca_interrupt_neq,
136 0, "ehca_neq", 136 IRQF_DISABLED, "ehca_neq",
137 (void *)shca); 137 (void *)shca);
138 if (ret < 0) 138 if (ret < 0)
139 ehca_err(ib_dev, "Can't map interrupt handler."); 139 ehca_err(ib_dev, "Can't map interrupt handler.");
diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
index 9ed4d258830..73edc366866 100644
--- a/drivers/infiniband/hw/ehca/ehca_hca.c
+++ b/drivers/infiniband/hw/ehca/ehca_hca.c
@@ -233,7 +233,7 @@ int ehca_query_port(struct ib_device *ibdev,
233 props->phys_state = 5; 233 props->phys_state = 5;
234 props->state = rblock->state; 234 props->state = rblock->state;
235 props->active_width = IB_WIDTH_12X; 235 props->active_width = IB_WIDTH_12X;
236 props->active_speed = IB_SPEED_SDR; 236 props->active_speed = 0x1;
237 } 237 }
238 238
239query_port1: 239query_port1:
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index 8615d7cf7e0..e571e60ecb8 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -42,7 +42,6 @@
42 */ 42 */
43 43
44#include <linux/slab.h> 44#include <linux/slab.h>
45#include <linux/smpboot.h>
46 45
47#include "ehca_classes.h" 46#include "ehca_classes.h"
48#include "ehca_irq.h" 47#include "ehca_irq.h"
@@ -653,7 +652,7 @@ void ehca_tasklet_eq(unsigned long data)
653 ehca_process_eq((struct ehca_shca*)data, 1); 652 ehca_process_eq((struct ehca_shca*)data, 1);
654} 653}
655 654
656static int find_next_online_cpu(struct ehca_comp_pool *pool) 655static inline int find_next_online_cpu(struct ehca_comp_pool *pool)
657{ 656{
658 int cpu; 657 int cpu;
659 unsigned long flags; 658 unsigned long flags;
@@ -663,20 +662,17 @@ static int find_next_online_cpu(struct ehca_comp_pool *pool)
663 ehca_dmp(cpu_online_mask, cpumask_size(), ""); 662 ehca_dmp(cpu_online_mask, cpumask_size(), "");
664 663
665 spin_lock_irqsave(&pool->last_cpu_lock, flags); 664 spin_lock_irqsave(&pool->last_cpu_lock, flags);
666 do { 665 cpu = cpumask_next(pool->last_cpu, cpu_online_mask);
667 cpu = cpumask_next(pool->last_cpu, cpu_online_mask); 666 if (cpu >= nr_cpu_ids)
668 if (cpu >= nr_cpu_ids) 667 cpu = cpumask_first(cpu_online_mask);
669 cpu = cpumask_first(cpu_online_mask); 668 pool->last_cpu = cpu;
670 pool->last_cpu = cpu;
671 } while (!per_cpu_ptr(pool->cpu_comp_tasks, cpu)->active);
672 spin_unlock_irqrestore(&pool->last_cpu_lock, flags); 669 spin_unlock_irqrestore(&pool->last_cpu_lock, flags);
673 670
674 return cpu; 671 return cpu;
675} 672}
676 673
677static void __queue_comp_task(struct ehca_cq *__cq, 674static void __queue_comp_task(struct ehca_cq *__cq,
678 struct ehca_cpu_comp_task *cct, 675 struct ehca_cpu_comp_task *cct)
679 struct task_struct *thread)
680{ 676{
681 unsigned long flags; 677 unsigned long flags;
682 678
@@ -687,7 +683,7 @@ static void __queue_comp_task(struct ehca_cq *__cq,
687 __cq->nr_callbacks++; 683 __cq->nr_callbacks++;
688 list_add_tail(&__cq->entry, &cct->cq_list); 684 list_add_tail(&__cq->entry, &cct->cq_list);
689 cct->cq_jobs++; 685 cct->cq_jobs++;
690 wake_up_process(thread); 686 wake_up(&cct->wait_queue);
691 } else 687 } else
692 __cq->nr_callbacks++; 688 __cq->nr_callbacks++;
693 689
@@ -699,7 +695,6 @@ static void queue_comp_task(struct ehca_cq *__cq)
699{ 695{
700 int cpu_id; 696 int cpu_id;
701 struct ehca_cpu_comp_task *cct; 697 struct ehca_cpu_comp_task *cct;
702 struct task_struct *thread;
703 int cq_jobs; 698 int cq_jobs;
704 unsigned long flags; 699 unsigned long flags;
705 700
@@ -707,8 +702,7 @@ static void queue_comp_task(struct ehca_cq *__cq)
707 BUG_ON(!cpu_online(cpu_id)); 702 BUG_ON(!cpu_online(cpu_id));
708 703
709 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 704 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
710 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); 705 BUG_ON(!cct);
711 BUG_ON(!cct || !thread);
712 706
713 spin_lock_irqsave(&cct->task_lock, flags); 707 spin_lock_irqsave(&cct->task_lock, flags);
714 cq_jobs = cct->cq_jobs; 708 cq_jobs = cct->cq_jobs;
@@ -716,25 +710,28 @@ static void queue_comp_task(struct ehca_cq *__cq)
716 if (cq_jobs > 0) { 710 if (cq_jobs > 0) {
717 cpu_id = find_next_online_cpu(pool); 711 cpu_id = find_next_online_cpu(pool);
718 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id); 712 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu_id);
719 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu_id); 713 BUG_ON(!cct);
720 BUG_ON(!cct || !thread);
721 } 714 }
722 __queue_comp_task(__cq, cct, thread); 715
716 __queue_comp_task(__cq, cct);
723} 717}
724 718
725static void run_comp_task(struct ehca_cpu_comp_task *cct) 719static void run_comp_task(struct ehca_cpu_comp_task *cct)
726{ 720{
727 struct ehca_cq *cq; 721 struct ehca_cq *cq;
722 unsigned long flags;
723
724 spin_lock_irqsave(&cct->task_lock, flags);
728 725
729 while (!list_empty(&cct->cq_list)) { 726 while (!list_empty(&cct->cq_list)) {
730 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry); 727 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
731 spin_unlock_irq(&cct->task_lock); 728 spin_unlock_irqrestore(&cct->task_lock, flags);
732 729
733 comp_event_callback(cq); 730 comp_event_callback(cq);
734 if (atomic_dec_and_test(&cq->nr_events)) 731 if (atomic_dec_and_test(&cq->nr_events))
735 wake_up(&cq->wait_completion); 732 wake_up(&cq->wait_completion);
736 733
737 spin_lock_irq(&cct->task_lock); 734 spin_lock_irqsave(&cct->task_lock, flags);
738 spin_lock(&cq->task_lock); 735 spin_lock(&cq->task_lock);
739 cq->nr_callbacks--; 736 cq->nr_callbacks--;
740 if (!cq->nr_callbacks) { 737 if (!cq->nr_callbacks) {
@@ -743,76 +740,158 @@ static void run_comp_task(struct ehca_cpu_comp_task *cct)
743 } 740 }
744 spin_unlock(&cq->task_lock); 741 spin_unlock(&cq->task_lock);
745 } 742 }
743
744 spin_unlock_irqrestore(&cct->task_lock, flags);
746} 745}
747 746
748static void comp_task_park(unsigned int cpu) 747static int comp_task(void *__cct)
749{ 748{
750 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 749 struct ehca_cpu_comp_task *cct = __cct;
751 struct ehca_cpu_comp_task *target; 750 int cql_empty;
752 struct task_struct *thread; 751 DECLARE_WAITQUEUE(wait, current);
753 struct ehca_cq *cq, *tmp;
754 LIST_HEAD(list);
755 752
756 spin_lock_irq(&cct->task_lock); 753 set_current_state(TASK_INTERRUPTIBLE);
757 cct->cq_jobs = 0; 754 while (!kthread_should_stop()) {
758 cct->active = 0; 755 add_wait_queue(&cct->wait_queue, &wait);
759 list_splice_init(&cct->cq_list, &list);
760 spin_unlock_irq(&cct->task_lock);
761 756
762 cpu = find_next_online_cpu(pool); 757 spin_lock_irq(&cct->task_lock);
763 target = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 758 cql_empty = list_empty(&cct->cq_list);
764 thread = *per_cpu_ptr(pool->cpu_comp_threads, cpu); 759 spin_unlock_irq(&cct->task_lock);
765 spin_lock_irq(&target->task_lock); 760 if (cql_empty)
766 list_for_each_entry_safe(cq, tmp, &list, entry) { 761 schedule();
767 list_del(&cq->entry); 762 else
768 __queue_comp_task(cq, target, thread); 763 __set_current_state(TASK_RUNNING);
764
765 remove_wait_queue(&cct->wait_queue, &wait);
766
767 spin_lock_irq(&cct->task_lock);
768 cql_empty = list_empty(&cct->cq_list);
769 spin_unlock_irq(&cct->task_lock);
770 if (!cql_empty)
771 run_comp_task(__cct);
772
773 set_current_state(TASK_INTERRUPTIBLE);
769 } 774 }
770 spin_unlock_irq(&target->task_lock); 775 __set_current_state(TASK_RUNNING);
776
777 return 0;
771} 778}
772 779
773static void comp_task_stop(unsigned int cpu, bool online) 780static struct task_struct *create_comp_task(struct ehca_comp_pool *pool,
781 int cpu)
774{ 782{
775 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 783 struct ehca_cpu_comp_task *cct;
784
785 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
786 spin_lock_init(&cct->task_lock);
787 INIT_LIST_HEAD(&cct->cq_list);
788 init_waitqueue_head(&cct->wait_queue);
789 cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu);
776 790
777 spin_lock_irq(&cct->task_lock); 791 return cct->task;
792}
793
794static void destroy_comp_task(struct ehca_comp_pool *pool,
795 int cpu)
796{
797 struct ehca_cpu_comp_task *cct;
798 struct task_struct *task;
799 unsigned long flags_cct;
800
801 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
802
803 spin_lock_irqsave(&cct->task_lock, flags_cct);
804
805 task = cct->task;
806 cct->task = NULL;
778 cct->cq_jobs = 0; 807 cct->cq_jobs = 0;
779 cct->active = 0; 808
780 WARN_ON(!list_empty(&cct->cq_list)); 809 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
781 spin_unlock_irq(&cct->task_lock); 810
811 if (task)
812 kthread_stop(task);
782} 813}
783 814
784static int comp_task_should_run(unsigned int cpu) 815static void __cpuinit take_over_work(struct ehca_comp_pool *pool, int cpu)
785{ 816{
786 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 817 struct ehca_cpu_comp_task *cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
818 LIST_HEAD(list);
819 struct ehca_cq *cq;
820 unsigned long flags_cct;
821
822 spin_lock_irqsave(&cct->task_lock, flags_cct);
823
824 list_splice_init(&cct->cq_list, &list);
825
826 while (!list_empty(&list)) {
827 cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
828
829 list_del(&cq->entry);
830 __queue_comp_task(cq, this_cpu_ptr(pool->cpu_comp_tasks));
831 }
832
833 spin_unlock_irqrestore(&cct->task_lock, flags_cct);
787 834
788 return cct->cq_jobs;
789} 835}
790 836
791static void comp_task(unsigned int cpu) 837static int __cpuinit comp_pool_callback(struct notifier_block *nfb,
838 unsigned long action,
839 void *hcpu)
792{ 840{
793 struct ehca_cpu_comp_task *cct = this_cpu_ptr(pool->cpu_comp_tasks); 841 unsigned int cpu = (unsigned long)hcpu;
794 int cql_empty; 842 struct ehca_cpu_comp_task *cct;
795 843
796 spin_lock_irq(&cct->task_lock); 844 switch (action) {
797 cql_empty = list_empty(&cct->cq_list); 845 case CPU_UP_PREPARE:
798 if (!cql_empty) { 846 case CPU_UP_PREPARE_FROZEN:
799 __set_current_state(TASK_RUNNING); 847 ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
800 run_comp_task(cct); 848 if (!create_comp_task(pool, cpu)) {
849 ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
850 return notifier_from_errno(-ENOMEM);
851 }
852 break;
853 case CPU_UP_CANCELED:
854 case CPU_UP_CANCELED_FROZEN:
855 ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
856 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
857 kthread_bind(cct->task, cpumask_any(cpu_online_mask));
858 destroy_comp_task(pool, cpu);
859 break;
860 case CPU_ONLINE:
861 case CPU_ONLINE_FROZEN:
862 ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
863 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
864 kthread_bind(cct->task, cpu);
865 wake_up_process(cct->task);
866 break;
867 case CPU_DOWN_PREPARE:
868 case CPU_DOWN_PREPARE_FROZEN:
869 ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
870 break;
871 case CPU_DOWN_FAILED:
872 case CPU_DOWN_FAILED_FROZEN:
873 ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
874 break;
875 case CPU_DEAD:
876 case CPU_DEAD_FROZEN:
877 ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
878 destroy_comp_task(pool, cpu);
879 take_over_work(pool, cpu);
880 break;
801 } 881 }
802 spin_unlock_irq(&cct->task_lock); 882
883 return NOTIFY_OK;
803} 884}
804 885
805static struct smp_hotplug_thread comp_pool_threads = { 886static struct notifier_block comp_pool_callback_nb __cpuinitdata = {
806 .thread_should_run = comp_task_should_run, 887 .notifier_call = comp_pool_callback,
807 .thread_fn = comp_task, 888 .priority = 0,
808 .thread_comm = "ehca_comp/%u",
809 .cleanup = comp_task_stop,
810 .park = comp_task_park,
811}; 889};
812 890
813int ehca_create_comp_pool(void) 891int ehca_create_comp_pool(void)
814{ 892{
815 int cpu, ret = -ENOMEM; 893 int cpu;
894 struct task_struct *task;
816 895
817 if (!ehca_scaling_code) 896 if (!ehca_scaling_code)
818 return 0; 897 return 0;
@@ -825,46 +904,38 @@ int ehca_create_comp_pool(void)
825 pool->last_cpu = cpumask_any(cpu_online_mask); 904 pool->last_cpu = cpumask_any(cpu_online_mask);
826 905
827 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task); 906 pool->cpu_comp_tasks = alloc_percpu(struct ehca_cpu_comp_task);
828 if (!pool->cpu_comp_tasks) 907 if (pool->cpu_comp_tasks == NULL) {
829 goto out_pool; 908 kfree(pool);
830 909 return -EINVAL;
831 pool->cpu_comp_threads = alloc_percpu(struct task_struct *); 910 }
832 if (!pool->cpu_comp_threads)
833 goto out_tasks;
834
835 for_each_present_cpu(cpu) {
836 struct ehca_cpu_comp_task *cct;
837 911
838 cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu); 912 for_each_online_cpu(cpu) {
839 spin_lock_init(&cct->task_lock); 913 task = create_comp_task(pool, cpu);
840 INIT_LIST_HEAD(&cct->cq_list); 914 if (task) {
915 kthread_bind(task, cpu);
916 wake_up_process(task);
917 }
841 } 918 }
842 919
843 comp_pool_threads.store = pool->cpu_comp_threads; 920 register_hotcpu_notifier(&comp_pool_callback_nb);
844 ret = smpboot_register_percpu_thread(&comp_pool_threads);
845 if (ret)
846 goto out_threads;
847 921
848 pr_info("eHCA scaling code enabled\n"); 922 printk(KERN_INFO "eHCA scaling code enabled\n");
849 return ret;
850 923
851out_threads: 924 return 0;
852 free_percpu(pool->cpu_comp_threads);
853out_tasks:
854 free_percpu(pool->cpu_comp_tasks);
855out_pool:
856 kfree(pool);
857 return ret;
858} 925}
859 926
860void ehca_destroy_comp_pool(void) 927void ehca_destroy_comp_pool(void)
861{ 928{
929 int i;
930
862 if (!ehca_scaling_code) 931 if (!ehca_scaling_code)
863 return; 932 return;
864 933
865 smpboot_unregister_percpu_thread(&comp_pool_threads); 934 unregister_hotcpu_notifier(&comp_pool_callback_nb);
935
936 for_each_online_cpu(i)
937 destroy_comp_task(pool, i);
866 938
867 free_percpu(pool->cpu_comp_threads);
868 free_percpu(pool->cpu_comp_tasks); 939 free_percpu(pool->cpu_comp_tasks);
869 kfree(pool); 940 kfree(pool);
870} 941}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.h b/drivers/infiniband/hw/ehca/ehca_irq.h
index 5370199f08c..3346cb06cea 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.h
+++ b/drivers/infiniband/hw/ehca/ehca_irq.h
@@ -58,15 +58,15 @@ void ehca_tasklet_eq(unsigned long data);
58void ehca_process_eq(struct ehca_shca *shca, int is_irq); 58void ehca_process_eq(struct ehca_shca *shca, int is_irq);
59 59
60struct ehca_cpu_comp_task { 60struct ehca_cpu_comp_task {
61 wait_queue_head_t wait_queue;
61 struct list_head cq_list; 62 struct list_head cq_list;
63 struct task_struct *task;
62 spinlock_t task_lock; 64 spinlock_t task_lock;
63 int cq_jobs; 65 int cq_jobs;
64 int active;
65}; 66};
66 67
67struct ehca_comp_pool { 68struct ehca_comp_pool {
68 struct ehca_cpu_comp_task __percpu *cpu_comp_tasks; 69 struct ehca_cpu_comp_task *cpu_comp_tasks;
69 struct task_struct * __percpu *cpu_comp_threads;
70 int last_cpu; 70 int last_cpu;
71 spinlock_t last_cpu_lock; 71 spinlock_t last_cpu_lock;
72}; 72};
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index f8a62918a88..c240e9972cb 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>");
59MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); 59MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver");
60MODULE_VERSION(HCAD_VERSION); 60MODULE_VERSION(HCAD_VERSION);
61 61
62static bool ehca_open_aqp1 = 0; 62static int ehca_open_aqp1 = 0;
63static int ehca_hw_level = 0; 63static int ehca_hw_level = 0;
64static bool ehca_poll_all_eqs = 1; 64static int ehca_poll_all_eqs = 1;
65 65
66int ehca_debug_level = 0; 66int ehca_debug_level = 0;
67int ehca_nr_ports = -1; 67int ehca_nr_ports = -1;
68bool ehca_use_hp_mr = 0; 68int ehca_use_hp_mr = 0;
69int ehca_port_act_time = 30; 69int ehca_port_act_time = 30;
70int ehca_static_rate = -1; 70int ehca_static_rate = -1;
71bool ehca_scaling_code = 0; 71int ehca_scaling_code = 0;
72int ehca_lock_hcalls = -1; 72int ehca_lock_hcalls = -1;
73int ehca_max_cq = -1; 73int ehca_max_cq = -1;
74int ehca_max_qp = -1; 74int ehca_max_qp = -1;
@@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO);
82module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); 82module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO);
83module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); 83module_param_named(static_rate, ehca_static_rate, int, S_IRUGO);
84module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); 84module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO);
85module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); 85module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO);
86module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); 86module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO);
87module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); 87module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO);
88 88
@@ -713,8 +713,8 @@ static struct attribute_group ehca_dev_attr_grp = {
713 .attrs = ehca_dev_attrs 713 .attrs = ehca_dev_attrs
714}; 714};
715 715
716static int ehca_probe(struct platform_device *dev, 716static int __devinit ehca_probe(struct platform_device *dev,
717 const struct of_device_id *id) 717 const struct of_device_id *id)
718{ 718{
719 struct ehca_shca *shca; 719 struct ehca_shca *shca;
720 const u64 *handle; 720 const u64 *handle;
@@ -879,7 +879,7 @@ probe1:
879 return -EINVAL; 879 return -EINVAL;
880} 880}
881 881
882static int ehca_remove(struct platform_device *dev) 882static int __devexit ehca_remove(struct platform_device *dev)
883{ 883{
884 struct ehca_shca *shca = dev_get_drvdata(&dev->dev); 884 struct ehca_shca *shca = dev_get_drvdata(&dev->dev);
885 unsigned long flags; 885 unsigned long flags;
diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 87844869dcc..43cae84005f 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c
@@ -112,7 +112,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize)
112 112
113static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) 113static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca)
114{ 114{
115 return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); 115 return 1UL << ilog2(shca->hca_cap_mr_pgsize);
116} 116}
117 117
118static struct ehca_mr *ehca_mr_new(void) 118static struct ehca_mr *ehca_mr_new(void)
@@ -1136,7 +1136,7 @@ int ehca_reg_mr_rpages(struct ehca_shca *shca,
1136 } 1136 }
1137 1137
1138 if (rnum > 1) { 1138 if (rnum > 1) {
1139 rpage = __pa(kpage); 1139 rpage = virt_to_abs(kpage);
1140 if (!rpage) { 1140 if (!rpage) {
1141 ehca_err(&shca->ib_device, "kpage=%p i=%x", 1141 ehca_err(&shca->ib_device, "kpage=%p i=%x",
1142 kpage, i); 1142 kpage, i);
@@ -1231,7 +1231,7 @@ inline int ehca_rereg_mr_rereg1(struct ehca_shca *shca,
1231 pginfo->num_kpages, pginfo->num_hwpages, kpage); 1231 pginfo->num_kpages, pginfo->num_hwpages, kpage);
1232 goto ehca_rereg_mr_rereg1_exit1; 1232 goto ehca_rereg_mr_rereg1_exit1;
1233 } 1233 }
1234 rpage = __pa(kpage); 1234 rpage = virt_to_abs(kpage);
1235 if (!rpage) { 1235 if (!rpage) {
1236 ehca_err(&shca->ib_device, "kpage=%p", kpage); 1236 ehca_err(&shca->ib_device, "kpage=%p", kpage);
1237 ret = -EFAULT; 1237 ret = -EFAULT;
@@ -1525,7 +1525,7 @@ static inline void *ehca_calc_sectbase(int top, int dir, int idx)
1525 unsigned long ret = idx; 1525 unsigned long ret = idx;
1526 ret |= dir << EHCA_DIR_INDEX_SHIFT; 1526 ret |= dir << EHCA_DIR_INDEX_SHIFT;
1527 ret |= top << EHCA_TOP_INDEX_SHIFT; 1527 ret |= top << EHCA_TOP_INDEX_SHIFT;
1528 return __va(ret << SECTION_SIZE_BITS); 1528 return abs_to_virt(ret << SECTION_SIZE_BITS);
1529} 1529}
1530 1530
1531#define ehca_bmap_valid(entry) \ 1531#define ehca_bmap_valid(entry) \
@@ -1537,7 +1537,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1537{ 1537{
1538 u64 h_ret = 0; 1538 u64 h_ret = 0;
1539 unsigned long page = 0; 1539 unsigned long page = 0;
1540 u64 rpage = __pa(kpage); 1540 u64 rpage = virt_to_abs(kpage);
1541 int page_count; 1541 int page_count;
1542 1542
1543 void *sectbase = ehca_calc_sectbase(top, dir, idx); 1543 void *sectbase = ehca_calc_sectbase(top, dir, idx);
@@ -1553,7 +1553,7 @@ static u64 ehca_reg_mr_section(int top, int dir, int idx, u64 *kpage,
1553 for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count); 1553 for (rnum = 0; (rnum < MAX_RPAGES) && (page < page_count);
1554 rnum++) { 1554 rnum++) {
1555 void *pg = sectbase + ((page++) * pginfo->hwpage_size); 1555 void *pg = sectbase + ((page++) * pginfo->hwpage_size);
1556 kpage[rnum] = __pa(pg); 1556 kpage[rnum] = virt_to_abs(pg);
1557 } 1557 }
1558 1558
1559 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr, 1559 h_ret = hipz_h_register_rpage_mr(shca->ipz_hca_handle, mr,
@@ -1870,8 +1870,9 @@ static int ehca_set_pagebuf_user1(struct ehca_mr_pginfo *pginfo,
1870 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) { 1870 for (i = pginfo->u.usr.next_nmap; i < chunk->nmap; ) {
1871 pgaddr = page_to_pfn(sg_page(&chunk->page_list[i])) 1871 pgaddr = page_to_pfn(sg_page(&chunk->page_list[i]))
1872 << PAGE_SHIFT ; 1872 << PAGE_SHIFT ;
1873 *kpage = pgaddr + (pginfo->next_hwpage * 1873 *kpage = phys_to_abs(pgaddr +
1874 pginfo->hwpage_size); 1874 (pginfo->next_hwpage *
1875 pginfo->hwpage_size));
1875 if ( !(*kpage) ) { 1876 if ( !(*kpage) ) {
1876 ehca_gen_err("pgaddr=%llx " 1877 ehca_gen_err("pgaddr=%llx "
1877 "chunk->page_list[i]=%llx " 1878 "chunk->page_list[i]=%llx "
@@ -1926,7 +1927,7 @@ static int ehca_check_kpages_per_ate(struct scatterlist *page_list,
1926 u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT; 1927 u64 pgaddr = page_to_pfn(sg_page(&page_list[t])) << PAGE_SHIFT;
1927 if (ehca_debug_level >= 3) 1928 if (ehca_debug_level >= 3)
1928 ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr, 1929 ehca_gen_dbg("chunk_page=%llx value=%016llx", pgaddr,
1929 *(u64 *)__va(pgaddr)); 1930 *(u64 *)abs_to_virt(phys_to_abs(pgaddr)));
1930 if (pgaddr - PAGE_SIZE != *prev_pgaddr) { 1931 if (pgaddr - PAGE_SIZE != *prev_pgaddr) {
1931 ehca_gen_err("uncontiguous page found pgaddr=%llx " 1932 ehca_gen_err("uncontiguous page found pgaddr=%llx "
1932 "prev_pgaddr=%llx page_list_i=%x", 1933 "prev_pgaddr=%llx page_list_i=%x",
@@ -1961,7 +1962,7 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1961 if (nr_kpages == kpages_per_hwpage) { 1962 if (nr_kpages == kpages_per_hwpage) {
1962 pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i])) 1963 pgaddr = ( page_to_pfn(sg_page(&chunk->page_list[i]))
1963 << PAGE_SHIFT ); 1964 << PAGE_SHIFT );
1964 *kpage = pgaddr; 1965 *kpage = phys_to_abs(pgaddr);
1965 if ( !(*kpage) ) { 1966 if ( !(*kpage) ) {
1966 ehca_gen_err("pgaddr=%llx i=%x", 1967 ehca_gen_err("pgaddr=%llx i=%x",
1967 pgaddr, i); 1968 pgaddr, i);
@@ -1989,11 +1990,13 @@ static int ehca_set_pagebuf_user2(struct ehca_mr_pginfo *pginfo,
1989 (pginfo->hwpage_size - 1)) >> 1990 (pginfo->hwpage_size - 1)) >>
1990 PAGE_SHIFT; 1991 PAGE_SHIFT;
1991 nr_kpages -= pginfo->kpage_cnt; 1992 nr_kpages -= pginfo->kpage_cnt;
1992 *kpage = pgaddr & 1993 *kpage = phys_to_abs(
1993 ~(pginfo->hwpage_size - 1); 1994 pgaddr &
1995 ~(pginfo->hwpage_size - 1));
1994 } 1996 }
1995 if (ehca_debug_level >= 3) { 1997 if (ehca_debug_level >= 3) {
1996 u64 val = *(u64 *)__va(pgaddr); 1998 u64 val = *(u64 *)abs_to_virt(
1999 phys_to_abs(pgaddr));
1997 ehca_gen_dbg("kpage=%llx chunk_page=%llx " 2000 ehca_gen_dbg("kpage=%llx chunk_page=%llx "
1998 "value=%016llx", 2001 "value=%016llx",
1999 *kpage, pgaddr, val); 2002 *kpage, pgaddr, val);
@@ -2081,8 +2084,9 @@ static int ehca_set_pagebuf_phys(struct ehca_mr_pginfo *pginfo,
2081 pginfo->num_hwpages, i); 2084 pginfo->num_hwpages, i);
2082 return -EFAULT; 2085 return -EFAULT;
2083 } 2086 }
2084 *kpage = (pbuf->addr & ~(pginfo->hwpage_size - 1)) + 2087 *kpage = phys_to_abs(
2085 (pginfo->next_hwpage * pginfo->hwpage_size); 2088 (pbuf->addr & ~(pginfo->hwpage_size - 1)) +
2089 (pginfo->next_hwpage * pginfo->hwpage_size));
2086 if ( !(*kpage) && pbuf->addr ) { 2090 if ( !(*kpage) && pbuf->addr ) {
2087 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx " 2091 ehca_gen_err("pbuf->addr=%llx pbuf->size=%llx "
2088 "next_hwpage=%llx", pbuf->addr, 2092 "next_hwpage=%llx", pbuf->addr,
@@ -2120,8 +2124,8 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2120 /* loop over desired page_list entries */ 2124 /* loop over desired page_list entries */
2121 fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem; 2125 fmrlist = pginfo->u.fmr.page_list + pginfo->u.fmr.next_listelem;
2122 for (i = 0; i < number; i++) { 2126 for (i = 0; i < number; i++) {
2123 *kpage = (*fmrlist & ~(pginfo->hwpage_size - 1)) + 2127 *kpage = phys_to_abs((*fmrlist & ~(pginfo->hwpage_size - 1)) +
2124 pginfo->next_hwpage * pginfo->hwpage_size; 2128 pginfo->next_hwpage * pginfo->hwpage_size);
2125 if ( !(*kpage) ) { 2129 if ( !(*kpage) ) {
2126 ehca_gen_err("*fmrlist=%llx fmrlist=%p " 2130 ehca_gen_err("*fmrlist=%llx fmrlist=%p "
2127 "next_listelem=%llx next_hwpage=%llx", 2131 "next_listelem=%llx next_hwpage=%llx",
@@ -2148,7 +2152,8 @@ static int ehca_set_pagebuf_fmr(struct ehca_mr_pginfo *pginfo,
2148 u64 prev = *kpage; 2152 u64 prev = *kpage;
2149 /* check if adrs are contiguous */ 2153 /* check if adrs are contiguous */
2150 for (j = 1; j < cnt_per_hwpage; j++) { 2154 for (j = 1; j < cnt_per_hwpage; j++) {
2151 u64 p = fmrlist[j] & ~(pginfo->hwpage_size - 1); 2155 u64 p = phys_to_abs(fmrlist[j] &
2156 ~(pginfo->hwpage_size - 1));
2152 if (prev + pginfo->u.fmr.fmr_pgsize != p) { 2157 if (prev + pginfo->u.fmr.fmr_pgsize != p) {
2153 ehca_gen_err("uncontiguous fmr pages " 2158 ehca_gen_err("uncontiguous fmr pages "
2154 "found prev=%llx p=%llx " 2159 "found prev=%llx p=%llx "
@@ -2383,8 +2388,8 @@ static int ehca_update_busmap(unsigned long pfn, unsigned long nr_pages)
2383 memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE); 2388 memset(ehca_bmap, 0xFF, EHCA_TOP_MAP_SIZE);
2384 } 2389 }
2385 2390
2386 start_section = (pfn * PAGE_SIZE) / EHCA_SECTSIZE; 2391 start_section = phys_to_abs(pfn * PAGE_SIZE) / EHCA_SECTSIZE;
2387 end_section = ((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE; 2392 end_section = phys_to_abs((pfn + nr_pages) * PAGE_SIZE) / EHCA_SECTSIZE;
2388 for (i = start_section; i < end_section; i++) { 2393 for (i = start_section; i < end_section; i++) {
2389 int ret; 2394 int ret;
2390 top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT); 2395 top = ehca_calc_index(i, EHCA_TOP_INDEX_SHIFT);
@@ -2503,7 +2508,7 @@ static u64 ehca_map_vaddr(void *caddr)
2503 if (!ehca_bmap) 2508 if (!ehca_bmap)
2504 return EHCA_INVAL_ADDR; 2509 return EHCA_INVAL_ADDR;
2505 2510
2506 abs_addr = __pa(caddr); 2511 abs_addr = virt_to_abs(caddr);
2507 top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT); 2512 top = ehca_calc_index(abs_addr, EHCA_TOP_INDEX_SHIFT + EHCA_SECTSHIFT);
2508 if (!ehca_bmap_valid(ehca_bmap->top[top])) 2513 if (!ehca_bmap_valid(ehca_bmap->top[top]))
2509 return EHCA_INVAL_ADDR; 2514 return EHCA_INVAL_ADDR;
diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
index 149393915ae..32fb34201ab 100644
--- a/drivers/infiniband/hw/ehca/ehca_qp.c
+++ b/drivers/infiniband/hw/ehca/ehca_qp.c
@@ -321,7 +321,7 @@ static inline int init_qp_queue(struct ehca_shca *shca,
321 ret = -EINVAL; 321 ret = -EINVAL;
322 goto init_qp_queue1; 322 goto init_qp_queue1;
323 } 323 }
324 rpage = __pa(vpage); 324 rpage = virt_to_abs(vpage);
325 325
326 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle, 326 h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
327 my_qp->ipz_qp_handle, 327 my_qp->ipz_qp_handle,
@@ -977,9 +977,6 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
977 struct hcp_modify_qp_control_block *mqpcb; 977 struct hcp_modify_qp_control_block *mqpcb;
978 u64 hret, update_mask; 978 u64 hret, update_mask;
979 979
980 if (srq_init_attr->srq_type != IB_SRQT_BASIC)
981 return ERR_PTR(-ENOSYS);
982
983 /* For common attributes, internal_create_qp() takes its info 980 /* For common attributes, internal_create_qp() takes its info
984 * out of qp_init_attr, so copy all common attrs there. 981 * out of qp_init_attr, so copy all common attrs there.
985 */ 982 */
@@ -1094,7 +1091,7 @@ static int prepare_sqe_rts(struct ehca_qp *my_qp, struct ehca_shca *shca,
1094 ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p", 1091 ehca_dbg(&shca->ib_device, "qp_num=%x bad_send_wqe_p=%p",
1095 qp_num, bad_send_wqe_p); 1092 qp_num, bad_send_wqe_p);
1096 /* convert wqe pointer to vadr */ 1093 /* convert wqe pointer to vadr */
1097 bad_send_wqe_v = __va((u64)bad_send_wqe_p); 1094 bad_send_wqe_v = abs_to_virt((u64)bad_send_wqe_p);
1098 if (ehca_debug_level >= 2) 1095 if (ehca_debug_level >= 2)
1099 ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num); 1096 ehca_dmp(bad_send_wqe_v, 32, "qp_num=%x bad_wqe", qp_num);
1100 squeue = &my_qp->ipz_squeue; 1097 squeue = &my_qp->ipz_squeue;
@@ -1138,7 +1135,7 @@ static int calc_left_cqes(u64 wqe_p, struct ipz_queue *ipz_queue,
1138 /* convert real to abs address */ 1135 /* convert real to abs address */
1139 wqe_p = wqe_p & (~(1UL << 63)); 1136 wqe_p = wqe_p & (~(1UL << 63));
1140 1137
1141 wqe_v = __va(wqe_p); 1138 wqe_v = abs_to_virt(wqe_p);
1142 1139
1143 if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) { 1140 if (ipz_queue_abs_to_offset(ipz_queue, wqe_p, &q_ofs)) {
1144 ehca_gen_err("Invalid offset for calculating left cqes " 1141 ehca_gen_err("Invalid offset for calculating left cqes "
diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
index 47f94984353..9a3fbfca9b4 100644
--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
+++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
@@ -42,6 +42,7 @@
42 */ 42 */
43 43
44 44
45#include <asm/system.h>
45#include "ehca_classes.h" 46#include "ehca_classes.h"
46#include "ehca_tools.h" 47#include "ehca_tools.h"
47#include "ehca_qes.h" 48#include "ehca_qes.h"
@@ -135,7 +136,7 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
135 mad_hdr->attr_mod); 136 mad_hdr->attr_mod);
136 } 137 }
137 for (j = 0; j < send_wr->num_sge; j++) { 138 for (j = 0; j < send_wr->num_sge; j++) {
138 u8 *data = __va(sge->addr); 139 u8 *data = (u8 *)abs_to_virt(sge->addr);
139 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x " 140 ehca_gen_dbg("send_wr#%x sge#%x addr=%p length=%x "
140 "lkey=%x", 141 "lkey=%x",
141 idx, j, data, sge->length, sge->lkey); 142 idx, j, data, sge->length, sge->lkey);
diff --git a/drivers/infiniband/hw/ehca/ehca_tools.h b/drivers/infiniband/hw/ehca/ehca_tools.h
index d280b12aae6..54c0d23bad9 100644
--- a/drivers/infiniband/hw/ehca/ehca_tools.h
+++ b/drivers/infiniband/hw/ehca/ehca_tools.h
@@ -59,6 +59,7 @@
59#include <linux/device.h> 59#include <linux/device.h>
60 60
61#include <linux/atomic.h> 61#include <linux/atomic.h>
62#include <asm/abs_addr.h>
62#include <asm/ibmebus.h> 63#include <asm/ibmebus.h>
63#include <asm/io.h> 64#include <asm/io.h>
64#include <asm/pgtable.h> 65#include <asm/pgtable.h>
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 1a1d5d99fcf..45ee89b65c2 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -117,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
117 physical = galpas->user.fw_handle; 117 physical = galpas->user.fw_handle;
118 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 118 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
119 ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical); 119 ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
120 /* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */ 120 /* VM_IO | VM_RESERVED are set by remap_pfn_range() */
121 ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT, 121 ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
122 vma->vm_page_prot); 122 vma->vm_page_prot);
123 if (unlikely(ret)) { 123 if (unlikely(ret)) {
@@ -139,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
139 u64 start, ofs; 139 u64 start, ofs;
140 struct page *page; 140 struct page *page;
141 141
142 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 142 vma->vm_flags |= VM_RESERVED;
143 start = vma->vm_start; 143 start = vma->vm_start;
144 for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) { 144 for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
145 u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs); 145 u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
diff --git a/drivers/infiniband/hw/ehca/hcp_if.c b/drivers/infiniband/hw/ehca/hcp_if.c
index 89517ffb438..e6f9cdd94c7 100644
--- a/drivers/infiniband/hw/ehca/hcp_if.c
+++ b/drivers/infiniband/hw/ehca/hcp_if.c
@@ -90,6 +90,26 @@
90 90
91static DEFINE_SPINLOCK(hcall_lock); 91static DEFINE_SPINLOCK(hcall_lock);
92 92
93static u32 get_longbusy_msecs(int longbusy_rc)
94{
95 switch (longbusy_rc) {
96 case H_LONG_BUSY_ORDER_1_MSEC:
97 return 1;
98 case H_LONG_BUSY_ORDER_10_MSEC:
99 return 10;
100 case H_LONG_BUSY_ORDER_100_MSEC:
101 return 100;
102 case H_LONG_BUSY_ORDER_1_SEC:
103 return 1000;
104 case H_LONG_BUSY_ORDER_10_SEC:
105 return 10000;
106 case H_LONG_BUSY_ORDER_100_SEC:
107 return 100000;
108 default:
109 return 1;
110 }
111}
112
93static long ehca_plpar_hcall_norets(unsigned long opcode, 113static long ehca_plpar_hcall_norets(unsigned long opcode,
94 unsigned long arg1, 114 unsigned long arg1,
95 unsigned long arg2, 115 unsigned long arg2,
@@ -376,7 +396,7 @@ u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,
376 struct hipz_query_port *query_port_response_block) 396 struct hipz_query_port *query_port_response_block)
377{ 397{
378 u64 ret; 398 u64 ret;
379 u64 r_cb = __pa(query_port_response_block); 399 u64 r_cb = virt_to_abs(query_port_response_block);
380 400
381 if (r_cb & (EHCA_PAGESIZE-1)) { 401 if (r_cb & (EHCA_PAGESIZE-1)) {
382 ehca_gen_err("response block not page aligned"); 402 ehca_gen_err("response block not page aligned");
@@ -418,7 +438,7 @@ u64 hipz_h_modify_port(const struct ipz_adapter_handle adapter_handle,
418u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle, 438u64 hipz_h_query_hca(const struct ipz_adapter_handle adapter_handle,
419 struct hipz_query_hca *query_hca_rblock) 439 struct hipz_query_hca *query_hca_rblock)
420{ 440{
421 u64 r_cb = __pa(query_hca_rblock); 441 u64 r_cb = virt_to_abs(query_hca_rblock);
422 442
423 if (r_cb & (EHCA_PAGESIZE-1)) { 443 if (r_cb & (EHCA_PAGESIZE-1)) {
424 ehca_gen_err("response_block=%p not page aligned", 444 ehca_gen_err("response_block=%p not page aligned",
@@ -557,7 +577,7 @@ u64 hipz_h_modify_qp(const struct ipz_adapter_handle adapter_handle,
557 adapter_handle.handle, /* r4 */ 577 adapter_handle.handle, /* r4 */
558 qp_handle.handle, /* r5 */ 578 qp_handle.handle, /* r5 */
559 update_mask, /* r6 */ 579 update_mask, /* r6 */
560 __pa(mqpcb), /* r7 */ 580 virt_to_abs(mqpcb), /* r7 */
561 0, 0, 0, 0, 0); 581 0, 0, 0, 0, 0);
562 582
563 if (ret == H_NOT_ENOUGH_RESOURCES) 583 if (ret == H_NOT_ENOUGH_RESOURCES)
@@ -575,7 +595,7 @@ u64 hipz_h_query_qp(const struct ipz_adapter_handle adapter_handle,
575 return ehca_plpar_hcall_norets(H_QUERY_QP, 595 return ehca_plpar_hcall_norets(H_QUERY_QP,
576 adapter_handle.handle, /* r4 */ 596 adapter_handle.handle, /* r4 */
577 qp_handle.handle, /* r5 */ 597 qp_handle.handle, /* r5 */
578 __pa(qqpcb), /* r6 */ 598 virt_to_abs(qqpcb), /* r6 */
579 0, 0, 0, 0); 599 0, 0, 0, 0);
580} 600}
581 601
@@ -767,7 +787,7 @@ u64 hipz_h_register_rpage_mr(const struct ipz_adapter_handle adapter_handle,
767 if (count > 1) { 787 if (count > 1) {
768 u64 *kpage; 788 u64 *kpage;
769 int i; 789 int i;
770 kpage = __va(logical_address_of_page); 790 kpage = (u64 *)abs_to_virt(logical_address_of_page);
771 for (i = 0; i < count; i++) 791 for (i = 0; i < count; i++)
772 ehca_gen_dbg("kpage[%d]=%p", 792 ehca_gen_dbg("kpage[%d]=%p",
773 i, (void *)kpage[i]); 793 i, (void *)kpage[i]);
@@ -924,7 +944,7 @@ u64 hipz_h_error_data(const struct ipz_adapter_handle adapter_handle,
924 void *rblock, 944 void *rblock,
925 unsigned long *byte_count) 945 unsigned long *byte_count)
926{ 946{
927 u64 r_cb = __pa(rblock); 947 u64 r_cb = virt_to_abs(rblock);
928 948
929 if (r_cb & (EHCA_PAGESIZE-1)) { 949 if (r_cb & (EHCA_PAGESIZE-1)) {
930 ehca_gen_err("rblock not page aligned."); 950 ehca_gen_err("rblock not page aligned.");
diff --git a/drivers/infiniband/hw/ehca/ipz_pt_fn.c b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
index 62c71fadb4d..1898d6e7cce 100644
--- a/drivers/infiniband/hw/ehca/ipz_pt_fn.c
+++ b/drivers/infiniband/hw/ehca/ipz_pt_fn.c
@@ -81,7 +81,7 @@ int ipz_queue_abs_to_offset(struct ipz_queue *queue, u64 addr, u64 *q_offset)
81{ 81{
82 int i; 82 int i;
83 for (i = 0; i < queue->queue_length / queue->pagesize; i++) { 83 for (i = 0; i < queue->queue_length / queue->pagesize; i++) {
84 u64 page = __pa(queue->queue_pages[i]); 84 u64 page = (u64)virt_to_abs(queue->queue_pages[i]);
85 if (addr >= page && addr < page + queue->pagesize) { 85 if (addr >= page && addr < page + queue->pagesize) {
86 *q_offset = addr - page + i * queue->pagesize; 86 *q_offset = addr - page + i * queue->pagesize;
87 return 0; 87 return 0;
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 714293b7851..daef61d5e5b 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -45,7 +45,6 @@
45#include <linux/pci.h> 45#include <linux/pci.h>
46#include <linux/vmalloc.h> 46#include <linux/vmalloc.h>
47#include <linux/fs.h> 47#include <linux/fs.h>
48#include <linux/export.h>
49#include <asm/uaccess.h> 48#include <asm/uaccess.h>
50 49
51#include "ipath_kernel.h" 50#include "ipath_kernel.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 7b371f545ec..be24ac72611 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -41,7 +41,6 @@
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/bitmap.h> 42#include <linux/bitmap.h>
43#include <linux/slab.h> 43#include <linux/slab.h>
44#include <linux/module.h>
45 44
46#include "ipath_kernel.h" 45#include "ipath_kernel.h"
47#include "ipath_verbs.h" 46#include "ipath_verbs.h"
@@ -127,8 +126,9 @@ const char *ipath_ibcstatus_str[] = {
127 "LTState1C", "LTState1D", "LTState1E", "LTState1F" 126 "LTState1C", "LTState1D", "LTState1E", "LTState1F"
128}; 127};
129 128
130static void ipath_remove_one(struct pci_dev *); 129static void __devexit ipath_remove_one(struct pci_dev *);
131static int ipath_init_one(struct pci_dev *, const struct pci_device_id *); 130static int __devinit ipath_init_one(struct pci_dev *,
131 const struct pci_device_id *);
132 132
133/* Only needed for registration, nothing else needs this info */ 133/* Only needed for registration, nothing else needs this info */
134#define PCI_VENDOR_ID_PATHSCALE 0x1fc1 134#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
@@ -147,7 +147,7 @@ MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
147static struct pci_driver ipath_driver = { 147static struct pci_driver ipath_driver = {
148 .name = IPATH_DRV_NAME, 148 .name = IPATH_DRV_NAME,
149 .probe = ipath_init_one, 149 .probe = ipath_init_one,
150 .remove = ipath_remove_one, 150 .remove = __devexit_p(ipath_remove_one),
151 .id_table = ipath_pci_tbl, 151 .id_table = ipath_pci_tbl,
152 .driver = { 152 .driver = {
153 .groups = ipath_driver_attr_groups, 153 .groups = ipath_driver_attr_groups,
@@ -391,7 +391,8 @@ done:
391 391
392static void cleanup_device(struct ipath_devdata *dd); 392static void cleanup_device(struct ipath_devdata *dd);
393 393
394static int ipath_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 394static int __devinit ipath_init_one(struct pci_dev *pdev,
395 const struct pci_device_id *ent)
395{ 396{
396 int ret, len, j; 397 int ret, len, j;
397 struct ipath_devdata *dd; 398 struct ipath_devdata *dd;
@@ -735,7 +736,7 @@ static void cleanup_device(struct ipath_devdata *dd)
735 kfree(tmp); 736 kfree(tmp);
736} 737}
737 738
738static void ipath_remove_one(struct pci_dev *pdev) 739static void __devexit ipath_remove_one(struct pci_dev *pdev)
739{ 740{
740 struct ipath_devdata *dd = pci_get_drvdata(pdev); 741 struct ipath_devdata *dd = pci_get_drvdata(pdev);
741 742
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 3eb7e454849..8697eca1435 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -35,7 +35,6 @@
35#include <linux/poll.h> 35#include <linux/poll.h>
36#include <linux/cdev.h> 36#include <linux/cdev.h>
37#include <linux/swap.h> 37#include <linux/swap.h>
38#include <linux/export.h>
39#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
40#include <linux/slab.h> 39#include <linux/slab.h>
41#include <linux/highmem.h> 40#include <linux/highmem.h>
@@ -1225,7 +1224,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
1225 1224
1226 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 1225 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
1227 vma->vm_ops = &ipath_file_vm_ops; 1226 vma->vm_ops = &ipath_file_vm_ops;
1228 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 1227 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
1229 ret = 1; 1228 ret = 1;
1230 1229
1231bail: 1230bail:
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c
index a4de9d58e9b..31ae1b108ae 100644
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -46,7 +46,7 @@
46static struct super_block *ipath_super; 46static struct super_block *ipath_super;
47 47
48static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, 48static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
49 umode_t mode, const struct file_operations *fops, 49 int mode, const struct file_operations *fops,
50 void *data) 50 void *data)
51{ 51{
52 int error; 52 int error;
@@ -61,7 +61,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
61 inode->i_mode = mode; 61 inode->i_mode = mode;
62 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; 62 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
63 inode->i_private = data; 63 inode->i_private = data;
64 if (S_ISDIR(mode)) { 64 if ((mode & S_IFMT) == S_IFDIR) {
65 inode->i_op = &simple_dir_inode_operations; 65 inode->i_op = &simple_dir_inode_operations;
66 inc_nlink(inode); 66 inc_nlink(inode);
67 inc_nlink(dir); 67 inc_nlink(dir);
@@ -76,7 +76,7 @@ bail:
76 return error; 76 return error;
77} 77}
78 78
79static int create_file(const char *name, umode_t mode, 79static int create_file(const char *name, mode_t mode,
80 struct dentry *parent, struct dentry **dentry, 80 struct dentry *parent, struct dentry **dentry,
81 const struct file_operations *fops, void *data) 81 const struct file_operations *fops, void *data)
82{ 82{
@@ -89,7 +89,7 @@ static int create_file(const char *name, umode_t mode,
89 error = ipathfs_mknod(parent->d_inode, *dentry, 89 error = ipathfs_mknod(parent->d_inode, *dentry,
90 mode, fops, data); 90 mode, fops, data);
91 else 91 else
92 error = PTR_ERR(*dentry); 92 error = PTR_ERR(dentry);
93 mutex_unlock(&parent->d_inode->i_mutex); 93 mutex_unlock(&parent->d_inode->i_mutex);
94 94
95 return error; 95 return error;
diff --git a/drivers/infiniband/hw/ipath/ipath_iba6110.c b/drivers/infiniband/hw/ipath/ipath_iba6110.c
index 7cc305488a3..1d7aea132a0 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba6110.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba6110.c
@@ -596,7 +596,8 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
596 596
597 ipath_format_hwerrors(hwerrs, 597 ipath_format_hwerrors(hwerrs,
598 ipath_6110_hwerror_msgs, 598 ipath_6110_hwerror_msgs,
599 ARRAY_SIZE(ipath_6110_hwerror_msgs), 599 sizeof(ipath_6110_hwerror_msgs) /
600 sizeof(ipath_6110_hwerror_msgs[0]),
600 msg, msgl); 601 msg, msgl);
601 602
602 if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS)) 603 if (hwerrs & (_IPATH_HTLINK0_CRCBITS | _IPATH_HTLINK1_CRCBITS))
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index be2a60e142b..7c1eebe8c7c 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -33,9 +33,7 @@
33 33
34#include <linux/pci.h> 34#include <linux/pci.h>
35#include <linux/netdevice.h> 35#include <linux/netdevice.h>
36#include <linux/moduleparam.h>
37#include <linux/slab.h> 36#include <linux/slab.h>
38#include <linux/stat.h>
39#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
40 38
41#include "ipath_kernel.h" 39#include "ipath_kernel.h"
@@ -719,6 +717,16 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
719 goto done; 717 goto done;
720 718
721 /* 719 /*
720 * we ignore most issues after reporting them, but have to specially
721 * handle hardware-disabled chips.
722 */
723 if (ret == 2) {
724 /* unique error, known to ipath_init_one */
725 ret = -EPERM;
726 goto done;
727 }
728
729 /*
722 * We could bump this to allow for full rcvegrcnt + rcvtidcnt, 730 * We could bump this to allow for full rcvegrcnt + rcvtidcnt,
723 * but then it no longer nicely fits power of two, and since 731 * but then it no longer nicely fits power of two, and since
724 * we now use routines that backend onto __get_free_pages, the 732 * we now use routines that backend onto __get_free_pages, the
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 26dfbc8ee0f..c0a03ac03ee 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -209,7 +209,8 @@ void ipath_format_hwerrors(u64 hwerrs,
209{ 209{
210 int i; 210 int i;
211 const int glen = 211 const int glen =
212 ARRAY_SIZE(ipath_generic_hwerror_msgs); 212 sizeof(ipath_generic_hwerror_msgs) /
213 sizeof(ipath_generic_hwerror_msgs[0]);
213 214
214 for (i=0; i<glen; i++) { 215 for (i=0; i<glen; i++) {
215 if (hwerrs & ipath_generic_hwerror_msgs[i].mask) { 216 if (hwerrs & ipath_generic_hwerror_msgs[i].mask) {
diff --git a/drivers/infiniband/hw/ipath/ipath_srq.c b/drivers/infiniband/hw/ipath/ipath_srq.c
index 26271984b71..386e2c717c5 100644
--- a/drivers/infiniband/hw/ipath/ipath_srq.c
+++ b/drivers/infiniband/hw/ipath/ipath_srq.c
@@ -107,11 +107,6 @@ struct ib_srq *ipath_create_srq(struct ib_pd *ibpd,
107 u32 sz; 107 u32 sz;
108 struct ib_srq *ret; 108 struct ib_srq *ret;
109 109
110 if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
111 ret = ERR_PTR(-ENOSYS);
112 goto done;
113 }
114
115 if (srq_init_attr->attr.max_wr == 0) { 110 if (srq_init_attr->attr.max_wr == 0) {
116 ret = ERR_PTR(-EINVAL); 111 ret = ERR_PTR(-EINVAL);
117 goto done; 112 goto done;
diff --git a/drivers/infiniband/hw/ipath/ipath_sysfs.c b/drivers/infiniband/hw/ipath/ipath_sysfs.c
index 75558f33f1c..8991677e9a0 100644
--- a/drivers/infiniband/hw/ipath/ipath_sysfs.c
+++ b/drivers/infiniband/hw/ipath/ipath_sysfs.c
@@ -32,7 +32,6 @@
32 */ 32 */
33 33
34#include <linux/ctype.h> 34#include <linux/ctype.h>
35#include <linux/stat.h>
36 35
37#include "ipath_kernel.h" 36#include "ipath_kernel.h"
38#include "ipath_verbs.h" 37#include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_user_pages.c b/drivers/infiniband/hw/ipath/ipath_user_pages.c
index dc66c450691..cfed5399f07 100644
--- a/drivers/infiniband/hw/ipath/ipath_user_pages.c
+++ b/drivers/infiniband/hw/ipath/ipath_user_pages.c
@@ -79,7 +79,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
79 goto bail_release; 79 goto bail_release;
80 } 80 }
81 81
82 current->mm->pinned_vm += num_pages; 82 current->mm->locked_vm += num_pages;
83 83
84 ret = 0; 84 ret = 0;
85 goto bail; 85 goto bail;
@@ -178,7 +178,7 @@ void ipath_release_user_pages(struct page **p, size_t num_pages)
178 178
179 __ipath_release_user_pages(p, num_pages, 1); 179 __ipath_release_user_pages(p, num_pages, 1);
180 180
181 current->mm->pinned_vm -= num_pages; 181 current->mm->locked_vm -= num_pages;
182 182
183 up_write(&current->mm->mmap_sem); 183 up_write(&current->mm->mmap_sem);
184} 184}
@@ -195,7 +195,7 @@ static void user_pages_account(struct work_struct *_work)
195 container_of(_work, struct ipath_user_pages_work, work); 195 container_of(_work, struct ipath_user_pages_work, work);
196 196
197 down_write(&work->mm->mmap_sem); 197 down_write(&work->mm->mmap_sem);
198 work->mm->pinned_vm -= work->num_pages; 198 work->mm->locked_vm -= work->num_pages;
199 up_write(&work->mm->mmap_sem); 199 up_write(&work->mm->mmap_sem);
200 mmput(work->mm); 200 mmput(work->mm);
201 kfree(work); 201 kfree(work);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 439c35d4a66..dd7f26d04d4 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -35,7 +35,6 @@
35#include <rdma/ib_user_verbs.h> 35#include <rdma/ib_user_verbs.h>
36#include <linux/io.h> 36#include <linux/io.h>
37#include <linux/slab.h> 37#include <linux/slab.h>
38#include <linux/module.h>
39#include <linux/utsname.h> 38#include <linux/utsname.h>
40#include <linux/rculist.h> 39#include <linux/rculist.h>
41 40
diff --git a/drivers/infiniband/hw/mlx4/Kconfig b/drivers/infiniband/hw/mlx4/Kconfig
index 24ab11a9ad1..bd995b2b50d 100644
--- a/drivers/infiniband/hw/mlx4/Kconfig
+++ b/drivers/infiniband/hw/mlx4/Kconfig
@@ -1,7 +1,6 @@
1config MLX4_INFINIBAND 1config MLX4_INFINIBAND
2 tristate "Mellanox ConnectX HCA support" 2 tristate "Mellanox ConnectX HCA support"
3 depends on NETDEVICES && ETHERNET && PCI 3 depends on NETDEVICES && NETDEV_10000 && PCI
4 select NET_VENDOR_MELLANOX
5 select MLX4_CORE 4 select MLX4_CORE
6 ---help--- 5 ---help---
7 This driver provides low-level InfiniBand support for 6 This driver provides low-level InfiniBand support for
diff --git a/drivers/infiniband/hw/mlx4/Makefile b/drivers/infiniband/hw/mlx4/Makefile
index f4213b3a8fe..70f09c7826d 100644
--- a/drivers/infiniband/hw/mlx4/Makefile
+++ b/drivers/infiniband/hw/mlx4/Makefile
@@ -1,3 +1,3 @@
1obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o 1obj-$(CONFIG_MLX4_INFINIBAND) += mlx4_ib.o
2 2
3mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o mcg.o cm.o alias_GUID.o sysfs.o 3mlx4_ib-y := ah.o cq.o doorbell.o mad.o main.o mr.o qp.o srq.o
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index a251becdaa9..4b8f9c49397 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -126,7 +126,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
126 ah->av.ib.dlid = cpu_to_be16(0xc000); 126 ah->av.ib.dlid = cpu_to_be16(0xc000);
127 127
128 memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); 128 memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16);
129 ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); 129 ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
130 130
131 return &ah->ibah; 131 return &ah->ibah;
132} 132}
diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c
deleted file mode 100644
index 2f215b93db6..00000000000
--- a/drivers/infiniband/hw/mlx4/alias_GUID.c
+++ /dev/null
@@ -1,688 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32 /***********************************************************/
33/*This file support the handling of the Alias GUID feature. */
34/***********************************************************/
35#include <rdma/ib_mad.h>
36#include <rdma/ib_smi.h>
37#include <rdma/ib_cache.h>
38#include <rdma/ib_sa.h>
39#include <rdma/ib_pack.h>
40#include <linux/mlx4/cmd.h>
41#include <linux/module.h>
42#include <linux/init.h>
43#include <linux/errno.h>
44#include <rdma/ib_user_verbs.h>
45#include <linux/delay.h>
46#include "mlx4_ib.h"
47
48/*
49The driver keeps the current state of all guids, as they are in the HW.
50Whenever we receive an smp mad GUIDInfo record, the data will be cached.
51*/
52
53struct mlx4_alias_guid_work_context {
54 u8 port;
55 struct mlx4_ib_dev *dev ;
56 struct ib_sa_query *sa_query;
57 struct completion done;
58 int query_id;
59 struct list_head list;
60 int block_num;
61};
62
63struct mlx4_next_alias_guid_work {
64 u8 port;
65 u8 block_num;
66 struct mlx4_sriov_alias_guid_info_rec_det rec_det;
67};
68
69
70void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev, int block_num,
71 u8 port_num, u8 *p_data)
72{
73 int i;
74 u64 guid_indexes;
75 int slave_id;
76 int port_index = port_num - 1;
77
78 if (!mlx4_is_master(dev->dev))
79 return;
80
81 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
82 ports_guid[port_num - 1].
83 all_rec_per_port[block_num].guid_indexes);
84 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
85
86 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
87 /* The location of the specific index starts from bit number 4
88 * until bit num 11 */
89 if (test_bit(i + 4, (unsigned long *)&guid_indexes)) {
90 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
91 if (slave_id >= dev->dev->num_slaves) {
92 pr_debug("The last slave: %d\n", slave_id);
93 return;
94 }
95
96 /* cache the guid: */
97 memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
98 &p_data[i * GUID_REC_SIZE],
99 GUID_REC_SIZE);
100 } else
101 pr_debug("Guid number: %d in block: %d"
102 " was not updated\n", i, block_num);
103 }
104}
105
106static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
107{
108 if (index >= NUM_ALIAS_GUID_PER_PORT) {
109 pr_err("%s: ERROR: asked for index:%d\n", __func__, index);
110 return (__force __be64) -1;
111 }
112 return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
113}
114
115
116ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index)
117{
118 return IB_SA_COMP_MASK(4 + index);
119}
120
121/*
122 * Whenever new GUID is set/unset (guid table change) create event and
123 * notify the relevant slave (master also should be notified).
124 * If the GUID value is not as we have in the cache the slave will not be
125 * updated; in this case it waits for the smp_snoop or the port management
126 * event to call the function and to update the slave.
127 * block_number - the index of the block (16 blocks available)
128 * port_number - 1 or 2
129 */
130void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
131 int block_num, u8 port_num,
132 u8 *p_data)
133{
134 int i;
135 u64 guid_indexes;
136 int slave_id;
137 enum slave_port_state new_state;
138 enum slave_port_state prev_state;
139 __be64 tmp_cur_ag, form_cache_ag;
140 enum slave_port_gen_event gen_event;
141
142 if (!mlx4_is_master(dev->dev))
143 return;
144
145 guid_indexes = be64_to_cpu((__force __be64) dev->sriov.alias_guid.
146 ports_guid[port_num - 1].
147 all_rec_per_port[block_num].guid_indexes);
148 pr_debug("port: %d, guid_indexes: 0x%llx\n", port_num, guid_indexes);
149
150 /*calculate the slaves and notify them*/
151 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
152 /* the location of the specific index runs from bits 4..11 */
153 if (!(test_bit(i + 4, (unsigned long *)&guid_indexes)))
154 continue;
155
156 slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
157 if (slave_id >= dev->dev->num_slaves)
158 return;
159 tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
160 form_cache_ag = get_cached_alias_guid(dev, port_num,
161 (NUM_ALIAS_GUID_IN_REC * block_num) + i);
162 /*
163 * Check if guid is not the same as in the cache,
164 * If it is different, wait for the snoop_smp or the port mgmt
165 * change event to update the slave on its port state change
166 */
167 if (tmp_cur_ag != form_cache_ag)
168 continue;
169 mlx4_gen_guid_change_eqe(dev->dev, slave_id, port_num);
170
171 /*2 cases: Valid GUID, and Invalid Guid*/
172
173 if (tmp_cur_ag != MLX4_NOT_SET_GUID) { /*valid GUID*/
174 prev_state = mlx4_get_slave_port_state(dev->dev, slave_id, port_num);
175 new_state = set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
176 MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID,
177 &gen_event);
178 pr_debug("slave: %d, port: %d prev_port_state: %d,"
179 " new_port_state: %d, gen_event: %d\n",
180 slave_id, port_num, prev_state, new_state, gen_event);
181 if (gen_event == SLAVE_PORT_GEN_EVENT_UP) {
182 pr_debug("sending PORT_UP event to slave: %d, port: %d\n",
183 slave_id, port_num);
184 mlx4_gen_port_state_change_eqe(dev->dev, slave_id,
185 port_num, MLX4_PORT_CHANGE_SUBTYPE_ACTIVE);
186 }
187 } else { /* request to invalidate GUID */
188 set_and_calc_slave_port_state(dev->dev, slave_id, port_num,
189 MLX4_PORT_STATE_IB_EVENT_GID_INVALID,
190 &gen_event);
191 pr_debug("sending PORT DOWN event to slave: %d, port: %d\n",
192 slave_id, port_num);
193 mlx4_gen_port_state_change_eqe(dev->dev, slave_id, port_num,
194 MLX4_PORT_CHANGE_SUBTYPE_DOWN);
195 }
196 }
197}
198
199static void aliasguid_query_handler(int status,
200 struct ib_sa_guidinfo_rec *guid_rec,
201 void *context)
202{
203 struct mlx4_ib_dev *dev;
204 struct mlx4_alias_guid_work_context *cb_ctx = context;
205 u8 port_index ;
206 int i;
207 struct mlx4_sriov_alias_guid_info_rec_det *rec;
208 unsigned long flags, flags1;
209
210 if (!context)
211 return;
212
213 dev = cb_ctx->dev;
214 port_index = cb_ctx->port - 1;
215 rec = &dev->sriov.alias_guid.ports_guid[port_index].
216 all_rec_per_port[cb_ctx->block_num];
217
218 if (status) {
219 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
220 pr_debug("(port: %d) failed: status = %d\n",
221 cb_ctx->port, status);
222 goto out;
223 }
224
225 if (guid_rec->block_num != cb_ctx->block_num) {
226 pr_err("block num mismatch: %d != %d\n",
227 cb_ctx->block_num, guid_rec->block_num);
228 goto out;
229 }
230
231 pr_debug("lid/port: %d/%d, block_num: %d\n",
232 be16_to_cpu(guid_rec->lid), cb_ctx->port,
233 guid_rec->block_num);
234
235 rec = &dev->sriov.alias_guid.ports_guid[port_index].
236 all_rec_per_port[guid_rec->block_num];
237
238 rec->status = MLX4_GUID_INFO_STATUS_SET;
239 rec->method = MLX4_GUID_INFO_RECORD_SET;
240
241 for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
242 __be64 tmp_cur_ag;
243 tmp_cur_ag = *(__be64 *)&guid_rec->guid_info_list[i * GUID_REC_SIZE];
244 /* check if the SM didn't assign one of the records.
245 * if it didn't, if it was not sysadmin request:
246 * ask the SM to give a new GUID, (instead of the driver request).
247 */
248 if (tmp_cur_ag == MLX4_NOT_SET_GUID) {
249 mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in "
250 "block_num: %d was declined by SM, "
251 "ownership by %d (0 = driver, 1=sysAdmin,"
252 " 2=None)\n", __func__, i,
253 guid_rec->block_num, rec->ownership);
254 if (rec->ownership == MLX4_GUID_DRIVER_ASSIGN) {
255 /* if it is driver assign, asks for new GUID from SM*/
256 *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
257 MLX4_NOT_SET_GUID;
258
259 /* Mark the record as not assigned, and let it
260 * be sent again in the next work sched.*/
261 rec->status = MLX4_GUID_INFO_STATUS_IDLE;
262 rec->guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
263 }
264 } else {
265 /* properly assigned record. */
266 /* We save the GUID we just got from the SM in the
267 * admin_guid in order to be persistent, and in the
268 * request from the sm the process will ask for the same GUID */
269 if (rec->ownership == MLX4_GUID_SYSADMIN_ASSIGN &&
270 tmp_cur_ag != *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]) {
271 /* the sysadmin assignment failed.*/
272 mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set"
273 " admin guid after SysAdmin "
274 "configuration. "
275 "Record num %d in block_num:%d "
276 "was declined by SM, "
277 "new val(0x%llx) was kept\n",
278 __func__, i,
279 guid_rec->block_num,
280 be64_to_cpu(*(__be64 *) &
281 rec->all_recs[i * GUID_REC_SIZE]));
282 } else {
283 memcpy(&rec->all_recs[i * GUID_REC_SIZE],
284 &guid_rec->guid_info_list[i * GUID_REC_SIZE],
285 GUID_REC_SIZE);
286 }
287 }
288 }
289 /*
290 The func is call here to close the cases when the
291 sm doesn't send smp, so in the sa response the driver
292 notifies the slave.
293 */
294 mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
295 cb_ctx->port,
296 guid_rec->guid_info_list);
297out:
298 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
299 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
300 if (!dev->sriov.is_going_down)
301 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
302 &dev->sriov.alias_guid.ports_guid[port_index].
303 alias_guid_work, 0);
304 if (cb_ctx->sa_query) {
305 list_del(&cb_ctx->list);
306 kfree(cb_ctx);
307 } else
308 complete(&cb_ctx->done);
309 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
310 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
311}
312
313static void invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
314{
315 int i;
316 u64 cur_admin_val;
317 ib_sa_comp_mask comp_mask = 0;
318
319 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].status
320 = MLX4_GUID_INFO_STATUS_IDLE;
321 dev->sriov.alias_guid.ports_guid[port - 1].all_rec_per_port[index].method
322 = MLX4_GUID_INFO_RECORD_SET;
323
324 /* calculate the comp_mask for that record.*/
325 for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
326 cur_admin_val =
327 *(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
328 all_rec_per_port[index].all_recs[GUID_REC_SIZE * i];
329 /*
330 check the admin value: if it's for delete (~00LL) or
331 it is the first guid of the first record (hw guid) or
332 the records is not in ownership of the sysadmin and the sm doesn't
333 need to assign GUIDs, then don't put it up for assignment.
334 */
335 if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
336 (!index && !i) ||
337 MLX4_GUID_NONE_ASSIGN == dev->sriov.alias_guid.
338 ports_guid[port - 1].all_rec_per_port[index].ownership)
339 continue;
340 comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
341 }
342 dev->sriov.alias_guid.ports_guid[port - 1].
343 all_rec_per_port[index].guid_indexes = comp_mask;
344}
345
346static int set_guid_rec(struct ib_device *ibdev,
347 u8 port, int index,
348 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
349{
350 int err;
351 struct mlx4_ib_dev *dev = to_mdev(ibdev);
352 struct ib_sa_guidinfo_rec guid_info_rec;
353 ib_sa_comp_mask comp_mask;
354 struct ib_port_attr attr;
355 struct mlx4_alias_guid_work_context *callback_context;
356 unsigned long resched_delay, flags, flags1;
357 struct list_head *head =
358 &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
359
360 err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
361 if (err) {
362 pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
363 err, port);
364 return err;
365 }
366 /*check the port was configured by the sm, otherwise no need to send */
367 if (attr.state != IB_PORT_ACTIVE) {
368 pr_debug("port %d not active...rescheduling\n", port);
369 resched_delay = 5 * HZ;
370 err = -EAGAIN;
371 goto new_schedule;
372 }
373
374 callback_context = kmalloc(sizeof *callback_context, GFP_KERNEL);
375 if (!callback_context) {
376 err = -ENOMEM;
377 resched_delay = HZ * 5;
378 goto new_schedule;
379 }
380 callback_context->port = port;
381 callback_context->dev = dev;
382 callback_context->block_num = index;
383
384 memset(&guid_info_rec, 0, sizeof (struct ib_sa_guidinfo_rec));
385
386 guid_info_rec.lid = cpu_to_be16(attr.lid);
387 guid_info_rec.block_num = index;
388
389 memcpy(guid_info_rec.guid_info_list, rec_det->all_recs,
390 GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC);
391 comp_mask = IB_SA_GUIDINFO_REC_LID | IB_SA_GUIDINFO_REC_BLOCK_NUM |
392 rec_det->guid_indexes;
393
394 init_completion(&callback_context->done);
395 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
396 list_add_tail(&callback_context->list, head);
397 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
398
399 callback_context->query_id =
400 ib_sa_guid_info_rec_query(dev->sriov.alias_guid.sa_client,
401 ibdev, port, &guid_info_rec,
402 comp_mask, rec_det->method, 1000,
403 GFP_KERNEL, aliasguid_query_handler,
404 callback_context,
405 &callback_context->sa_query);
406 if (callback_context->query_id < 0) {
407 pr_debug("ib_sa_guid_info_rec_query failed, query_id: "
408 "%d. will reschedule to the next 1 sec.\n",
409 callback_context->query_id);
410 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
411 list_del(&callback_context->list);
412 kfree(callback_context);
413 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
414 resched_delay = 1 * HZ;
415 err = -EAGAIN;
416 goto new_schedule;
417 }
418 err = 0;
419 goto out;
420
421new_schedule:
422 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
423 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
424 invalidate_guid_record(dev, port, index);
425 if (!dev->sriov.is_going_down) {
426 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
427 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
428 resched_delay);
429 }
430 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
431 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
432
433out:
434 return err;
435}
436
437void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port)
438{
439 int i;
440 unsigned long flags, flags1;
441
442 pr_debug("port %d\n", port);
443
444 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
445 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
446 for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
447 invalidate_guid_record(dev, port, i);
448
449 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) {
450 /*
451 make sure no work waits in the queue, if the work is already
452 queued(not on the timer) the cancel will fail. That is not a problem
453 because we just want the work started.
454 */
455 cancel_delayed_work(&dev->sriov.alias_guid.
456 ports_guid[port - 1].alias_guid_work);
457 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
458 &dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
459 0);
460 }
461 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
462 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
463}
464
465/* The function returns the next record that was
466 * not configured (or failed to be configured) */
467static int get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port,
468 struct mlx4_next_alias_guid_work *rec)
469{
470 int j;
471 unsigned long flags;
472
473 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
474 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags);
475 if (dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status ==
476 MLX4_GUID_INFO_STATUS_IDLE) {
477 memcpy(&rec->rec_det,
478 &dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j],
479 sizeof (struct mlx4_sriov_alias_guid_info_rec_det));
480 rec->port = port;
481 rec->block_num = j;
482 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[j].status =
483 MLX4_GUID_INFO_STATUS_PENDING;
484 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
485 return 0;
486 }
487 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags);
488 }
489 return -ENOENT;
490}
491
492static void set_administratively_guid_record(struct mlx4_ib_dev *dev, int port,
493 int rec_index,
494 struct mlx4_sriov_alias_guid_info_rec_det *rec_det)
495{
496 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].guid_indexes =
497 rec_det->guid_indexes;
498 memcpy(dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].all_recs,
499 rec_det->all_recs, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
500 dev->sriov.alias_guid.ports_guid[port].all_rec_per_port[rec_index].status =
501 rec_det->status;
502}
503
504static void set_all_slaves_guids(struct mlx4_ib_dev *dev, int port)
505{
506 int j;
507 struct mlx4_sriov_alias_guid_info_rec_det rec_det ;
508
509 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT ; j++) {
510 memset(rec_det.all_recs, 0, NUM_ALIAS_GUID_IN_REC * GUID_REC_SIZE);
511 rec_det.guid_indexes = (!j ? 0 : IB_SA_GUIDINFO_REC_GID0) |
512 IB_SA_GUIDINFO_REC_GID1 | IB_SA_GUIDINFO_REC_GID2 |
513 IB_SA_GUIDINFO_REC_GID3 | IB_SA_GUIDINFO_REC_GID4 |
514 IB_SA_GUIDINFO_REC_GID5 | IB_SA_GUIDINFO_REC_GID6 |
515 IB_SA_GUIDINFO_REC_GID7;
516 rec_det.status = MLX4_GUID_INFO_STATUS_IDLE;
517 set_administratively_guid_record(dev, port, j, &rec_det);
518 }
519}
520
521static void alias_guid_work(struct work_struct *work)
522{
523 struct delayed_work *delay = to_delayed_work(work);
524 int ret = 0;
525 struct mlx4_next_alias_guid_work *rec;
526 struct mlx4_sriov_alias_guid_port_rec_det *sriov_alias_port =
527 container_of(delay, struct mlx4_sriov_alias_guid_port_rec_det,
528 alias_guid_work);
529 struct mlx4_sriov_alias_guid *sriov_alias_guid = sriov_alias_port->parent;
530 struct mlx4_ib_sriov *ib_sriov = container_of(sriov_alias_guid,
531 struct mlx4_ib_sriov,
532 alias_guid);
533 struct mlx4_ib_dev *dev = container_of(ib_sriov, struct mlx4_ib_dev, sriov);
534
535 rec = kzalloc(sizeof *rec, GFP_KERNEL);
536 if (!rec) {
537 pr_err("alias_guid_work: No Memory\n");
538 return;
539 }
540
541 pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
542 ret = get_next_record_to_update(dev, sriov_alias_port->port, rec);
543 if (ret) {
544 pr_debug("No more records to update.\n");
545 goto out;
546 }
547
548 set_guid_rec(&dev->ib_dev, rec->port + 1, rec->block_num,
549 &rec->rec_det);
550
551out:
552 kfree(rec);
553}
554
555
556void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
557{
558 unsigned long flags, flags1;
559
560 if (!mlx4_is_master(dev->dev))
561 return;
562 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
563 spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1);
564 if (!dev->sriov.is_going_down) {
565 queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
566 &dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
567 }
568 spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
569 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
570}
571
572void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev)
573{
574 int i;
575 struct mlx4_ib_sriov *sriov = &dev->sriov;
576 struct mlx4_alias_guid_work_context *cb_ctx;
577 struct mlx4_sriov_alias_guid_port_rec_det *det;
578 struct ib_sa_query *sa_query;
579 unsigned long flags;
580
581 for (i = 0 ; i < dev->num_ports; i++) {
582 cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work);
583 det = &sriov->alias_guid.ports_guid[i];
584 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
585 while (!list_empty(&det->cb_list)) {
586 cb_ctx = list_entry(det->cb_list.next,
587 struct mlx4_alias_guid_work_context,
588 list);
589 sa_query = cb_ctx->sa_query;
590 cb_ctx->sa_query = NULL;
591 list_del(&cb_ctx->list);
592 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
593 ib_sa_cancel_query(cb_ctx->query_id, sa_query);
594 wait_for_completion(&cb_ctx->done);
595 kfree(cb_ctx);
596 spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags);
597 }
598 spin_unlock_irqrestore(&sriov->alias_guid.ag_work_lock, flags);
599 }
600 for (i = 0 ; i < dev->num_ports; i++) {
601 flush_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
602 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
603 }
604 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
605 kfree(dev->sriov.alias_guid.sa_client);
606}
607
608int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev)
609{
610 char alias_wq_name[15];
611 int ret = 0;
612 int i, j, k;
613 union ib_gid gid;
614
615 if (!mlx4_is_master(dev->dev))
616 return 0;
617 dev->sriov.alias_guid.sa_client =
618 kzalloc(sizeof *dev->sriov.alias_guid.sa_client, GFP_KERNEL);
619 if (!dev->sriov.alias_guid.sa_client)
620 return -ENOMEM;
621
622 ib_sa_register_client(dev->sriov.alias_guid.sa_client);
623
624 spin_lock_init(&dev->sriov.alias_guid.ag_work_lock);
625
626 for (i = 1; i <= dev->num_ports; ++i) {
627 if (dev->ib_dev.query_gid(&dev->ib_dev , i, 0, &gid)) {
628 ret = -EFAULT;
629 goto err_unregister;
630 }
631 }
632
633 for (i = 0 ; i < dev->num_ports; i++) {
634 memset(&dev->sriov.alias_guid.ports_guid[i], 0,
635 sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
636 /*Check if the SM doesn't need to assign the GUIDs*/
637 for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) {
638 if (mlx4_ib_sm_guid_assign) {
639 dev->sriov.alias_guid.ports_guid[i].
640 all_rec_per_port[j].
641 ownership = MLX4_GUID_DRIVER_ASSIGN;
642 continue;
643 }
644 dev->sriov.alias_guid.ports_guid[i].all_rec_per_port[j].
645 ownership = MLX4_GUID_NONE_ASSIGN;
646 /*mark each val as it was deleted,
647 till the sysAdmin will give it valid val*/
648 for (k = 0; k < NUM_ALIAS_GUID_IN_REC; k++) {
649 *(__be64 *)&dev->sriov.alias_guid.ports_guid[i].
650 all_rec_per_port[j].all_recs[GUID_REC_SIZE * k] =
651 cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL);
652 }
653 }
654 INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list);
655 /*prepare the records, set them to be allocated by sm*/
656 for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
657 invalidate_guid_record(dev, i + 1, j);
658
659 dev->sriov.alias_guid.ports_guid[i].parent = &dev->sriov.alias_guid;
660 dev->sriov.alias_guid.ports_guid[i].port = i;
661 if (mlx4_ib_sm_guid_assign)
662 set_all_slaves_guids(dev, i);
663
664 snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i);
665 dev->sriov.alias_guid.ports_guid[i].wq =
666 create_singlethread_workqueue(alias_wq_name);
667 if (!dev->sriov.alias_guid.ports_guid[i].wq) {
668 ret = -ENOMEM;
669 goto err_thread;
670 }
671 INIT_DELAYED_WORK(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work,
672 alias_guid_work);
673 }
674 return 0;
675
676err_thread:
677 for (--i; i >= 0; i--) {
678 destroy_workqueue(dev->sriov.alias_guid.ports_guid[i].wq);
679 dev->sriov.alias_guid.ports_guid[i].wq = NULL;
680 }
681
682err_unregister:
683 ib_sa_unregister_client(dev->sriov.alias_guid.sa_client);
684 kfree(dev->sriov.alias_guid.sa_client);
685 dev->sriov.alias_guid.sa_client = NULL;
686 pr_err("init_alias_guid_service: Failed. (ret:%d)\n", ret);
687 return ret;
688}
diff --git a/drivers/infiniband/hw/mlx4/cm.c b/drivers/infiniband/hw/mlx4/cm.c
deleted file mode 100644
index dbc99d41605..00000000000
--- a/drivers/infiniband/hw/mlx4/cm.c
+++ /dev/null
@@ -1,437 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34
35#include <linux/mlx4/cmd.h>
36#include <linux/rbtree.h>
37#include <linux/idr.h>
38#include <rdma/ib_cm.h>
39
40#include "mlx4_ib.h"
41
42#define CM_CLEANUP_CACHE_TIMEOUT (5 * HZ)
43
44struct id_map_entry {
45 struct rb_node node;
46
47 u32 sl_cm_id;
48 u32 pv_cm_id;
49 int slave_id;
50 int scheduled_delete;
51 struct mlx4_ib_dev *dev;
52
53 struct list_head list;
54 struct delayed_work timeout;
55};
56
57struct cm_generic_msg {
58 struct ib_mad_hdr hdr;
59
60 __be32 local_comm_id;
61 __be32 remote_comm_id;
62};
63
64struct cm_req_msg {
65 unsigned char unused[0x60];
66 union ib_gid primary_path_sgid;
67};
68
69
70static void set_local_comm_id(struct ib_mad *mad, u32 cm_id)
71{
72 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
73 msg->local_comm_id = cpu_to_be32(cm_id);
74}
75
76static u32 get_local_comm_id(struct ib_mad *mad)
77{
78 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
79
80 return be32_to_cpu(msg->local_comm_id);
81}
82
83static void set_remote_comm_id(struct ib_mad *mad, u32 cm_id)
84{
85 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
86 msg->remote_comm_id = cpu_to_be32(cm_id);
87}
88
89static u32 get_remote_comm_id(struct ib_mad *mad)
90{
91 struct cm_generic_msg *msg = (struct cm_generic_msg *)mad;
92
93 return be32_to_cpu(msg->remote_comm_id);
94}
95
96static union ib_gid gid_from_req_msg(struct ib_device *ibdev, struct ib_mad *mad)
97{
98 struct cm_req_msg *msg = (struct cm_req_msg *)mad;
99
100 return msg->primary_path_sgid;
101}
102
103/* Lock should be taken before called */
104static struct id_map_entry *
105id_map_find_by_sl_id(struct ib_device *ibdev, u32 slave_id, u32 sl_cm_id)
106{
107 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
108 struct rb_node *node = sl_id_map->rb_node;
109
110 while (node) {
111 struct id_map_entry *id_map_entry =
112 rb_entry(node, struct id_map_entry, node);
113
114 if (id_map_entry->sl_cm_id > sl_cm_id)
115 node = node->rb_left;
116 else if (id_map_entry->sl_cm_id < sl_cm_id)
117 node = node->rb_right;
118 else if (id_map_entry->slave_id > slave_id)
119 node = node->rb_left;
120 else if (id_map_entry->slave_id < slave_id)
121 node = node->rb_right;
122 else
123 return id_map_entry;
124 }
125 return NULL;
126}
127
128static void id_map_ent_timeout(struct work_struct *work)
129{
130 struct delayed_work *delay = to_delayed_work(work);
131 struct id_map_entry *ent = container_of(delay, struct id_map_entry, timeout);
132 struct id_map_entry *db_ent, *found_ent;
133 struct mlx4_ib_dev *dev = ent->dev;
134 struct mlx4_ib_sriov *sriov = &dev->sriov;
135 struct rb_root *sl_id_map = &sriov->sl_id_map;
136 int pv_id = (int) ent->pv_cm_id;
137
138 spin_lock(&sriov->id_map_lock);
139 db_ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_id);
140 if (!db_ent)
141 goto out;
142 found_ent = id_map_find_by_sl_id(&dev->ib_dev, ent->slave_id, ent->sl_cm_id);
143 if (found_ent && found_ent == ent)
144 rb_erase(&found_ent->node, sl_id_map);
145 idr_remove(&sriov->pv_id_table, pv_id);
146
147out:
148 list_del(&ent->list);
149 spin_unlock(&sriov->id_map_lock);
150 kfree(ent);
151}
152
153static void id_map_find_del(struct ib_device *ibdev, int pv_cm_id)
154{
155 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
156 struct rb_root *sl_id_map = &sriov->sl_id_map;
157 struct id_map_entry *ent, *found_ent;
158
159 spin_lock(&sriov->id_map_lock);
160 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, pv_cm_id);
161 if (!ent)
162 goto out;
163 found_ent = id_map_find_by_sl_id(ibdev, ent->slave_id, ent->sl_cm_id);
164 if (found_ent && found_ent == ent)
165 rb_erase(&found_ent->node, sl_id_map);
166 idr_remove(&sriov->pv_id_table, pv_cm_id);
167out:
168 spin_unlock(&sriov->id_map_lock);
169}
170
171static void sl_id_map_add(struct ib_device *ibdev, struct id_map_entry *new)
172{
173 struct rb_root *sl_id_map = &to_mdev(ibdev)->sriov.sl_id_map;
174 struct rb_node **link = &sl_id_map->rb_node, *parent = NULL;
175 struct id_map_entry *ent;
176 int slave_id = new->slave_id;
177 int sl_cm_id = new->sl_cm_id;
178
179 ent = id_map_find_by_sl_id(ibdev, slave_id, sl_cm_id);
180 if (ent) {
181 pr_debug("overriding existing sl_id_map entry (cm_id = %x)\n",
182 sl_cm_id);
183
184 rb_replace_node(&ent->node, &new->node, sl_id_map);
185 return;
186 }
187
188 /* Go to the bottom of the tree */
189 while (*link) {
190 parent = *link;
191 ent = rb_entry(parent, struct id_map_entry, node);
192
193 if (ent->sl_cm_id > sl_cm_id || (ent->sl_cm_id == sl_cm_id && ent->slave_id > slave_id))
194 link = &(*link)->rb_left;
195 else
196 link = &(*link)->rb_right;
197 }
198
199 rb_link_node(&new->node, parent, link);
200 rb_insert_color(&new->node, sl_id_map);
201}
202
203static struct id_map_entry *
204id_map_alloc(struct ib_device *ibdev, int slave_id, u32 sl_cm_id)
205{
206 int ret, id;
207 static int next_id;
208 struct id_map_entry *ent;
209 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
210
211 ent = kmalloc(sizeof (struct id_map_entry), GFP_KERNEL);
212 if (!ent) {
213 mlx4_ib_warn(ibdev, "Couldn't allocate id cache entry - out of memory\n");
214 return ERR_PTR(-ENOMEM);
215 }
216
217 ent->sl_cm_id = sl_cm_id;
218 ent->slave_id = slave_id;
219 ent->scheduled_delete = 0;
220 ent->dev = to_mdev(ibdev);
221 INIT_DELAYED_WORK(&ent->timeout, id_map_ent_timeout);
222
223 do {
224 spin_lock(&to_mdev(ibdev)->sriov.id_map_lock);
225 ret = idr_get_new_above(&sriov->pv_id_table, ent,
226 next_id, &id);
227 if (!ret) {
228 next_id = ((unsigned) id + 1) & MAX_IDR_MASK;
229 ent->pv_cm_id = (u32)id;
230 sl_id_map_add(ibdev, ent);
231 }
232
233 spin_unlock(&sriov->id_map_lock);
234 } while (ret == -EAGAIN && idr_pre_get(&sriov->pv_id_table, GFP_KERNEL));
235 /*the function idr_get_new_above can return -ENOSPC, so don't insert in that case.*/
236 if (!ret) {
237 spin_lock(&sriov->id_map_lock);
238 list_add_tail(&ent->list, &sriov->cm_list);
239 spin_unlock(&sriov->id_map_lock);
240 return ent;
241 }
242 /*error flow*/
243 kfree(ent);
244 mlx4_ib_warn(ibdev, "No more space in the idr (err:0x%x)\n", ret);
245 return ERR_PTR(-ENOMEM);
246}
247
248static struct id_map_entry *
249id_map_get(struct ib_device *ibdev, int *pv_cm_id, int sl_cm_id, int slave_id)
250{
251 struct id_map_entry *ent;
252 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
253
254 spin_lock(&sriov->id_map_lock);
255 if (*pv_cm_id == -1) {
256 ent = id_map_find_by_sl_id(ibdev, sl_cm_id, slave_id);
257 if (ent)
258 *pv_cm_id = (int) ent->pv_cm_id;
259 } else
260 ent = (struct id_map_entry *)idr_find(&sriov->pv_id_table, *pv_cm_id);
261 spin_unlock(&sriov->id_map_lock);
262
263 return ent;
264}
265
266static void schedule_delayed(struct ib_device *ibdev, struct id_map_entry *id)
267{
268 struct mlx4_ib_sriov *sriov = &to_mdev(ibdev)->sriov;
269 unsigned long flags;
270
271 spin_lock(&sriov->id_map_lock);
272 spin_lock_irqsave(&sriov->going_down_lock, flags);
273 /*make sure that there is no schedule inside the scheduled work.*/
274 if (!sriov->is_going_down) {
275 id->scheduled_delete = 1;
276 schedule_delayed_work(&id->timeout, CM_CLEANUP_CACHE_TIMEOUT);
277 }
278 spin_unlock_irqrestore(&sriov->going_down_lock, flags);
279 spin_unlock(&sriov->id_map_lock);
280}
281
282int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
283 struct ib_mad *mad)
284{
285 struct id_map_entry *id;
286 u32 sl_cm_id;
287 int pv_cm_id = -1;
288
289 sl_cm_id = get_local_comm_id(mad);
290
291 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID ||
292 mad->mad_hdr.attr_id == CM_REP_ATTR_ID) {
293 id = id_map_alloc(ibdev, slave_id, sl_cm_id);
294 if (IS_ERR(id)) {
295 mlx4_ib_warn(ibdev, "%s: id{slave: %d, sl_cm_id: 0x%x} Failed to id_map_alloc\n",
296 __func__, slave_id, sl_cm_id);
297 return PTR_ERR(id);
298 }
299 } else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID) {
300 return 0;
301 } else {
302 id = id_map_get(ibdev, &pv_cm_id, slave_id, sl_cm_id);
303 }
304
305 if (!id) {
306 pr_debug("id{slave: %d, sl_cm_id: 0x%x} is NULL!\n",
307 slave_id, sl_cm_id);
308 return -EINVAL;
309 }
310
311 set_local_comm_id(mad, id->pv_cm_id);
312
313 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
314 schedule_delayed(ibdev, id);
315 else if (mad->mad_hdr.attr_id == CM_DREP_ATTR_ID)
316 id_map_find_del(ibdev, pv_cm_id);
317
318 return 0;
319}
320
321int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
322 struct ib_mad *mad)
323{
324 u32 pv_cm_id;
325 struct id_map_entry *id;
326
327 if (mad->mad_hdr.attr_id == CM_REQ_ATTR_ID) {
328 union ib_gid gid;
329
330 gid = gid_from_req_msg(ibdev, mad);
331 *slave = mlx4_ib_find_real_gid(ibdev, port, gid.global.interface_id);
332 if (*slave < 0) {
333 mlx4_ib_warn(ibdev, "failed matching slave_id by gid (0x%llx)\n",
334 gid.global.interface_id);
335 return -ENOENT;
336 }
337 return 0;
338 }
339
340 pv_cm_id = get_remote_comm_id(mad);
341 id = id_map_get(ibdev, (int *)&pv_cm_id, -1, -1);
342
343 if (!id) {
344 pr_debug("Couldn't find an entry for pv_cm_id 0x%x\n", pv_cm_id);
345 return -ENOENT;
346 }
347
348 *slave = id->slave_id;
349 set_remote_comm_id(mad, id->sl_cm_id);
350
351 if (mad->mad_hdr.attr_id == CM_DREQ_ATTR_ID)
352 schedule_delayed(ibdev, id);
353 else if (mad->mad_hdr.attr_id == CM_REJ_ATTR_ID ||
354 mad->mad_hdr.attr_id == CM_DREP_ATTR_ID) {
355 id_map_find_del(ibdev, (int) pv_cm_id);
356 }
357
358 return 0;
359}
360
361void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev)
362{
363 spin_lock_init(&dev->sriov.id_map_lock);
364 INIT_LIST_HEAD(&dev->sriov.cm_list);
365 dev->sriov.sl_id_map = RB_ROOT;
366 idr_init(&dev->sriov.pv_id_table);
367 idr_pre_get(&dev->sriov.pv_id_table, GFP_KERNEL);
368}
369
370/* slave = -1 ==> all slaves */
371/* TBD -- call paravirt clean for single slave. Need for slave RESET event */
372void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave)
373{
374 struct mlx4_ib_sriov *sriov = &dev->sriov;
375 struct rb_root *sl_id_map = &sriov->sl_id_map;
376 struct list_head lh;
377 struct rb_node *nd;
378 int need_flush = 1;
379 struct id_map_entry *map, *tmp_map;
380 /* cancel all delayed work queue entries */
381 INIT_LIST_HEAD(&lh);
382 spin_lock(&sriov->id_map_lock);
383 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
384 if (slave < 0 || slave == map->slave_id) {
385 if (map->scheduled_delete)
386 need_flush &= !!cancel_delayed_work(&map->timeout);
387 }
388 }
389
390 spin_unlock(&sriov->id_map_lock);
391
392 if (!need_flush)
393 flush_scheduled_work(); /* make sure all timers were flushed */
394
395 /* now, remove all leftover entries from databases*/
396 spin_lock(&sriov->id_map_lock);
397 if (slave < 0) {
398 while (rb_first(sl_id_map)) {
399 struct id_map_entry *ent =
400 rb_entry(rb_first(sl_id_map),
401 struct id_map_entry, node);
402
403 rb_erase(&ent->node, sl_id_map);
404 idr_remove(&sriov->pv_id_table, (int) ent->pv_cm_id);
405 }
406 list_splice_init(&dev->sriov.cm_list, &lh);
407 } else {
408 /* first, move nodes belonging to slave to db remove list */
409 nd = rb_first(sl_id_map);
410 while (nd) {
411 struct id_map_entry *ent =
412 rb_entry(nd, struct id_map_entry, node);
413 nd = rb_next(nd);
414 if (ent->slave_id == slave)
415 list_move_tail(&ent->list, &lh);
416 }
417 /* remove those nodes from databases */
418 list_for_each_entry_safe(map, tmp_map, &lh, list) {
419 rb_erase(&map->node, sl_id_map);
420 idr_remove(&sriov->pv_id_table, (int) map->pv_cm_id);
421 }
422
423 /* add remaining nodes from cm_list */
424 list_for_each_entry_safe(map, tmp_map, &dev->sriov.cm_list, list) {
425 if (slave == map->slave_id)
426 list_move_tail(&map->list, &lh);
427 }
428 }
429
430 spin_unlock(&sriov->id_map_lock);
431
432 /* free any map entries left behind due to cancel_delayed_work above */
433 list_for_each_entry_safe(map, tmp_map, &lh, list) {
434 list_del(&map->list);
435 kfree(map);
436 }
437}
diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index ae67df35dd4..e8df155bc3b 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -50,7 +50,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
50 struct ib_cq *ibcq; 50 struct ib_cq *ibcq;
51 51
52 if (type != MLX4_EVENT_TYPE_CQ_ERROR) { 52 if (type != MLX4_EVENT_TYPE_CQ_ERROR) {
53 pr_warn("Unexpected event type %d " 53 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
54 "on CQ %06x\n", type, cq->cqn); 54 "on CQ %06x\n", type, cq->cqn);
55 return; 55 return;
56 } 56 }
@@ -66,7 +66,7 @@ static void mlx4_ib_cq_event(struct mlx4_cq *cq, enum mlx4_event type)
66 66
67static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n) 67static void *get_cqe_from_buf(struct mlx4_ib_cq_buf *buf, int n)
68{ 68{
69 return mlx4_buf_offset(&buf->buf, n * buf->entry_size); 69 return mlx4_buf_offset(&buf->buf, n * sizeof (struct mlx4_cqe));
70} 70}
71 71
72static void *get_cqe(struct mlx4_ib_cq *cq, int n) 72static void *get_cqe(struct mlx4_ib_cq *cq, int n)
@@ -77,9 +77,8 @@ static void *get_cqe(struct mlx4_ib_cq *cq, int n)
77static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n) 77static void *get_sw_cqe(struct mlx4_ib_cq *cq, int n)
78{ 78{
79 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe); 79 struct mlx4_cqe *cqe = get_cqe(cq, n & cq->ibcq.cqe);
80 struct mlx4_cqe *tcqe = ((cq->buf.entry_size == 64) ? (cqe + 1) : cqe);
81 80
82 return (!!(tcqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^ 81 return (!!(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK) ^
83 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe; 82 !!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
84} 83}
85 84
@@ -100,13 +99,12 @@ static int mlx4_ib_alloc_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *
100{ 99{
101 int err; 100 int err;
102 101
103 err = mlx4_buf_alloc(dev->dev, nent * dev->dev->caps.cqe_size, 102 err = mlx4_buf_alloc(dev->dev, nent * sizeof(struct mlx4_cqe),
104 PAGE_SIZE * 2, &buf->buf); 103 PAGE_SIZE * 2, &buf->buf);
105 104
106 if (err) 105 if (err)
107 goto out; 106 goto out;
108 107
109 buf->entry_size = dev->dev->caps.cqe_size;
110 err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift, 108 err = mlx4_mtt_init(dev->dev, buf->buf.npages, buf->buf.page_shift,
111 &buf->mtt); 109 &buf->mtt);
112 if (err) 110 if (err)
@@ -122,7 +120,8 @@ err_mtt:
122 mlx4_mtt_cleanup(dev->dev, &buf->mtt); 120 mlx4_mtt_cleanup(dev->dev, &buf->mtt);
123 121
124err_buf: 122err_buf:
125 mlx4_buf_free(dev->dev, nent * buf->entry_size, &buf->buf); 123 mlx4_buf_free(dev->dev, nent * sizeof(struct mlx4_cqe),
124 &buf->buf);
126 125
127out: 126out:
128 return err; 127 return err;
@@ -130,7 +129,7 @@ out:
130 129
131static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe) 130static void mlx4_ib_free_cq_buf(struct mlx4_ib_dev *dev, struct mlx4_ib_cq_buf *buf, int cqe)
132{ 131{
133 mlx4_buf_free(dev->dev, (cqe + 1) * buf->entry_size, &buf->buf); 132 mlx4_buf_free(dev->dev, (cqe + 1) * sizeof(struct mlx4_cqe), &buf->buf);
134} 133}
135 134
136static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context, 135static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *context,
@@ -138,9 +137,8 @@ static int mlx4_ib_get_cq_umem(struct mlx4_ib_dev *dev, struct ib_ucontext *cont
138 u64 buf_addr, int cqe) 137 u64 buf_addr, int cqe)
139{ 138{
140 int err; 139 int err;
141 int cqe_size = dev->dev->caps.cqe_size;
142 140
143 *umem = ib_umem_get(context, buf_addr, cqe * cqe_size, 141 *umem = ib_umem_get(context, buf_addr, cqe * sizeof (struct mlx4_cqe),
144 IB_ACCESS_LOCAL_WRITE, 1); 142 IB_ACCESS_LOCAL_WRITE, 1);
145 if (IS_ERR(*umem)) 143 if (IS_ERR(*umem))
146 return PTR_ERR(*umem); 144 return PTR_ERR(*umem);
@@ -224,9 +222,6 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
224 uar = &dev->priv_uar; 222 uar = &dev->priv_uar;
225 } 223 }
226 224
227 if (dev->eq_table)
228 vector = dev->eq_table[vector % ibdev->num_comp_vectors];
229
230 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar, 225 err = mlx4_cq_alloc(dev->dev, entries, &cq->buf.mtt, uar,
231 cq->db.dma, &cq->mcq, vector, 0); 226 cq->db.dma, &cq->mcq, vector, 0);
232 if (err) 227 if (err)
@@ -333,23 +328,16 @@ static void mlx4_ib_cq_resize_copy_cqes(struct mlx4_ib_cq *cq)
333{ 328{
334 struct mlx4_cqe *cqe, *new_cqe; 329 struct mlx4_cqe *cqe, *new_cqe;
335 int i; 330 int i;
336 int cqe_size = cq->buf.entry_size;
337 int cqe_inc = cqe_size == 64 ? 1 : 0;
338 331
339 i = cq->mcq.cons_index; 332 i = cq->mcq.cons_index;
340 cqe = get_cqe(cq, i & cq->ibcq.cqe); 333 cqe = get_cqe(cq, i & cq->ibcq.cqe);
341 cqe += cqe_inc;
342
343 while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) { 334 while ((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) != MLX4_CQE_OPCODE_RESIZE) {
344 new_cqe = get_cqe_from_buf(&cq->resize_buf->buf, 335 new_cqe = get_cqe_from_buf(&cq->resize_buf->buf,
345 (i + 1) & cq->resize_buf->cqe); 336 (i + 1) & cq->resize_buf->cqe);
346 memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), cqe_size); 337 memcpy(new_cqe, get_cqe(cq, i & cq->ibcq.cqe), sizeof(struct mlx4_cqe));
347 new_cqe += cqe_inc;
348
349 new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) | 338 new_cqe->owner_sr_opcode = (cqe->owner_sr_opcode & ~MLX4_CQE_OWNER_MASK) |
350 (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0); 339 (((i + 1) & (cq->resize_buf->cqe + 1)) ? MLX4_CQE_OWNER_MASK : 0);
351 cqe = get_cqe(cq, ++i & cq->ibcq.cqe); 340 cqe = get_cqe(cq, ++i & cq->ibcq.cqe);
352 cqe += cqe_inc;
353 } 341 }
354 ++cq->mcq.cons_index; 342 ++cq->mcq.cons_index;
355} 343}
@@ -447,7 +435,6 @@ err_buf:
447 435
448out: 436out:
449 mutex_unlock(&cq->resize_mutex); 437 mutex_unlock(&cq->resize_mutex);
450
451 return err; 438 return err;
452} 439}
453 440
@@ -476,7 +463,7 @@ static void dump_cqe(void *cqe)
476{ 463{
477 __be32 *buf = cqe; 464 __be32 *buf = cqe;
478 465
479 pr_debug("CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n", 466 printk(KERN_DEBUG "CQE contents %08x %08x %08x %08x %08x %08x %08x %08x\n",
480 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]), 467 be32_to_cpu(buf[0]), be32_to_cpu(buf[1]), be32_to_cpu(buf[2]),
481 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]), 468 be32_to_cpu(buf[3]), be32_to_cpu(buf[4]), be32_to_cpu(buf[5]),
482 be32_to_cpu(buf[6]), be32_to_cpu(buf[7])); 469 be32_to_cpu(buf[6]), be32_to_cpu(buf[7]));
@@ -486,7 +473,7 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
486 struct ib_wc *wc) 473 struct ib_wc *wc)
487{ 474{
488 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) { 475 if (cqe->syndrome == MLX4_CQE_SYNDROME_LOCAL_QP_OP_ERR) {
489 pr_debug("local QP operation err " 476 printk(KERN_DEBUG "local QP operation err "
490 "(QPN %06x, WQE index %x, vendor syndrome %02x, " 477 "(QPN %06x, WQE index %x, vendor syndrome %02x, "
491 "opcode = %02x)\n", 478 "opcode = %02x)\n",
492 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index), 479 be32_to_cpu(cqe->my_qpn), be16_to_cpu(cqe->wqe_index),
@@ -557,26 +544,6 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum)
557 checksum == cpu_to_be16(0xffff); 544 checksum == cpu_to_be16(0xffff);
558} 545}
559 546
560static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc,
561 unsigned tail, struct mlx4_cqe *cqe)
562{
563 struct mlx4_ib_proxy_sqp_hdr *hdr;
564
565 ib_dma_sync_single_for_cpu(qp->ibqp.device,
566 qp->sqp_proxy_rcv[tail].map,
567 sizeof (struct mlx4_ib_proxy_sqp_hdr),
568 DMA_FROM_DEVICE);
569 hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr);
570 wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index);
571 wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32);
572 wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12);
573 wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF;
574 wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0;
575 wc->dlid_path_bits = 0;
576
577 return 0;
578}
579
580static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, 547static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
581 struct mlx4_ib_qp **cur_qp, 548 struct mlx4_ib_qp **cur_qp,
582 struct ib_wc *wc) 549 struct ib_wc *wc)
@@ -589,16 +556,12 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
589 int is_error; 556 int is_error;
590 u32 g_mlpath_rqpn; 557 u32 g_mlpath_rqpn;
591 u16 wqe_ctr; 558 u16 wqe_ctr;
592 unsigned tail = 0;
593 559
594repoll: 560repoll:
595 cqe = next_cqe_sw(cq); 561 cqe = next_cqe_sw(cq);
596 if (!cqe) 562 if (!cqe)
597 return -EAGAIN; 563 return -EAGAIN;
598 564
599 if (cq->buf.entry_size == 64)
600 cqe++;
601
602 ++cq->mcq.cons_index; 565 ++cq->mcq.cons_index;
603 566
604 /* 567 /*
@@ -613,7 +576,7 @@ repoll:
613 576
614 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && 577 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP &&
615 is_send)) { 578 is_send)) {
616 pr_warn("Completion for NOP opcode detected!\n"); 579 printk(KERN_WARNING "Completion for NOP opcode detected!\n");
617 return -EINVAL; 580 return -EINVAL;
618 } 581 }
619 582
@@ -643,7 +606,7 @@ repoll:
643 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, 606 mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev,
644 be32_to_cpu(cqe->vlan_my_qpn)); 607 be32_to_cpu(cqe->vlan_my_qpn));
645 if (unlikely(!mqp)) { 608 if (unlikely(!mqp)) {
646 pr_warn("CQ %06x with entry for unknown QPN %06x\n", 609 printk(KERN_WARNING "CQ %06x with entry for unknown QPN %06x\n",
647 cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); 610 cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK);
648 return -EINVAL; 611 return -EINVAL;
649 } 612 }
@@ -668,8 +631,7 @@ repoll:
668 mlx4_ib_free_srq_wqe(srq, wqe_ctr); 631 mlx4_ib_free_srq_wqe(srq, wqe_ctr);
669 } else { 632 } else {
670 wq = &(*cur_qp)->rq; 633 wq = &(*cur_qp)->rq;
671 tail = wq->tail & (wq->wqe_cnt - 1); 634 wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
672 wc->wr_id = wq->wrid[tail];
673 ++wq->tail; 635 ++wq->tail;
674 } 636 }
675 637
@@ -752,26 +714,14 @@ repoll:
752 break; 714 break;
753 } 715 }
754 716
755 if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) {
756 if ((*cur_qp)->mlx4_ib_qp_type &
757 (MLX4_IB_QPT_PROXY_SMI_OWNER |
758 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
759 return use_tunnel_data(*cur_qp, cq, wc, tail, cqe);
760 }
761
762 wc->slid = be16_to_cpu(cqe->rlid); 717 wc->slid = be16_to_cpu(cqe->rlid);
718 wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
763 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); 719 g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn);
764 wc->src_qp = g_mlpath_rqpn & 0xffffff; 720 wc->src_qp = g_mlpath_rqpn & 0xffffff;
765 wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; 721 wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
766 wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; 722 wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
767 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; 723 wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
768 wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, 724 wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum);
769 cqe->checksum) ? IB_WC_IP_CSUM_OK : 0;
770 if (rdma_port_get_link_layer(wc->qp->device,
771 (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET)
772 wc->sl = be16_to_cpu(cqe->sl_vid) >> 13;
773 else
774 wc->sl = be16_to_cpu(cqe->sl_vid) >> 12;
775 } 725 }
776 726
777 return 0; 727 return 0;
@@ -793,7 +743,8 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
793 break; 743 break;
794 } 744 }
795 745
796 mlx4_cq_set_ci(&cq->mcq); 746 if (npolled)
747 mlx4_cq_set_ci(&cq->mcq);
797 748
798 spin_unlock_irqrestore(&cq->lock, flags); 749 spin_unlock_irqrestore(&cq->lock, flags);
799 750
@@ -820,7 +771,6 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
820 int nfreed = 0; 771 int nfreed = 0;
821 struct mlx4_cqe *cqe, *dest; 772 struct mlx4_cqe *cqe, *dest;
822 u8 owner_bit; 773 u8 owner_bit;
823 int cqe_inc = cq->buf.entry_size == 64 ? 1 : 0;
824 774
825 /* 775 /*
826 * First we need to find the current producer index, so we 776 * First we need to find the current producer index, so we
@@ -839,16 +789,12 @@ void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq)
839 */ 789 */
840 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) { 790 while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
841 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe); 791 cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
842 cqe += cqe_inc;
843
844 if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) { 792 if ((be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK) == qpn) {
845 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK)) 793 if (srq && !(cqe->owner_sr_opcode & MLX4_CQE_IS_SEND_MASK))
846 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index)); 794 mlx4_ib_free_srq_wqe(srq, be16_to_cpu(cqe->wqe_index));
847 ++nfreed; 795 ++nfreed;
848 } else if (nfreed) { 796 } else if (nfreed) {
849 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe); 797 dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
850 dest += cqe_inc;
851
852 owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK; 798 owner_bit = dest->owner_sr_opcode & MLX4_CQE_OWNER_MASK;
853 memcpy(dest, cqe, sizeof *cqe); 799 memcpy(dest, cqe, sizeof *cqe);
854 dest->owner_sr_opcode = owner_bit | 800 dest->owner_sr_opcode = owner_bit |
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 0a903c129f0..f36da994a85 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -32,10 +32,7 @@
32 32
33#include <rdma/ib_mad.h> 33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h> 34#include <rdma/ib_smi.h>
35#include <rdma/ib_sa.h>
36#include <rdma/ib_cache.h>
37 35
38#include <linux/random.h>
39#include <linux/mlx4/cmd.h> 36#include <linux/mlx4/cmd.h>
40#include <linux/gfp.h> 37#include <linux/gfp.h>
41#include <rdma/ib_pma.h> 38#include <rdma/ib_pma.h>
@@ -47,62 +44,7 @@ enum {
47 MLX4_IB_VENDOR_CLASS2 = 0xa 44 MLX4_IB_VENDOR_CLASS2 = 0xa
48}; 45};
49 46
50#define MLX4_TUN_SEND_WRID_SHIFT 34 47int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
51#define MLX4_TUN_QPN_SHIFT 32
52#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
53#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
54
55#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
56#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
57
58 /* Port mgmt change event handling */
59
60#define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
61#define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
62#define NUM_IDX_IN_PKEY_TBL_BLK 32
63#define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
64#define GUID_TBL_BLK_NUM_ENTRIES 8
65#define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
66
67struct mlx4_mad_rcv_buf {
68 struct ib_grh grh;
69 u8 payload[256];
70} __packed;
71
72struct mlx4_mad_snd_buf {
73 u8 payload[256];
74} __packed;
75
76struct mlx4_tunnel_mad {
77 struct ib_grh grh;
78 struct mlx4_ib_tunnel_header hdr;
79 struct ib_mad mad;
80} __packed;
81
82struct mlx4_rcv_tunnel_mad {
83 struct mlx4_rcv_tunnel_hdr hdr;
84 struct ib_grh grh;
85 struct ib_mad mad;
86} __packed;
87
88static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
89static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
90static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
91 int block, u32 change_bitmap);
92
93__be64 mlx4_ib_gen_node_guid(void)
94{
95#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
96 return cpu_to_be64(NODE_GUID_HI | random32());
97}
98
99__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
100{
101 return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
102 cpu_to_be64(0xff00000000000000LL);
103}
104
105int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
106 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 48 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
107 void *in_mad, void *response_mad) 49 void *in_mad, void *response_mad)
108{ 50{
@@ -129,13 +71,10 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
129 * Key check traps can't be generated unless we have in_wc to 71 * Key check traps can't be generated unless we have in_wc to
130 * tell us where to send the trap. 72 * tell us where to send the trap.
131 */ 73 */
132 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc) 74 if (ignore_mkey || !in_wc)
133 op_modifier |= 0x1; 75 op_modifier |= 0x1;
134 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc) 76 if (ignore_bkey || !in_wc)
135 op_modifier |= 0x2; 77 op_modifier |= 0x2;
136 if (mlx4_is_mfunc(dev->dev) &&
137 (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
138 op_modifier |= 0x8;
139 78
140 if (in_wc) { 79 if (in_wc) {
141 struct { 80 struct {
@@ -168,10 +107,9 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
168 in_modifier |= in_wc->slid << 16; 107 in_modifier |= in_wc->slid << 16;
169 } 108 }
170 109
171 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, 110 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
172 mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier, 111 in_modifier, op_modifier,
173 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, 112 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
174 (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
175 113
176 if (!err) 114 if (!err)
177 memcpy(response_mad, outmailbox->buf, 256); 115 memcpy(response_mad, outmailbox->buf, 256);
@@ -186,7 +124,6 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
186{ 124{
187 struct ib_ah *new_ah; 125 struct ib_ah *new_ah;
188 struct ib_ah_attr ah_attr; 126 struct ib_ah_attr ah_attr;
189 unsigned long flags;
190 127
191 if (!dev->send_agent[port_num - 1][0]) 128 if (!dev->send_agent[port_num - 1][0])
192 return; 129 return;
@@ -201,134 +138,53 @@ static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
201 if (IS_ERR(new_ah)) 138 if (IS_ERR(new_ah))
202 return; 139 return;
203 140
204 spin_lock_irqsave(&dev->sm_lock, flags); 141 spin_lock(&dev->sm_lock);
205 if (dev->sm_ah[port_num - 1]) 142 if (dev->sm_ah[port_num - 1])
206 ib_destroy_ah(dev->sm_ah[port_num - 1]); 143 ib_destroy_ah(dev->sm_ah[port_num - 1]);
207 dev->sm_ah[port_num - 1] = new_ah; 144 dev->sm_ah[port_num - 1] = new_ah;
208 spin_unlock_irqrestore(&dev->sm_lock, flags); 145 spin_unlock(&dev->sm_lock);
209} 146}
210 147
211/* 148/*
212 * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can 149 * Snoop SM MADs for port info and P_Key table sets, so we can
213 * synthesize LID change, Client-Rereg, GID change, and P_Key change events. 150 * synthesize LID change and P_Key change events.
214 */ 151 */
215static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, 152static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad,
216 u16 prev_lid) 153 u16 prev_lid)
217{ 154{
218 struct ib_port_info *pinfo; 155 struct ib_event event;
219 u16 lid;
220 __be16 *base;
221 u32 bn, pkey_change_bitmap;
222 int i;
223
224 156
225 struct mlx4_ib_dev *dev = to_mdev(ibdev);
226 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || 157 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
227 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && 158 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
228 mad->mad_hdr.method == IB_MGMT_METHOD_SET) 159 mad->mad_hdr.method == IB_MGMT_METHOD_SET) {
229 switch (mad->mad_hdr.attr_id) { 160 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO) {
230 case IB_SMP_ATTR_PORT_INFO: 161 struct ib_port_info *pinfo =
231 pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; 162 (struct ib_port_info *) ((struct ib_smp *) mad)->data;
232 lid = be16_to_cpu(pinfo->lid); 163 u16 lid = be16_to_cpu(pinfo->lid);
233 164
234 update_sm_ah(dev, port_num, 165 update_sm_ah(to_mdev(ibdev), port_num,
235 be16_to_cpu(pinfo->sm_lid), 166 be16_to_cpu(pinfo->sm_lid),
236 pinfo->neighbormtu_mastersmsl & 0xf); 167 pinfo->neighbormtu_mastersmsl & 0xf);
237 168
238 if (pinfo->clientrereg_resv_subnetto & 0x80) 169 event.device = ibdev;
239 handle_client_rereg_event(dev, port_num); 170 event.element.port_num = port_num;
240
241 if (prev_lid != lid)
242 handle_lid_change_event(dev, port_num);
243 break;
244
245 case IB_SMP_ATTR_PKEY_TABLE:
246 if (!mlx4_is_mfunc(dev->dev)) {
247 mlx4_ib_dispatch_event(dev, port_num,
248 IB_EVENT_PKEY_CHANGE);
249 break;
250 }
251 171
252 /* at this point, we are running in the master. 172 if (pinfo->clientrereg_resv_subnetto & 0x80) {
253 * Slaves do not receive SMPs. 173 event.event = IB_EVENT_CLIENT_REREGISTER;
254 */ 174 ib_dispatch_event(&event);
255 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
256 base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
257 pkey_change_bitmap = 0;
258 for (i = 0; i < 32; i++) {
259 pr_debug("PKEY[%d] = x%x\n",
260 i + bn*32, be16_to_cpu(base[i]));
261 if (be16_to_cpu(base[i]) !=
262 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
263 pkey_change_bitmap |= (1 << i);
264 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
265 be16_to_cpu(base[i]);
266 }
267 } 175 }
268 pr_debug("PKEY Change event: port=%d, "
269 "block=0x%x, change_bitmap=0x%x\n",
270 port_num, bn, pkey_change_bitmap);
271 176
272 if (pkey_change_bitmap) { 177 if (prev_lid != lid) {
273 mlx4_ib_dispatch_event(dev, port_num, 178 event.event = IB_EVENT_LID_CHANGE;
274 IB_EVENT_PKEY_CHANGE); 179 ib_dispatch_event(&event);
275 if (!dev->sriov.is_going_down)
276 __propagate_pkey_ev(dev, port_num, bn,
277 pkey_change_bitmap);
278 } 180 }
279 break;
280
281 case IB_SMP_ATTR_GUID_INFO:
282 /* paravirtualized master's guid is guid 0 -- does not change */
283 if (!mlx4_is_master(dev->dev))
284 mlx4_ib_dispatch_event(dev, port_num,
285 IB_EVENT_GID_CHANGE);
286 /*if master, notify relevant slaves*/
287 if (mlx4_is_master(dev->dev) &&
288 !dev->sriov.is_going_down) {
289 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
290 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
291 (u8 *)(&((struct ib_smp *)mad)->data));
292 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
293 (u8 *)(&((struct ib_smp *)mad)->data));
294 }
295 break;
296
297 default:
298 break;
299 } 181 }
300}
301 182
302static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, 183 if (mad->mad_hdr.attr_id == IB_SMP_ATTR_PKEY_TABLE) {
303 int block, u32 change_bitmap) 184 event.device = ibdev;
304{ 185 event.event = IB_EVENT_PKEY_CHANGE;
305 int i, ix, slave, err; 186 event.element.port_num = port_num;
306 int have_event = 0; 187 ib_dispatch_event(&event);
307
308 for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
309 if (slave == mlx4_master_func_num(dev->dev))
310 continue;
311 if (!mlx4_is_slave_active(dev->dev, slave))
312 continue;
313
314 have_event = 0;
315 for (i = 0; i < 32; i++) {
316 if (!(change_bitmap & (1 << i)))
317 continue;
318 for (ix = 0;
319 ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
320 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
321 [ix] == i + 32 * block) {
322 err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
323 pr_debug("propagate_pkey_ev: slave %d,"
324 " port %d, ix %d (%d)\n",
325 slave, port_num, ix, err);
326 have_event = 1;
327 break;
328 }
329 }
330 if (have_event)
331 break;
332 } 188 }
333 } 189 }
334} 190}
@@ -336,15 +192,13 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
336static void node_desc_override(struct ib_device *dev, 192static void node_desc_override(struct ib_device *dev,
337 struct ib_mad *mad) 193 struct ib_mad *mad)
338{ 194{
339 unsigned long flags;
340
341 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || 195 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
342 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) && 196 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
343 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && 197 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
344 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { 198 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
345 spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); 199 spin_lock(&to_mdev(dev)->sm_lock);
346 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); 200 memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64);
347 spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); 201 spin_unlock(&to_mdev(dev)->sm_lock);
348 } 202 }
349} 203}
350 204
@@ -354,7 +208,6 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
354 struct ib_mad_send_buf *send_buf; 208 struct ib_mad_send_buf *send_buf;
355 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn]; 209 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
356 int ret; 210 int ret;
357 unsigned long flags;
358 211
359 if (agent) { 212 if (agent) {
360 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR, 213 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
@@ -367,276 +220,19 @@ static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, struct ib_mad *ma
367 * wrong following the IB spec strictly, but we know 220 * wrong following the IB spec strictly, but we know
368 * it's OK for our devices). 221 * it's OK for our devices).
369 */ 222 */
370 spin_lock_irqsave(&dev->sm_lock, flags); 223 spin_lock(&dev->sm_lock);
371 memcpy(send_buf->mad, mad, sizeof *mad); 224 memcpy(send_buf->mad, mad, sizeof *mad);
372 if ((send_buf->ah = dev->sm_ah[port_num - 1])) 225 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
373 ret = ib_post_send_mad(send_buf, NULL); 226 ret = ib_post_send_mad(send_buf, NULL);
374 else 227 else
375 ret = -EINVAL; 228 ret = -EINVAL;
376 spin_unlock_irqrestore(&dev->sm_lock, flags); 229 spin_unlock(&dev->sm_lock);
377 230
378 if (ret) 231 if (ret)
379 ib_free_send_mad(send_buf); 232 ib_free_send_mad(send_buf);
380 } 233 }
381} 234}
382 235
383static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
384 struct ib_sa_mad *sa_mad)
385{
386 int ret = 0;
387
388 /* dispatch to different sa handlers */
389 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
390 case IB_SA_ATTR_MC_MEMBER_REC:
391 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
392 break;
393 default:
394 break;
395 }
396 return ret;
397}
398
399int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
400{
401 struct mlx4_ib_dev *dev = to_mdev(ibdev);
402 int i;
403
404 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
405 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
406 return i;
407 }
408 return -1;
409}
410
411
412static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
413 u8 port, u16 pkey, u16 *ix)
414{
415 int i, ret;
416 u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
417 u16 slot_pkey;
418
419 if (slave == mlx4_master_func_num(dev->dev))
420 return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
421
422 unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
423
424 for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
425 if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
426 continue;
427
428 pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
429
430 ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
431 if (ret)
432 continue;
433 if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
434 if (slot_pkey & 0x8000) {
435 *ix = (u16) pkey_ix;
436 return 0;
437 } else {
438 /* take first partial pkey index found */
439 if (partial_ix == 0xFF)
440 partial_ix = pkey_ix;
441 }
442 }
443 }
444
445 if (partial_ix < 0xFF) {
446 *ix = (u16) partial_ix;
447 return 0;
448 }
449
450 return -EINVAL;
451}
452
453int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
454 enum ib_qp_type dest_qpt, struct ib_wc *wc,
455 struct ib_grh *grh, struct ib_mad *mad)
456{
457 struct ib_sge list;
458 struct ib_send_wr wr, *bad_wr;
459 struct mlx4_ib_demux_pv_ctx *tun_ctx;
460 struct mlx4_ib_demux_pv_qp *tun_qp;
461 struct mlx4_rcv_tunnel_mad *tun_mad;
462 struct ib_ah_attr attr;
463 struct ib_ah *ah;
464 struct ib_qp *src_qp = NULL;
465 unsigned tun_tx_ix = 0;
466 int dqpn;
467 int ret = 0;
468 u16 tun_pkey_ix;
469 u16 cached_pkey;
470
471 if (dest_qpt > IB_QPT_GSI)
472 return -EINVAL;
473
474 tun_ctx = dev->sriov.demux[port-1].tun[slave];
475
476 /* check if proxy qp created */
477 if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
478 return -EAGAIN;
479
480 /* QP0 forwarding only for Dom0 */
481 if (!dest_qpt && (mlx4_master_func_num(dev->dev) != slave))
482 return -EINVAL;
483
484 if (!dest_qpt)
485 tun_qp = &tun_ctx->qp[0];
486 else
487 tun_qp = &tun_ctx->qp[1];
488
489 /* compute P_Key index to put in tunnel header for slave */
490 if (dest_qpt) {
491 u16 pkey_ix;
492 ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
493 if (ret)
494 return -EINVAL;
495
496 ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
497 if (ret)
498 return -EINVAL;
499 tun_pkey_ix = pkey_ix;
500 } else
501 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
502
503 dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
504
505 /* get tunnel tx data buf for slave */
506 src_qp = tun_qp->qp;
507
508 /* create ah. Just need an empty one with the port num for the post send.
509 * The driver will set the force loopback bit in post_send */
510 memset(&attr, 0, sizeof attr);
511 attr.port_num = port;
512 ah = ib_create_ah(tun_ctx->pd, &attr);
513 if (IS_ERR(ah))
514 return -ENOMEM;
515
516 /* allocate tunnel tx buf after pass failure returns */
517 spin_lock(&tun_qp->tx_lock);
518 if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
519 (MLX4_NUM_TUNNEL_BUFS - 1))
520 ret = -EAGAIN;
521 else
522 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
523 spin_unlock(&tun_qp->tx_lock);
524 if (ret)
525 goto out;
526
527 tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
528 if (tun_qp->tx_ring[tun_tx_ix].ah)
529 ib_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
530 tun_qp->tx_ring[tun_tx_ix].ah = ah;
531 ib_dma_sync_single_for_cpu(&dev->ib_dev,
532 tun_qp->tx_ring[tun_tx_ix].buf.map,
533 sizeof (struct mlx4_rcv_tunnel_mad),
534 DMA_TO_DEVICE);
535
536 /* copy over to tunnel buffer */
537 if (grh)
538 memcpy(&tun_mad->grh, grh, sizeof *grh);
539 memcpy(&tun_mad->mad, mad, sizeof *mad);
540
541 /* adjust tunnel data */
542 tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
543 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
544 tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
545 tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
546 tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
547
548 ib_dma_sync_single_for_device(&dev->ib_dev,
549 tun_qp->tx_ring[tun_tx_ix].buf.map,
550 sizeof (struct mlx4_rcv_tunnel_mad),
551 DMA_TO_DEVICE);
552
553 list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
554 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
555 list.lkey = tun_ctx->mr->lkey;
556
557 wr.wr.ud.ah = ah;
558 wr.wr.ud.port_num = port;
559 wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
560 wr.wr.ud.remote_qpn = dqpn;
561 wr.next = NULL;
562 wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
563 wr.sg_list = &list;
564 wr.num_sge = 1;
565 wr.opcode = IB_WR_SEND;
566 wr.send_flags = IB_SEND_SIGNALED;
567
568 ret = ib_post_send(src_qp, &wr, &bad_wr);
569out:
570 if (ret)
571 ib_destroy_ah(ah);
572 return ret;
573}
574
575static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
576 struct ib_wc *wc, struct ib_grh *grh,
577 struct ib_mad *mad)
578{
579 struct mlx4_ib_dev *dev = to_mdev(ibdev);
580 int err;
581 int slave;
582 u8 *slave_id;
583
584 /* Initially assume that this mad is for us */
585 slave = mlx4_master_func_num(dev->dev);
586
587 /* See if the slave id is encoded in a response mad */
588 if (mad->mad_hdr.method & 0x80) {
589 slave_id = (u8 *) &mad->mad_hdr.tid;
590 slave = *slave_id;
591 if (slave != 255) /*255 indicates the dom0*/
592 *slave_id = 0; /* remap tid */
593 }
594
595 /* If a grh is present, we demux according to it */
596 if (wc->wc_flags & IB_WC_GRH) {
597 slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id);
598 if (slave < 0) {
599 mlx4_ib_warn(ibdev, "failed matching grh\n");
600 return -ENOENT;
601 }
602 }
603 /* Class-specific handling */
604 switch (mad->mad_hdr.mgmt_class) {
605 case IB_MGMT_CLASS_SUBN_ADM:
606 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
607 (struct ib_sa_mad *) mad))
608 return 0;
609 break;
610 case IB_MGMT_CLASS_CM:
611 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
612 return 0;
613 break;
614 case IB_MGMT_CLASS_DEVICE_MGMT:
615 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
616 return 0;
617 break;
618 default:
619 /* Drop unsupported classes for slaves in tunnel mode */
620 if (slave != mlx4_master_func_num(dev->dev)) {
621 pr_debug("dropping unsupported ingress mad from class:%d "
622 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
623 return 0;
624 }
625 }
626 /*make sure that no slave==255 was not handled yet.*/
627 if (slave >= dev->dev->caps.sqp_demux) {
628 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
629 slave, dev->dev->caps.sqp_demux);
630 return -ENOENT;
631 }
632
633 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
634 if (err)
635 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
636 slave, err);
637 return 0;
638}
639
640static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 236static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
641 struct ib_wc *in_wc, struct ib_grh *in_grh, 237 struct ib_wc *in_wc, struct ib_grh *in_grh,
642 struct ib_mad *in_mad, struct ib_mad *out_mad) 238 struct ib_mad *in_mad, struct ib_mad *out_mad)
@@ -645,25 +241,6 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
645 int err; 241 int err;
646 struct ib_port_attr pattr; 242 struct ib_port_attr pattr;
647 243
648 if (in_wc && in_wc->qp->qp_num) {
649 pr_debug("received MAD: slid:%d sqpn:%d "
650 "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
651 in_wc->slid, in_wc->src_qp,
652 in_wc->dlid_path_bits,
653 in_wc->qp->qp_num,
654 in_wc->wc_flags,
655 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
656 be16_to_cpu(in_mad->mad_hdr.attr_id));
657 if (in_wc->wc_flags & IB_WC_GRH) {
658 pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
659 be64_to_cpu(in_grh->sgid.global.subnet_prefix),
660 be64_to_cpu(in_grh->sgid.global.interface_id));
661 pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
662 be64_to_cpu(in_grh->dgid.global.subnet_prefix),
663 be64_to_cpu(in_grh->dgid.global.interface_id));
664 }
665 }
666
667 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE); 244 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
668 245
669 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) { 246 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
@@ -679,9 +256,12 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
679 return IB_MAD_RESULT_SUCCESS; 256 return IB_MAD_RESULT_SUCCESS;
680 257
681 /* 258 /*
682 * Don't process SMInfo queries -- the SMA can't handle them. 259 * Don't process SMInfo queries or vendor-specific
260 * MADs -- the SMA can't handle them.
683 */ 261 */
684 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) 262 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO ||
263 ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) ==
264 IB_SMP_ATTR_VENDOR_MASK))
685 return IB_MAD_RESULT_SUCCESS; 265 return IB_MAD_RESULT_SUCCESS;
686 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || 266 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
687 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || 267 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
@@ -701,19 +281,15 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
701 prev_lid = pattr.lid; 281 prev_lid = pattr.lid;
702 282
703 err = mlx4_MAD_IFC(to_mdev(ibdev), 283 err = mlx4_MAD_IFC(to_mdev(ibdev),
704 (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) | 284 mad_flags & IB_MAD_IGNORE_MKEY,
705 (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) | 285 mad_flags & IB_MAD_IGNORE_BKEY,
706 MLX4_MAD_IFC_NET_VIEW,
707 port_num, in_wc, in_grh, in_mad, out_mad); 286 port_num, in_wc, in_grh, in_mad, out_mad);
708 if (err) 287 if (err)
709 return IB_MAD_RESULT_FAILURE; 288 return IB_MAD_RESULT_FAILURE;
710 289
711 if (!out_mad->mad_hdr.status) { 290 if (!out_mad->mad_hdr.status) {
712 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) 291 smp_snoop(ibdev, port_num, in_mad, prev_lid);
713 smp_snoop(ibdev, port_num, in_mad, prev_lid); 292 node_desc_override(ibdev, out_mad);
714 /* slaves get node desc from FW */
715 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
716 node_desc_override(ibdev, out_mad);
717 } 293 }
718 294
719 /* set return bit in status of directed route responses */ 295 /* set return bit in status of directed route responses */
@@ -754,8 +330,7 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
754 return IB_MAD_RESULT_FAILURE; 330 return IB_MAD_RESULT_FAILURE;
755 331
756 err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, 332 err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
757 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, 333 MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
758 MLX4_CMD_WRAPPED);
759 if (err) 334 if (err)
760 err = IB_MAD_RESULT_FAILURE; 335 err = IB_MAD_RESULT_FAILURE;
761 else { 336 else {
@@ -796,8 +371,6 @@ int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
796static void send_handler(struct ib_mad_agent *agent, 371static void send_handler(struct ib_mad_agent *agent,
797 struct ib_mad_send_wc *mad_send_wc) 372 struct ib_mad_send_wc *mad_send_wc)
798{ 373{
799 if (mad_send_wc->send_buf->context[0])
800 ib_destroy_ah(mad_send_wc->send_buf->context[0]);
801 ib_free_send_mad(mad_send_wc->send_buf); 374 ib_free_send_mad(mad_send_wc->send_buf);
802} 375}
803 376
@@ -855,1194 +428,3 @@ void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
855 ib_destroy_ah(dev->sm_ah[p]); 428 ib_destroy_ah(dev->sm_ah[p]);
856 } 429 }
857} 430}
858
859static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
860{
861 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
862
863 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
864 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
865 MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
866}
867
868static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
869{
870 /* re-configure the alias-guid and mcg's */
871 if (mlx4_is_master(dev->dev)) {
872 mlx4_ib_invalidate_all_guid_record(dev, port_num);
873
874 if (!dev->sriov.is_going_down) {
875 mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
876 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
877 MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
878 }
879 }
880 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
881}
882
883static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
884 struct mlx4_eqe *eqe)
885{
886 __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
887 GET_MASK_FROM_EQE(eqe));
888}
889
890static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
891 u32 guid_tbl_blk_num, u32 change_bitmap)
892{
893 struct ib_smp *in_mad = NULL;
894 struct ib_smp *out_mad = NULL;
895 u16 i;
896
897 if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
898 return;
899
900 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
901 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
902 if (!in_mad || !out_mad) {
903 mlx4_ib_warn(&dev->ib_dev, "failed to allocate memory for guid info mads\n");
904 goto out;
905 }
906
907 guid_tbl_blk_num *= 4;
908
909 for (i = 0; i < 4; i++) {
910 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
911 continue;
912 memset(in_mad, 0, sizeof *in_mad);
913 memset(out_mad, 0, sizeof *out_mad);
914
915 in_mad->base_version = 1;
916 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
917 in_mad->class_version = 1;
918 in_mad->method = IB_MGMT_METHOD_GET;
919 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
920 in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
921
922 if (mlx4_MAD_IFC(dev,
923 MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
924 port_num, NULL, NULL, in_mad, out_mad)) {
925 mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
926 goto out;
927 }
928
929 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
930 port_num,
931 (u8 *)(&((struct ib_smp *)out_mad)->data));
932 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
933 port_num,
934 (u8 *)(&((struct ib_smp *)out_mad)->data));
935 }
936
937out:
938 kfree(in_mad);
939 kfree(out_mad);
940 return;
941}
942
943void handle_port_mgmt_change_event(struct work_struct *work)
944{
945 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
946 struct mlx4_ib_dev *dev = ew->ib_dev;
947 struct mlx4_eqe *eqe = &(ew->ib_eqe);
948 u8 port = eqe->event.port_mgmt_change.port;
949 u32 changed_attr;
950 u32 tbl_block;
951 u32 change_bitmap;
952
953 switch (eqe->subtype) {
954 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
955 changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
956
957 /* Update the SM ah - This should be done before handling
958 the other changed attributes so that MADs can be sent to the SM */
959 if (changed_attr & MSTR_SM_CHANGE_MASK) {
960 u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
961 u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
962 update_sm_ah(dev, port, lid, sl);
963 }
964
965 /* Check if it is a lid change event */
966 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
967 handle_lid_change_event(dev, port);
968
969 /* Generate GUID changed event */
970 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
971 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
972 /*if master, notify all slaves*/
973 if (mlx4_is_master(dev->dev))
974 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
975 MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
976 }
977
978 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
979 handle_client_rereg_event(dev, port);
980 break;
981
982 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
983 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
984 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
985 propagate_pkey_ev(dev, port, eqe);
986 break;
987 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
988 /* paravirtualized master's guid is guid 0 -- does not change */
989 if (!mlx4_is_master(dev->dev))
990 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
991 /*if master, notify relevant slaves*/
992 else if (!dev->sriov.is_going_down) {
993 tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
994 change_bitmap = GET_MASK_FROM_EQE(eqe);
995 handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
996 }
997 break;
998 default:
999 pr_warn("Unsupported subtype 0x%x for "
1000 "Port Management Change event\n", eqe->subtype);
1001 }
1002
1003 kfree(ew);
1004}
1005
1006void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
1007 enum ib_event_type type)
1008{
1009 struct ib_event event;
1010
1011 event.device = &dev->ib_dev;
1012 event.element.port_num = port_num;
1013 event.event = type;
1014
1015 ib_dispatch_event(&event);
1016}
1017
1018static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1019{
1020 unsigned long flags;
1021 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1022 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1023 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1024 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1025 queue_work(ctx->wq, &ctx->work);
1026 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1027}
1028
1029static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1030 struct mlx4_ib_demux_pv_qp *tun_qp,
1031 int index)
1032{
1033 struct ib_sge sg_list;
1034 struct ib_recv_wr recv_wr, *bad_recv_wr;
1035 int size;
1036
1037 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1038 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1039
1040 sg_list.addr = tun_qp->ring[index].map;
1041 sg_list.length = size;
1042 sg_list.lkey = ctx->mr->lkey;
1043
1044 recv_wr.next = NULL;
1045 recv_wr.sg_list = &sg_list;
1046 recv_wr.num_sge = 1;
1047 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1048 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1049 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1050 size, DMA_FROM_DEVICE);
1051 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1052}
1053
1054static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1055 int slave, struct ib_sa_mad *sa_mad)
1056{
1057 int ret = 0;
1058
1059 /* dispatch to different sa handlers */
1060 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1061 case IB_SA_ATTR_MC_MEMBER_REC:
1062 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1063 break;
1064 default:
1065 break;
1066 }
1067 return ret;
1068}
1069
1070static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1071{
1072 int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
1073
1074 return (qpn >= proxy_start && qpn <= proxy_start + 1);
1075}
1076
1077
1078int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1079 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
1080 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad)
1081{
1082 struct ib_sge list;
1083 struct ib_send_wr wr, *bad_wr;
1084 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1085 struct mlx4_ib_demux_pv_qp *sqp;
1086 struct mlx4_mad_snd_buf *sqp_mad;
1087 struct ib_ah *ah;
1088 struct ib_qp *send_qp = NULL;
1089 unsigned wire_tx_ix = 0;
1090 int ret = 0;
1091 u16 wire_pkey_ix;
1092 int src_qpnum;
1093 u8 sgid_index;
1094
1095
1096 sqp_ctx = dev->sriov.sqps[port-1];
1097
1098 /* check if proxy qp created */
1099 if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1100 return -EAGAIN;
1101
1102 /* QP0 forwarding only for Dom0 */
1103 if (dest_qpt == IB_QPT_SMI && (mlx4_master_func_num(dev->dev) != slave))
1104 return -EINVAL;
1105
1106 if (dest_qpt == IB_QPT_SMI) {
1107 src_qpnum = 0;
1108 sqp = &sqp_ctx->qp[0];
1109 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1110 } else {
1111 src_qpnum = 1;
1112 sqp = &sqp_ctx->qp[1];
1113 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1114 }
1115
1116 send_qp = sqp->qp;
1117
1118 /* create ah */
1119 sgid_index = attr->grh.sgid_index;
1120 attr->grh.sgid_index = 0;
1121 ah = ib_create_ah(sqp_ctx->pd, attr);
1122 if (IS_ERR(ah))
1123 return -ENOMEM;
1124 attr->grh.sgid_index = sgid_index;
1125 to_mah(ah)->av.ib.gid_index = sgid_index;
1126 /* get rid of force-loopback bit */
1127 to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1128 spin_lock(&sqp->tx_lock);
1129 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1130 (MLX4_NUM_TUNNEL_BUFS - 1))
1131 ret = -EAGAIN;
1132 else
1133 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
1134 spin_unlock(&sqp->tx_lock);
1135 if (ret)
1136 goto out;
1137
1138 sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1139 if (sqp->tx_ring[wire_tx_ix].ah)
1140 ib_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
1141 sqp->tx_ring[wire_tx_ix].ah = ah;
1142 ib_dma_sync_single_for_cpu(&dev->ib_dev,
1143 sqp->tx_ring[wire_tx_ix].buf.map,
1144 sizeof (struct mlx4_mad_snd_buf),
1145 DMA_TO_DEVICE);
1146
1147 memcpy(&sqp_mad->payload, mad, sizeof *mad);
1148
1149 ib_dma_sync_single_for_device(&dev->ib_dev,
1150 sqp->tx_ring[wire_tx_ix].buf.map,
1151 sizeof (struct mlx4_mad_snd_buf),
1152 DMA_TO_DEVICE);
1153
1154 list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1155 list.length = sizeof (struct mlx4_mad_snd_buf);
1156 list.lkey = sqp_ctx->mr->lkey;
1157
1158 wr.wr.ud.ah = ah;
1159 wr.wr.ud.port_num = port;
1160 wr.wr.ud.pkey_index = wire_pkey_ix;
1161 wr.wr.ud.remote_qkey = qkey;
1162 wr.wr.ud.remote_qpn = remote_qpn;
1163 wr.next = NULL;
1164 wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1165 wr.sg_list = &list;
1166 wr.num_sge = 1;
1167 wr.opcode = IB_WR_SEND;
1168 wr.send_flags = IB_SEND_SIGNALED;
1169
1170 ret = ib_post_send(send_qp, &wr, &bad_wr);
1171out:
1172 if (ret)
1173 ib_destroy_ah(ah);
1174 return ret;
1175}
1176
1177static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1178{
1179 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1180 struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1181 int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1182 struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1183 struct mlx4_ib_ah ah;
1184 struct ib_ah_attr ah_attr;
1185 u8 *slave_id;
1186 int slave;
1187
1188 /* Get slave that sent this packet */
1189 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1190 wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1191 (wc->src_qp & 0x1) != ctx->port - 1 ||
1192 wc->src_qp & 0x4) {
1193 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1194 return;
1195 }
1196 slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1197 if (slave != ctx->slave) {
1198 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1199 "belongs to another slave\n", wc->src_qp);
1200 return;
1201 }
1202 if (slave != mlx4_master_func_num(dev->dev) && !(wc->src_qp & 0x2)) {
1203 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1204 "non-master trying to send QP0 packets\n", wc->src_qp);
1205 return;
1206 }
1207
1208 /* Map transaction ID */
1209 ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1210 sizeof (struct mlx4_tunnel_mad),
1211 DMA_FROM_DEVICE);
1212 switch (tunnel->mad.mad_hdr.method) {
1213 case IB_MGMT_METHOD_SET:
1214 case IB_MGMT_METHOD_GET:
1215 case IB_MGMT_METHOD_REPORT:
1216 case IB_SA_METHOD_GET_TABLE:
1217 case IB_SA_METHOD_DELETE:
1218 case IB_SA_METHOD_GET_MULTI:
1219 case IB_SA_METHOD_GET_TRACE_TBL:
1220 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1221 if (*slave_id) {
1222 mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1223 "class:%d slave:%d\n", *slave_id,
1224 tunnel->mad.mad_hdr.mgmt_class, slave);
1225 return;
1226 } else
1227 *slave_id = slave;
1228 default:
1229 /* nothing */;
1230 }
1231
1232 /* Class-specific handling */
1233 switch (tunnel->mad.mad_hdr.mgmt_class) {
1234 case IB_MGMT_CLASS_SUBN_ADM:
1235 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1236 (struct ib_sa_mad *) &tunnel->mad))
1237 return;
1238 break;
1239 case IB_MGMT_CLASS_CM:
1240 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1241 (struct ib_mad *) &tunnel->mad))
1242 return;
1243 break;
1244 case IB_MGMT_CLASS_DEVICE_MGMT:
1245 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1246 tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1247 return;
1248 break;
1249 default:
1250 /* Drop unsupported classes for slaves in tunnel mode */
1251 if (slave != mlx4_master_func_num(dev->dev)) {
1252 mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1253 "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1254 return;
1255 }
1256 }
1257
1258 /* We are using standard ib_core services to send the mad, so generate a
1259 * stadard address handle by decoding the tunnelled mlx4_ah fields */
1260 memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1261 ah.ibah.device = ctx->ib_dev;
1262 mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1263 if ((ah_attr.ah_flags & IB_AH_GRH) &&
1264 (ah_attr.grh.sgid_index != slave)) {
1265 mlx4_ib_warn(ctx->ib_dev, "slave:%d accessed invalid sgid_index:%d\n",
1266 slave, ah_attr.grh.sgid_index);
1267 return;
1268 }
1269
1270 mlx4_ib_send_to_wire(dev, slave, ctx->port,
1271 is_proxy_qp0(dev, wc->src_qp, slave) ?
1272 IB_QPT_SMI : IB_QPT_GSI,
1273 be16_to_cpu(tunnel->hdr.pkey_index),
1274 be32_to_cpu(tunnel->hdr.remote_qpn),
1275 be32_to_cpu(tunnel->hdr.qkey),
1276 &ah_attr, &tunnel->mad);
1277}
1278
1279static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1280 enum ib_qp_type qp_type, int is_tun)
1281{
1282 int i;
1283 struct mlx4_ib_demux_pv_qp *tun_qp;
1284 int rx_buf_size, tx_buf_size;
1285
1286 if (qp_type > IB_QPT_GSI)
1287 return -EINVAL;
1288
1289 tun_qp = &ctx->qp[qp_type];
1290
1291 tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
1292 GFP_KERNEL);
1293 if (!tun_qp->ring)
1294 return -ENOMEM;
1295
1296 tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
1297 sizeof (struct mlx4_ib_tun_tx_buf),
1298 GFP_KERNEL);
1299 if (!tun_qp->tx_ring) {
1300 kfree(tun_qp->ring);
1301 tun_qp->ring = NULL;
1302 return -ENOMEM;
1303 }
1304
1305 if (is_tun) {
1306 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1307 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1308 } else {
1309 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1310 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1311 }
1312
1313 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1314 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1315 if (!tun_qp->ring[i].addr)
1316 goto err;
1317 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1318 tun_qp->ring[i].addr,
1319 rx_buf_size,
1320 DMA_FROM_DEVICE);
1321 }
1322
1323 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1324 tun_qp->tx_ring[i].buf.addr =
1325 kmalloc(tx_buf_size, GFP_KERNEL);
1326 if (!tun_qp->tx_ring[i].buf.addr)
1327 goto tx_err;
1328 tun_qp->tx_ring[i].buf.map =
1329 ib_dma_map_single(ctx->ib_dev,
1330 tun_qp->tx_ring[i].buf.addr,
1331 tx_buf_size,
1332 DMA_TO_DEVICE);
1333 tun_qp->tx_ring[i].ah = NULL;
1334 }
1335 spin_lock_init(&tun_qp->tx_lock);
1336 tun_qp->tx_ix_head = 0;
1337 tun_qp->tx_ix_tail = 0;
1338 tun_qp->proxy_qpt = qp_type;
1339
1340 return 0;
1341
1342tx_err:
1343 while (i > 0) {
1344 --i;
1345 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1346 tx_buf_size, DMA_TO_DEVICE);
1347 kfree(tun_qp->tx_ring[i].buf.addr);
1348 }
1349 kfree(tun_qp->tx_ring);
1350 tun_qp->tx_ring = NULL;
1351 i = MLX4_NUM_TUNNEL_BUFS;
1352err:
1353 while (i > 0) {
1354 --i;
1355 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1356 rx_buf_size, DMA_FROM_DEVICE);
1357 kfree(tun_qp->ring[i].addr);
1358 }
1359 kfree(tun_qp->ring);
1360 tun_qp->ring = NULL;
1361 return -ENOMEM;
1362}
1363
1364static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1365 enum ib_qp_type qp_type, int is_tun)
1366{
1367 int i;
1368 struct mlx4_ib_demux_pv_qp *tun_qp;
1369 int rx_buf_size, tx_buf_size;
1370
1371 if (qp_type > IB_QPT_GSI)
1372 return;
1373
1374 tun_qp = &ctx->qp[qp_type];
1375 if (is_tun) {
1376 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1377 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1378 } else {
1379 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1380 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1381 }
1382
1383
1384 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1385 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1386 rx_buf_size, DMA_FROM_DEVICE);
1387 kfree(tun_qp->ring[i].addr);
1388 }
1389
1390 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1391 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1392 tx_buf_size, DMA_TO_DEVICE);
1393 kfree(tun_qp->tx_ring[i].buf.addr);
1394 if (tun_qp->tx_ring[i].ah)
1395 ib_destroy_ah(tun_qp->tx_ring[i].ah);
1396 }
1397 kfree(tun_qp->tx_ring);
1398 kfree(tun_qp->ring);
1399}
1400
1401static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1402{
1403 struct mlx4_ib_demux_pv_ctx *ctx;
1404 struct mlx4_ib_demux_pv_qp *tun_qp;
1405 struct ib_wc wc;
1406 int ret;
1407 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1408 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1409
1410 while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1411 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1412 if (wc.status == IB_WC_SUCCESS) {
1413 switch (wc.opcode) {
1414 case IB_WC_RECV:
1415 mlx4_ib_multiplex_mad(ctx, &wc);
1416 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1417 wc.wr_id &
1418 (MLX4_NUM_TUNNEL_BUFS - 1));
1419 if (ret)
1420 pr_err("Failed reposting tunnel "
1421 "buf:%lld\n", wc.wr_id);
1422 break;
1423 case IB_WC_SEND:
1424 pr_debug("received tunnel send completion:"
1425 "wrid=0x%llx, status=0x%x\n",
1426 wc.wr_id, wc.status);
1427 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1428 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1429 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1430 = NULL;
1431 spin_lock(&tun_qp->tx_lock);
1432 tun_qp->tx_ix_tail++;
1433 spin_unlock(&tun_qp->tx_lock);
1434
1435 break;
1436 default:
1437 break;
1438 }
1439 } else {
1440 pr_debug("mlx4_ib: completion error in tunnel: %d."
1441 " status = %d, wrid = 0x%llx\n",
1442 ctx->slave, wc.status, wc.wr_id);
1443 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1444 ib_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1445 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1446 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1447 = NULL;
1448 spin_lock(&tun_qp->tx_lock);
1449 tun_qp->tx_ix_tail++;
1450 spin_unlock(&tun_qp->tx_lock);
1451 }
1452 }
1453 }
1454}
1455
1456static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1457{
1458 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1459
1460 /* It's worse than that! He's dead, Jim! */
1461 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1462 event->event, sqp->port);
1463}
1464
/*
 * Create and activate one para-virtual special QP for this context.
 *
 * @ctx:        per-slave/per-port PV context supplying PD, CQ and port.
 * @qp_type:    IB_QPT_SMI (QP0) or IB_QPT_GSI (QP1); also the index into
 *              ctx->qp[].
 * @create_tun: non-zero creates a tunnel QP (always IB_QPT_UD, flagged
 *              MLX4_IB_SRIOV_TUNNEL_QP, bound to this slave); zero
 *              creates the real proxy SQP (MLX4_IB_SRIOV_SQP).
 *
 * The QP is walked through INIT -> RTR -> RTS and the full receive ring
 * (MLX4_NUM_TUNNEL_BUFS buffers) is posted.  Returns 0 on success or a
 * negative errno; on any failure the QP is destroyed and
 * ctx->qp[qp_type].qp is left NULL.
 */
static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
			 enum ib_qp_type qp_type, int create_tun)
{
	int i, ret;
	struct mlx4_ib_demux_pv_qp *tun_qp;
	struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
	struct ib_qp_attr attr;
	int qp_attr_mask_INIT;

	/* only SMI and GSI QPs are handled here */
	if (qp_type > IB_QPT_GSI)
		return -EINVAL;

	tun_qp = &ctx->qp[qp_type];

	memset(&qp_init_attr, 0, sizeof qp_init_attr);
	qp_init_attr.init_attr.send_cq = ctx->cq;
	qp_init_attr.init_attr.recv_cq = ctx->cq;
	qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
	qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
	qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
	qp_init_attr.init_attr.cap.max_send_sge = 1;
	qp_init_attr.init_attr.cap.max_recv_sge = 1;
	if (create_tun) {
		/* tunnel QPs are plain UD QPs tied to a specific slave/port;
		 * the INIT transition also needs an explicit port */
		qp_init_attr.init_attr.qp_type = IB_QPT_UD;
		qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
		qp_init_attr.port = ctx->port;
		qp_init_attr.slave = ctx->slave;
		qp_init_attr.proxy_qp_type = qp_type;
		qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
			   IB_QP_QKEY | IB_QP_PORT;
	} else {
		qp_init_attr.init_attr.qp_type = qp_type;
		qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
		qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
	}
	qp_init_attr.init_attr.port_num = ctx->port;
	qp_init_attr.init_attr.qp_context = ctx;
	qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
	tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
	if (IS_ERR(tun_qp->qp)) {
		ret = PTR_ERR(tun_qp->qp);
		tun_qp->qp = NULL;
		pr_err("Couldn't create %s QP (%d)\n",
		       create_tun ? "tunnel" : "special", ret);
		return ret;
	}

	/* INIT: use the slave's virtual pkey index 0 mapped to physical */
	memset(&attr, 0, sizeof attr);
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index =
		to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = ctx->port;
	ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
	if (ret) {
		pr_err("Couldn't change %s qp state to INIT (%d)\n",
		       create_tun ? "tunnel" : "special", ret);
		goto err_qp;
	}
	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
	if (ret) {
		pr_err("Couldn't change %s qp state to RTR (%d)\n",
		       create_tun ? "tunnel" : "special", ret);
		goto err_qp;
	}
	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		pr_err("Couldn't change %s qp state to RTS (%d)\n",
		       create_tun ? "tunnel" : "special", ret);
		goto err_qp;
	}

	/* fill the receive ring so MADs can arrive immediately */
	for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
		ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
		if (ret) {
			pr_err(" mlx4_ib_post_pv_buf error"
			       " (err = %d, i = %d)\n", ret, i);
			goto err_qp;
		}
	}
	return 0;

err_qp:
	ib_destroy_qp(tun_qp->qp);
	tun_qp->qp = NULL;
	return ret;
}
1555
1556/*
1557 * IB MAD completion callback for real SQPs
1558 */
1559static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1560{
1561 struct mlx4_ib_demux_pv_ctx *ctx;
1562 struct mlx4_ib_demux_pv_qp *sqp;
1563 struct ib_wc wc;
1564 struct ib_grh *grh;
1565 struct ib_mad *mad;
1566
1567 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1568 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1569
1570 while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1571 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1572 if (wc.status == IB_WC_SUCCESS) {
1573 switch (wc.opcode) {
1574 case IB_WC_SEND:
1575 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1576 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1577 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1578 = NULL;
1579 spin_lock(&sqp->tx_lock);
1580 sqp->tx_ix_tail++;
1581 spin_unlock(&sqp->tx_lock);
1582 break;
1583 case IB_WC_RECV:
1584 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1585 (sqp->ring[wc.wr_id &
1586 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
1587 grh = &(((struct mlx4_mad_rcv_buf *)
1588 (sqp->ring[wc.wr_id &
1589 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
1590 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1591 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1592 (MLX4_NUM_TUNNEL_BUFS - 1)))
1593 pr_err("Failed reposting SQP "
1594 "buf:%lld\n", wc.wr_id);
1595 break;
1596 default:
1597 BUG_ON(1);
1598 break;
1599 }
1600 } else {
1601 pr_debug("mlx4_ib: completion error in tunnel: %d."
1602 " status = %d, wrid = 0x%llx\n",
1603 ctx->slave, wc.status, wc.wr_id);
1604 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1605 ib_destroy_ah(sqp->tx_ring[wc.wr_id &
1606 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1607 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1608 = NULL;
1609 spin_lock(&sqp->tx_lock);
1610 sqp->tx_ix_tail++;
1611 spin_unlock(&sqp->tx_lock);
1612 }
1613 }
1614 }
1615}
1616
1617static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1618 struct mlx4_ib_demux_pv_ctx **ret_ctx)
1619{
1620 struct mlx4_ib_demux_pv_ctx *ctx;
1621
1622 *ret_ctx = NULL;
1623 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1624 if (!ctx) {
1625 pr_err("failed allocating pv resource context "
1626 "for port %d, slave %d\n", port, slave);
1627 return -ENOMEM;
1628 }
1629
1630 ctx->ib_dev = &dev->ib_dev;
1631 ctx->port = port;
1632 ctx->slave = slave;
1633 *ret_ctx = ctx;
1634 return 0;
1635}
1636
1637static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1638{
1639 if (dev->sriov.demux[port - 1].tun[slave]) {
1640 kfree(dev->sriov.demux[port - 1].tun[slave]);
1641 dev->sriov.demux[port - 1].tun[slave] = NULL;
1642 }
1643}
1644
/*
 * Allocate and activate the full set of para-virt resources for one
 * slave/port context: MAD buffers, a shared CQ, PD, DMA MR, and the
 * SMI/GSI QPs (tunnel QPs when @create_tun, real proxy SQPs otherwise).
 *
 * On success the context moves DOWN -> STARTING -> ACTIVE and its work
 * item / workqueue are wired up.  On failure everything acquired so far
 * is unwound in reverse order via the goto chain and the context is
 * returned to DEMUX_PV_STATE_DOWN.  Returns 0 or a negative errno
 * (-EEXIST if the context is not currently DOWN).
 */
static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
			       int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
{
	int ret, cq_size;

	if (ctx->state != DEMUX_PV_STATE_DOWN)
		return -EEXIST;

	ctx->state = DEMUX_PV_STATE_STARTING;
	/* have QP0 only on port owner, and only if link layer is IB */
	if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) &&
	    rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND)
		ctx->has_smi = 1;

	if (ctx->has_smi) {
		ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
		if (ret) {
			pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
			goto err_out;
		}
	}

	ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
	if (ret) {
		pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
		goto err_out_qp0;
	}

	/* RX + TX rings of QP1, doubled again when QP0 is also present */
	cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
	if (ctx->has_smi)
		cq_size *= 2;

	ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
			       NULL, ctx, cq_size, 0);
	if (IS_ERR(ctx->cq)) {
		ret = PTR_ERR(ctx->cq);
		pr_err("Couldn't create tunnel CQ (%d)\n", ret);
		goto err_buf;
	}

	ctx->pd = ib_alloc_pd(ctx->ib_dev);
	if (IS_ERR(ctx->pd)) {
		ret = PTR_ERR(ctx->pd);
		pr_err("Couldn't create tunnel PD (%d)\n", ret);
		goto err_cq;
	}

	ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(ctx->mr)) {
		ret = PTR_ERR(ctx->mr);
		pr_err("Couldn't get tunnel DMA MR (%d)\n", ret);
		goto err_pd;
	}

	if (ctx->has_smi) {
		ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
		if (ret) {
			pr_err("Couldn't create %s QP0 (%d)\n",
			       create_tun ? "tunnel for" : "", ret);
			goto err_mr;
		}
	}

	ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
	if (ret) {
		pr_err("Couldn't create %s QP1 (%d)\n",
		       create_tun ? "tunnel for" : "", ret);
		goto err_qp0;
	}

	/* tunnel and real-SQP contexts use different completion workers */
	if (create_tun)
		INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
	else
		INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);

	ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;

	/* arm the CQ so the worker gets scheduled on the first completion */
	ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
	if (ret) {
		pr_err("Couldn't arm tunnel cq (%d)\n", ret);
		goto err_wq;
	}
	ctx->state = DEMUX_PV_STATE_ACTIVE;
	return 0;

	/* unwind in strict reverse order of acquisition */
err_wq:
	ctx->wq = NULL;
	ib_destroy_qp(ctx->qp[1].qp);
	ctx->qp[1].qp = NULL;


err_qp0:
	if (ctx->has_smi)
		ib_destroy_qp(ctx->qp[0].qp);
	ctx->qp[0].qp = NULL;

err_mr:
	ib_dereg_mr(ctx->mr);
	ctx->mr = NULL;

err_pd:
	ib_dealloc_pd(ctx->pd);
	ctx->pd = NULL;

err_cq:
	ib_destroy_cq(ctx->cq);
	ctx->cq = NULL;

err_buf:
	mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);

err_out_qp0:
	if (ctx->has_smi)
		mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
err_out:
	ctx->state = DEMUX_PV_STATE_DOWN;
	return ret;
}
1763
/*
 * Tear down everything create_pv_resources() set up for @ctx, in the
 * reverse order of creation (QPs and their buffers, then MR, PD, CQ).
 *
 * @flush: non-zero to flush the context's workqueue first, ensuring the
 *         completion worker is not running while resources are freed.
 *
 * Safe to call with a NULL or already-DOWN context.  The state is moved
 * to DOWNING for the duration of the teardown and ends at DOWN.
 */
static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
				 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
{
	if (!ctx)
		return;
	if (ctx->state > DEMUX_PV_STATE_DOWN) {
		ctx->state = DEMUX_PV_STATE_DOWNING;
		if (flush)
			flush_workqueue(ctx->wq);
		if (ctx->has_smi) {
			ib_destroy_qp(ctx->qp[0].qp);
			ctx->qp[0].qp = NULL;
			mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
		}
		ib_destroy_qp(ctx->qp[1].qp);
		ctx->qp[1].qp = NULL;
		mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
		ib_dereg_mr(ctx->mr);
		ctx->mr = NULL;
		ib_dealloc_pd(ctx->pd);
		ctx->pd = NULL;
		ib_destroy_cq(ctx->cq);
		ctx->cq = NULL;
		ctx->state = DEMUX_PV_STATE_DOWN;
	}
}
1790
1791static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
1792 int port, int do_init)
1793{
1794 int ret = 0;
1795
1796 if (!do_init) {
1797 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
1798 /* for master, destroy real sqp resources */
1799 if (slave == mlx4_master_func_num(dev->dev))
1800 destroy_pv_resources(dev, slave, port,
1801 dev->sriov.sqps[port - 1], 1);
1802 /* destroy the tunnel qp resources */
1803 destroy_pv_resources(dev, slave, port,
1804 dev->sriov.demux[port - 1].tun[slave], 1);
1805 return 0;
1806 }
1807
1808 /* create the tunnel qp resources */
1809 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
1810 dev->sriov.demux[port - 1].tun[slave]);
1811
1812 /* for master, create the real sqp resources */
1813 if (!ret && slave == mlx4_master_func_num(dev->dev))
1814 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
1815 dev->sriov.sqps[port - 1]);
1816 return ret;
1817}
1818
1819void mlx4_ib_tunnels_update_work(struct work_struct *work)
1820{
1821 struct mlx4_ib_demux_work *dmxw;
1822
1823 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
1824 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
1825 dmxw->do_init);
1826 kfree(dmxw);
1827 return;
1828}
1829
/*
 * Allocate the per-port demux context on the master: the per-slave
 * tunnel-context array, multicast para-virt state, a single-threaded
 * tunnelling workqueue ("mlx4_ibt<port>") and an up/down workqueue
 * ("mlx4_ibud<port>").
 *
 * Returns 0 on success or -ENOMEM; on failure everything allocated so
 * far is released via the goto chain and ctx->tun is reset to NULL.
 */
static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
				       struct mlx4_ib_demux_ctx *ctx,
				       int port)
{
	char name[12];
	int ret = 0;
	int i;

	/* one PV context slot per potential slave (qp1 client) */
	ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
			   sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
	if (!ctx->tun)
		return -ENOMEM;

	ctx->dev = dev;
	ctx->port = port;
	ctx->ib_dev = &dev->ib_dev;

	for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
		ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
		if (ret) {
			ret = -ENOMEM;
			goto err_mcg;
		}
	}

	ret = mlx4_ib_mcg_port_init(ctx);
	if (ret) {
		pr_err("Failed initializing mcg para-virt (%d)\n", ret);
		goto err_mcg;
	}

	snprintf(name, sizeof name, "mlx4_ibt%d", port);
	ctx->wq = create_singlethread_workqueue(name);
	if (!ctx->wq) {
		pr_err("Failed to create tunnelling WQ for port %d\n", port);
		ret = -ENOMEM;
		goto err_wq;
	}

	snprintf(name, sizeof name, "mlx4_ibud%d", port);
	ctx->ud_wq = create_singlethread_workqueue(name);
	if (!ctx->ud_wq) {
		pr_err("Failed to create up/down WQ for port %d\n", port);
		ret = -ENOMEM;
		goto err_udwq;
	}

	return 0;

	/* unwind in reverse order of setup */
err_udwq:
	destroy_workqueue(ctx->wq);
	ctx->wq = NULL;

err_wq:
	mlx4_ib_mcg_port_cleanup(ctx, 1);
err_mcg:
	for (i = 0; i < dev->dev->caps.sqp_demux; i++)
		free_pv_object(dev, i, port);
	kfree(ctx->tun);
	ctx->tun = NULL;
	return ret;
}
1892
/*
 * Tear down the master's real-SQP context (mirror of
 * destroy_pv_resources(), but always flushes and frees the buffers with
 * create_tun == 0).  No-op if the context is already DOWN.
 */
static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
{
	if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
		sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
		/* make sure the completion worker has finished */
		flush_workqueue(sqp_ctx->wq);
		if (sqp_ctx->has_smi) {
			ib_destroy_qp(sqp_ctx->qp[0].qp);
			sqp_ctx->qp[0].qp = NULL;
			mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
		}
		ib_destroy_qp(sqp_ctx->qp[1].qp);
		sqp_ctx->qp[1].qp = NULL;
		mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
		ib_dereg_mr(sqp_ctx->mr);
		sqp_ctx->mr = NULL;
		ib_dealloc_pd(sqp_ctx->pd);
		sqp_ctx->pd = NULL;
		ib_destroy_cq(sqp_ctx->cq);
		sqp_ctx->cq = NULL;
		sqp_ctx->state = DEMUX_PV_STATE_DOWN;
	}
}
1915
/*
 * Free a per-port demux context: multicast para-virt state, every
 * slave's tunnel resources and PV object, the tun array, and both
 * workqueues.  All slave contexts are marked DOWNING before the
 * workqueue flush so in-flight workers see the shutdown.
 */
static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
{
	int i;
	if (ctx) {
		struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
		mlx4_ib_mcg_port_cleanup(ctx, 1);
		for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
			if (!ctx->tun[i])
				continue;
			if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
				ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
		}
		flush_workqueue(ctx->wq);
		for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
			/* flush already done above, so pass flush == 0 */
			destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
			free_pv_object(dev, i, ctx->port);
		}
		kfree(ctx->tun);
		destroy_workqueue(ctx->ud_wq);
		destroy_workqueue(ctx->wq);
	}
}
1938
1939static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
1940{
1941 int i;
1942
1943 if (!mlx4_is_master(dev->dev))
1944 return;
1945 /* initialize or tear down tunnel QPs for the master */
1946 for (i = 0; i < dev->dev->caps.num_ports; i++)
1947 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
1948 return;
1949}
1950
1951int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
1952{
1953 int i = 0;
1954 int err;
1955
1956 if (!mlx4_is_mfunc(dev->dev))
1957 return 0;
1958
1959 dev->sriov.is_going_down = 0;
1960 spin_lock_init(&dev->sriov.going_down_lock);
1961 mlx4_ib_cm_paravirt_init(dev);
1962
1963 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
1964
1965 if (mlx4_is_slave(dev->dev)) {
1966 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
1967 return 0;
1968 }
1969
1970 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
1971 if (i == mlx4_master_func_num(dev->dev))
1972 mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
1973 else
1974 mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
1975 }
1976
1977 err = mlx4_ib_init_alias_guid_service(dev);
1978 if (err) {
1979 mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
1980 goto paravirt_err;
1981 }
1982 err = mlx4_ib_device_register_sysfs(dev);
1983 if (err) {
1984 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
1985 goto sysfs_err;
1986 }
1987
1988 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
1989 dev->dev->caps.sqp_demux);
1990 for (i = 0; i < dev->num_ports; i++) {
1991 union ib_gid gid;
1992 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
1993 if (err)
1994 goto demux_err;
1995 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
1996 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
1997 &dev->sriov.sqps[i]);
1998 if (err)
1999 goto demux_err;
2000 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2001 if (err)
2002 goto demux_err;
2003 }
2004 mlx4_ib_master_tunnels(dev, 1);
2005 return 0;
2006
2007demux_err:
2008 while (i > 0) {
2009 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2010 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2011 --i;
2012 }
2013 mlx4_ib_device_unregister_sysfs(dev);
2014
2015sysfs_err:
2016 mlx4_ib_destroy_alias_guid_service(dev);
2017
2018paravirt_err:
2019 mlx4_ib_cm_paravirt_clean(dev, -1);
2020
2021 return err;
2022}
2023
/*
 * Shut down SR-IOV para-virtualization: mark the device as going down
 * (under going_down_lock so in-flight paths see a consistent flag),
 * then, on the master, drain each port's up/down workqueue and free the
 * SQP and demux contexts, followed by CM para-virt state, the
 * alias-GUID service, and the sysfs entries.
 */
void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
{
	int i;
	unsigned long flags;

	if (!mlx4_is_mfunc(dev->dev))
		return;

	spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
	dev->sriov.is_going_down = 1;
	spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
	if (mlx4_is_master(dev->dev)) {
		for (i = 0; i < dev->num_ports; i++) {
			flush_workqueue(dev->sriov.demux[i].ud_wq);
			mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
			kfree(dev->sriov.sqps[i]);
			dev->sriov.sqps[i] = NULL;
			mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
		}

		mlx4_ib_cm_paravirt_clean(dev, -1);
		mlx4_ib_destroy_alias_guid_service(dev);
		mlx4_ib_device_unregister_sysfs(dev);
	}
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index e7d81c0d1ac..059a865b2b7 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -50,7 +50,7 @@
50#include "mlx4_ib.h" 50#include "mlx4_ib.h"
51#include "user.h" 51#include "user.h"
52 52
53#define DRV_NAME MLX4_IB_DRV_NAME 53#define DRV_NAME "mlx4_ib"
54#define DRV_VERSION "1.0" 54#define DRV_VERSION "1.0"
55#define DRV_RELDATE "April 4, 2008" 55#define DRV_RELDATE "April 4, 2008"
56 56
@@ -59,10 +59,6 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
59MODULE_LICENSE("Dual BSD/GPL"); 59MODULE_LICENSE("Dual BSD/GPL");
60MODULE_VERSION(DRV_VERSION); 60MODULE_VERSION(DRV_VERSION);
61 61
62int mlx4_ib_sm_guid_assign = 1;
63module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
64MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
65
66static const char mlx4_ib_version[] = 62static const char mlx4_ib_version[] =
67 DRV_NAME ": Mellanox ConnectX InfiniBand driver v" 63 DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
68 DRV_VERSION " (" DRV_RELDATE ")\n"; 64 DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -74,8 +70,6 @@ struct update_gid_work {
74 int port; 70 int port;
75}; 71};
76 72
77static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init);
78
79static struct workqueue_struct *wq; 73static struct workqueue_struct *wq;
80 74
81static void init_query_mad(struct ib_smp *mad) 75static void init_query_mad(struct ib_smp *mad)
@@ -104,8 +98,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
104 init_query_mad(in_mad); 98 init_query_mad(in_mad);
105 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 99 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
106 100
107 err = mlx4_MAD_IFC(to_mdev(ibdev), MLX4_MAD_IFC_IGNORE_KEYS, 101 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, 1, NULL, NULL, in_mad, out_mad);
108 1, NULL, NULL, in_mad, out_mad);
109 if (err) 102 if (err)
110 goto out; 103 goto out;
111 104
@@ -135,19 +128,17 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
135 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) && 128 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_REMOTE_INV) &&
136 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR)) 129 (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_FAST_REG_WR))
137 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; 130 props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
138 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC)
139 props->device_cap_flags |= IB_DEVICE_XRC;
140 131
141 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 132 props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
142 0xffffff; 133 0xffffff;
143 props->vendor_part_id = dev->dev->pdev->device; 134 props->vendor_part_id = be16_to_cpup((__be16 *) (out_mad->data + 30));
144 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); 135 props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
145 memcpy(&props->sys_image_guid, out_mad->data + 4, 8); 136 memcpy(&props->sys_image_guid, out_mad->data + 4, 8);
146 137
147 props->max_mr_size = ~0ull; 138 props->max_mr_size = ~0ull;
148 props->page_size_cap = dev->dev->caps.page_size_cap; 139 props->page_size_cap = dev->dev->caps.page_size_cap;
149 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps; 140 props->max_qp = dev->dev->caps.num_qps - dev->dev->caps.reserved_qps;
150 props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE; 141 props->max_qp_wr = dev->dev->caps.max_wqes;
151 props->max_sge = min(dev->dev->caps.max_sq_sg, 142 props->max_sge = min(dev->dev->caps.max_sq_sg,
152 dev->dev->caps.max_rq_sg); 143 dev->dev->caps.max_rq_sg);
153 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs; 144 props->max_cq = dev->dev->caps.num_cqs - dev->dev->caps.reserved_cqs;
@@ -164,13 +155,13 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
164 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay; 155 props->local_ca_ack_delay = dev->dev->caps.local_ca_ack_delay;
165 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ? 156 props->atomic_cap = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
166 IB_ATOMIC_HCA : IB_ATOMIC_NONE; 157 IB_ATOMIC_HCA : IB_ATOMIC_NONE;
167 props->masked_atomic_cap = props->atomic_cap; 158 props->masked_atomic_cap = IB_ATOMIC_HCA;
168 props->max_pkeys = dev->dev->caps.pkey_table_len[1]; 159 props->max_pkeys = dev->dev->caps.pkey_table_len[1];
169 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; 160 props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
170 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; 161 props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
171 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * 162 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
172 props->max_mcast_grp; 163 props->max_mcast_grp;
173 props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; 164 props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1;
174 165
175out: 166out:
176 kfree(in_mad); 167 kfree(in_mad);
@@ -184,37 +175,14 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
184{ 175{
185 struct mlx4_dev *dev = to_mdev(device)->dev; 176 struct mlx4_dev *dev = to_mdev(device)->dev;
186 177
187 return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? 178 return dev->caps.port_mask & (1 << (port_num - 1)) ?
188 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; 179 IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
189} 180}
190 181
191static int ib_link_query_port(struct ib_device *ibdev, u8 port, 182static int ib_link_query_port(struct ib_device *ibdev, u8 port,
192 struct ib_port_attr *props, int netw_view) 183 struct ib_port_attr *props,
184 struct ib_smp *out_mad)
193{ 185{
194 struct ib_smp *in_mad = NULL;
195 struct ib_smp *out_mad = NULL;
196 int ext_active_speed;
197 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
198 int err = -ENOMEM;
199
200 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
201 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
202 if (!in_mad || !out_mad)
203 goto out;
204
205 init_query_mad(in_mad);
206 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
207 in_mad->attr_mod = cpu_to_be32(port);
208
209 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view)
210 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
211
212 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
213 in_mad, out_mad);
214 if (err)
215 goto out;
216
217
218 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); 186 props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16));
219 props->lmc = out_mad->data[34] & 0x7; 187 props->lmc = out_mad->data[34] & 0x7;
220 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18)); 188 props->sm_lid = be16_to_cpup((__be16 *) (out_mad->data + 18));
@@ -222,10 +190,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
222 props->state = out_mad->data[32] & 0xf; 190 props->state = out_mad->data[32] & 0xf;
223 props->phys_state = out_mad->data[33] >> 4; 191 props->phys_state = out_mad->data[33] >> 4;
224 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20)); 192 props->port_cap_flags = be32_to_cpup((__be32 *) (out_mad->data + 20));
225 if (netw_view) 193 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
226 props->gid_tbl_len = out_mad->data[50];
227 else
228 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
229 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; 194 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
230 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port]; 195 props->pkey_tbl_len = to_mdev(ibdev)->dev->caps.pkey_table_len[port];
231 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); 196 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
@@ -238,44 +203,7 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port,
238 props->max_vl_num = out_mad->data[37] >> 4; 203 props->max_vl_num = out_mad->data[37] >> 4;
239 props->init_type_reply = out_mad->data[41] >> 4; 204 props->init_type_reply = out_mad->data[41] >> 4;
240 205
241 /* Check if extended speeds (EDR/FDR/...) are supported */ 206 return 0;
242 if (props->port_cap_flags & IB_PORT_EXTENDED_SPEEDS_SUP) {
243 ext_active_speed = out_mad->data[62] >> 4;
244
245 switch (ext_active_speed) {
246 case 1:
247 props->active_speed = IB_SPEED_FDR;
248 break;
249 case 2:
250 props->active_speed = IB_SPEED_EDR;
251 break;
252 }
253 }
254
255 /* If reported active speed is QDR, check if is FDR-10 */
256 if (props->active_speed == IB_SPEED_QDR) {
257 init_query_mad(in_mad);
258 in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO;
259 in_mad->attr_mod = cpu_to_be32(port);
260
261 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port,
262 NULL, NULL, in_mad, out_mad);
263 if (err)
264 goto out;
265
266 /* Checking LinkSpeedActive for FDR-10 */
267 if (out_mad->data[15] & 0x1)
268 props->active_speed = IB_SPEED_FDR10;
269 }
270
271 /* Avoid wrong speed value returned by FW if the IB link is down. */
272 if (props->state == IB_PORT_DOWN)
273 props->active_speed = IB_SPEED_SDR;
274
275out:
276 kfree(in_mad);
277 kfree(out_mad);
278 return err;
279} 207}
280 208
281static u8 state_to_phys_state(enum ib_port_state state) 209static u8 state_to_phys_state(enum ib_port_state state)
@@ -284,42 +212,32 @@ static u8 state_to_phys_state(enum ib_port_state state)
284} 212}
285 213
286static int eth_link_query_port(struct ib_device *ibdev, u8 port, 214static int eth_link_query_port(struct ib_device *ibdev, u8 port,
287 struct ib_port_attr *props, int netw_view) 215 struct ib_port_attr *props,
216 struct ib_smp *out_mad)
288{ 217{
289 218 struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe;
290 struct mlx4_ib_dev *mdev = to_mdev(ibdev);
291 struct mlx4_ib_iboe *iboe = &mdev->iboe;
292 struct net_device *ndev; 219 struct net_device *ndev;
293 enum ib_mtu tmp; 220 enum ib_mtu tmp;
294 struct mlx4_cmd_mailbox *mailbox;
295 int err = 0;
296
297 mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
298 if (IS_ERR(mailbox))
299 return PTR_ERR(mailbox);
300
301 err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
302 MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
303 MLX4_CMD_WRAPPED);
304 if (err)
305 goto out;
306 221
307 props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? 222 props->active_width = IB_WIDTH_1X;
308 IB_WIDTH_4X : IB_WIDTH_1X; 223 props->active_speed = 4;
309 props->active_speed = IB_SPEED_QDR;
310 props->port_cap_flags = IB_PORT_CM_SUP; 224 props->port_cap_flags = IB_PORT_CM_SUP;
311 props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; 225 props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port];
312 props->max_msg_sz = mdev->dev->caps.max_msg_sz; 226 props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz;
313 props->pkey_tbl_len = 1; 227 props->pkey_tbl_len = 1;
314 props->max_mtu = IB_MTU_4096; 228 props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46));
315 props->max_vl_num = 2; 229 props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48));
230 props->max_mtu = IB_MTU_2048;
231 props->subnet_timeout = 0;
232 props->max_vl_num = out_mad->data[37] >> 4;
233 props->init_type_reply = 0;
316 props->state = IB_PORT_DOWN; 234 props->state = IB_PORT_DOWN;
317 props->phys_state = state_to_phys_state(props->state); 235 props->phys_state = state_to_phys_state(props->state);
318 props->active_mtu = IB_MTU_256; 236 props->active_mtu = IB_MTU_256;
319 spin_lock(&iboe->lock); 237 spin_lock(&iboe->lock);
320 ndev = iboe->netdevs[port - 1]; 238 ndev = iboe->netdevs[port - 1];
321 if (!ndev) 239 if (!ndev)
322 goto out_unlock; 240 goto out;
323 241
324 tmp = iboe_get_mtu(ndev->mtu); 242 tmp = iboe_get_mtu(ndev->mtu);
325 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; 243 props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256;
@@ -327,43 +245,51 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
327 props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? 245 props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ?
328 IB_PORT_ACTIVE : IB_PORT_DOWN; 246 IB_PORT_ACTIVE : IB_PORT_DOWN;
329 props->phys_state = state_to_phys_state(props->state); 247 props->phys_state = state_to_phys_state(props->state);
330out_unlock: 248
331 spin_unlock(&iboe->lock);
332out: 249out:
333 mlx4_free_cmd_mailbox(mdev->dev, mailbox); 250 spin_unlock(&iboe->lock);
334 return err; 251 return 0;
335} 252}
336 253
337int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 254static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
338 struct ib_port_attr *props, int netw_view) 255 struct ib_port_attr *props)
339{ 256{
340 int err; 257 struct ib_smp *in_mad = NULL;
258 struct ib_smp *out_mad = NULL;
259 int err = -ENOMEM;
260
261 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
262 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
263 if (!in_mad || !out_mad)
264 goto out;
341 265
342 memset(props, 0, sizeof *props); 266 memset(props, 0, sizeof *props);
343 267
268 init_query_mad(in_mad);
269 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
270 in_mad->attr_mod = cpu_to_be32(port);
271
272 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
273 if (err)
274 goto out;
275
344 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? 276 err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
345 ib_link_query_port(ibdev, port, props, netw_view) : 277 ib_link_query_port(ibdev, port, props, out_mad) :
346 eth_link_query_port(ibdev, port, props, netw_view); 278 eth_link_query_port(ibdev, port, props, out_mad);
347 279
348 return err; 280out:
349} 281 kfree(in_mad);
282 kfree(out_mad);
350 283
351static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, 284 return err;
352 struct ib_port_attr *props)
353{
354 /* returns host view */
355 return __mlx4_ib_query_port(ibdev, port, props, 0);
356} 285}
357 286
358int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, 287static int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
359 union ib_gid *gid, int netw_view) 288 union ib_gid *gid)
360{ 289{
361 struct ib_smp *in_mad = NULL; 290 struct ib_smp *in_mad = NULL;
362 struct ib_smp *out_mad = NULL; 291 struct ib_smp *out_mad = NULL;
363 int err = -ENOMEM; 292 int err = -ENOMEM;
364 struct mlx4_ib_dev *dev = to_mdev(ibdev);
365 int clear = 0;
366 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
367 293
368 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 294 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
369 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); 295 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
@@ -374,38 +300,23 @@ int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
374 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; 300 in_mad->attr_id = IB_SMP_ATTR_PORT_INFO;
375 in_mad->attr_mod = cpu_to_be32(port); 301 in_mad->attr_mod = cpu_to_be32(port);
376 302
377 if (mlx4_is_mfunc(dev->dev) && netw_view) 303 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
378 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
379
380 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, NULL, NULL, in_mad, out_mad);
381 if (err) 304 if (err)
382 goto out; 305 goto out;
383 306
384 memcpy(gid->raw, out_mad->data + 8, 8); 307 memcpy(gid->raw, out_mad->data + 8, 8);
385 308
386 if (mlx4_is_mfunc(dev->dev) && !netw_view) {
387 if (index) {
388 /* For any index > 0, return the null guid */
389 err = 0;
390 clear = 1;
391 goto out;
392 }
393 }
394
395 init_query_mad(in_mad); 309 init_query_mad(in_mad);
396 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO; 310 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
397 in_mad->attr_mod = cpu_to_be32(index / 8); 311 in_mad->attr_mod = cpu_to_be32(index / 8);
398 312
399 err = mlx4_MAD_IFC(dev, mad_ifc_flags, port, 313 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
400 NULL, NULL, in_mad, out_mad);
401 if (err) 314 if (err)
402 goto out; 315 goto out;
403 316
404 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8); 317 memcpy(gid->raw + 8, out_mad->data + (index % 8) * 8, 8);
405 318
406out: 319out:
407 if (clear)
408 memset(gid->raw + 8, 0, 8);
409 kfree(in_mad); 320 kfree(in_mad);
410 kfree(out_mad); 321 kfree(out_mad);
411 return err; 322 return err;
@@ -425,17 +336,16 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
425 union ib_gid *gid) 336 union ib_gid *gid)
426{ 337{
427 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND) 338 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
428 return __mlx4_ib_query_gid(ibdev, port, index, gid, 0); 339 return __mlx4_ib_query_gid(ibdev, port, index, gid);
429 else 340 else
430 return iboe_query_gid(ibdev, port, index, gid); 341 return iboe_query_gid(ibdev, port, index, gid);
431} 342}
432 343
433int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, 344static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
434 u16 *pkey, int netw_view) 345 u16 *pkey)
435{ 346{
436 struct ib_smp *in_mad = NULL; 347 struct ib_smp *in_mad = NULL;
437 struct ib_smp *out_mad = NULL; 348 struct ib_smp *out_mad = NULL;
438 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
439 int err = -ENOMEM; 349 int err = -ENOMEM;
440 350
441 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 351 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -447,11 +357,7 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
447 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE; 357 in_mad->attr_id = IB_SMP_ATTR_PKEY_TABLE;
448 in_mad->attr_mod = cpu_to_be32(index / 32); 358 in_mad->attr_mod = cpu_to_be32(index / 32);
449 359
450 if (mlx4_is_mfunc(to_mdev(ibdev)->dev) && netw_view) 360 err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad);
451 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
452
453 err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL,
454 in_mad, out_mad);
455 if (err) 361 if (err)
456 goto out; 362 goto out;
457 363
@@ -463,16 +369,10 @@ out:
463 return err; 369 return err;
464} 370}
465 371
466static int mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
467{
468 return __mlx4_ib_query_pkey(ibdev, port, index, pkey, 0);
469}
470
471static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, 372static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
472 struct ib_device_modify *props) 373 struct ib_device_modify *props)
473{ 374{
474 struct mlx4_cmd_mailbox *mailbox; 375 struct mlx4_cmd_mailbox *mailbox;
475 unsigned long flags;
476 376
477 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) 377 if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
478 return -EOPNOTSUPP; 378 return -EOPNOTSUPP;
@@ -480,12 +380,9 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
480 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC)) 380 if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
481 return 0; 381 return 0;
482 382
483 if (mlx4_is_slave(to_mdev(ibdev)->dev)) 383 spin_lock(&to_mdev(ibdev)->sm_lock);
484 return -EOPNOTSUPP;
485
486 spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags);
487 memcpy(ibdev->node_desc, props->node_desc, 64); 384 memcpy(ibdev->node_desc, props->node_desc, 64);
488 spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); 385 spin_unlock(&to_mdev(ibdev)->sm_lock);
489 386
490 /* 387 /*
491 * If possible, pass node desc to FW, so it can generate 388 * If possible, pass node desc to FW, so it can generate
@@ -498,7 +395,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
498 memset(mailbox->buf, 0, 256); 395 memset(mailbox->buf, 0, 256);
499 memcpy(mailbox->buf, props->node_desc, 64); 396 memcpy(mailbox->buf, props->node_desc, 64);
500 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, 397 mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
501 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); 398 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A);
502 399
503 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); 400 mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
504 401
@@ -527,7 +424,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
527 } 424 }
528 425
529 err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, 426 err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
530 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); 427 MLX4_CMD_TIME_CLASS_B);
531 428
532 mlx4_free_cmd_mailbox(dev->dev, mailbox); 429 mlx4_free_cmd_mailbox(dev->dev, mailbox);
533 return err; 430 return err;
@@ -563,24 +460,15 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
563{ 460{
564 struct mlx4_ib_dev *dev = to_mdev(ibdev); 461 struct mlx4_ib_dev *dev = to_mdev(ibdev);
565 struct mlx4_ib_ucontext *context; 462 struct mlx4_ib_ucontext *context;
566 struct mlx4_ib_alloc_ucontext_resp_v3 resp_v3;
567 struct mlx4_ib_alloc_ucontext_resp resp; 463 struct mlx4_ib_alloc_ucontext_resp resp;
568 int err; 464 int err;
569 465
570 if (!dev->ib_active) 466 if (!dev->ib_active)
571 return ERR_PTR(-EAGAIN); 467 return ERR_PTR(-EAGAIN);
572 468
573 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) { 469 resp.qp_tab_size = dev->dev->caps.num_qps;
574 resp_v3.qp_tab_size = dev->dev->caps.num_qps; 470 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
575 resp_v3.bf_reg_size = dev->dev->caps.bf_reg_size; 471 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
576 resp_v3.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
577 } else {
578 resp.dev_caps = dev->dev->caps.userspace_caps;
579 resp.qp_tab_size = dev->dev->caps.num_qps;
580 resp.bf_reg_size = dev->dev->caps.bf_reg_size;
581 resp.bf_regs_per_page = dev->dev->caps.bf_regs_per_page;
582 resp.cqe_size = dev->dev->caps.cqe_size;
583 }
584 472
585 context = kmalloc(sizeof *context, GFP_KERNEL); 473 context = kmalloc(sizeof *context, GFP_KERNEL);
586 if (!context) 474 if (!context)
@@ -595,11 +483,7 @@ static struct ib_ucontext *mlx4_ib_alloc_ucontext(struct ib_device *ibdev,
595 INIT_LIST_HEAD(&context->db_page_list); 483 INIT_LIST_HEAD(&context->db_page_list);
596 mutex_init(&context->db_page_mutex); 484 mutex_init(&context->db_page_mutex);
597 485
598 if (ibdev->uverbs_abi_ver == MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION) 486 err = ib_copy_to_udata(udata, &resp, sizeof resp);
599 err = ib_copy_to_udata(udata, &resp_v3, sizeof(resp_v3));
600 else
601 err = ib_copy_to_udata(udata, &resp, sizeof(resp));
602
603 if (err) { 487 if (err) {
604 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar); 488 mlx4_uar_free(to_mdev(ibdev)->dev, &context->uar);
605 kfree(context); 489 kfree(context);
@@ -682,57 +566,6 @@ static int mlx4_ib_dealloc_pd(struct ib_pd *pd)
682 return 0; 566 return 0;
683} 567}
684 568
685static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
686 struct ib_ucontext *context,
687 struct ib_udata *udata)
688{
689 struct mlx4_ib_xrcd *xrcd;
690 int err;
691
692 if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
693 return ERR_PTR(-ENOSYS);
694
695 xrcd = kmalloc(sizeof *xrcd, GFP_KERNEL);
696 if (!xrcd)
697 return ERR_PTR(-ENOMEM);
698
699 err = mlx4_xrcd_alloc(to_mdev(ibdev)->dev, &xrcd->xrcdn);
700 if (err)
701 goto err1;
702
703 xrcd->pd = ib_alloc_pd(ibdev);
704 if (IS_ERR(xrcd->pd)) {
705 err = PTR_ERR(xrcd->pd);
706 goto err2;
707 }
708
709 xrcd->cq = ib_create_cq(ibdev, NULL, NULL, xrcd, 1, 0);
710 if (IS_ERR(xrcd->cq)) {
711 err = PTR_ERR(xrcd->cq);
712 goto err3;
713 }
714
715 return &xrcd->ibxrcd;
716
717err3:
718 ib_dealloc_pd(xrcd->pd);
719err2:
720 mlx4_xrcd_free(to_mdev(ibdev)->dev, xrcd->xrcdn);
721err1:
722 kfree(xrcd);
723 return ERR_PTR(err);
724}
725
726static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
727{
728 ib_destroy_cq(to_mxrcd(xrcd)->cq);
729 ib_dealloc_pd(to_mxrcd(xrcd)->pd);
730 mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
731 kfree(xrcd);
732
733 return 0;
734}
735
736static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid) 569static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
737{ 570{
738 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 571 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
@@ -784,53 +617,26 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
784 return ret; 617 return ret;
785} 618}
786 619
787struct mlx4_ib_steering {
788 struct list_head list;
789 u64 reg_id;
790 union ib_gid gid;
791};
792
793static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) 620static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
794{ 621{
795 int err; 622 int err;
796 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); 623 struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
797 struct mlx4_ib_qp *mqp = to_mqp(ibqp); 624 struct mlx4_ib_qp *mqp = to_mqp(ibqp);
798 u64 reg_id;
799 struct mlx4_ib_steering *ib_steering = NULL;
800
801 if (mdev->dev->caps.steering_mode ==
802 MLX4_STEERING_MODE_DEVICE_MANAGED) {
803 ib_steering = kmalloc(sizeof(*ib_steering), GFP_KERNEL);
804 if (!ib_steering)
805 return -ENOMEM;
806 }
807 625
808 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port, 626 err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw,
809 !!(mqp->flags & 627 !!(mqp->flags & MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
810 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK), 628 MLX4_PROT_IB_IPV6);
811 MLX4_PROT_IB_IPV6, &reg_id);
812 if (err) 629 if (err)
813 goto err_malloc; 630 return err;
814 631
815 err = add_gid_entry(ibqp, gid); 632 err = add_gid_entry(ibqp, gid);
816 if (err) 633 if (err)
817 goto err_add; 634 goto err_add;
818 635
819 if (ib_steering) {
820 memcpy(ib_steering->gid.raw, gid->raw, 16);
821 ib_steering->reg_id = reg_id;
822 mutex_lock(&mqp->mutex);
823 list_add(&ib_steering->list, &mqp->steering_rules);
824 mutex_unlock(&mqp->mutex);
825 }
826 return 0; 636 return 0;
827 637
828err_add: 638err_add:
829 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, 639 mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
830 MLX4_PROT_IB_IPV6, reg_id);
831err_malloc:
832 kfree(ib_steering);
833
834 return err; 640 return err;
835} 641}
836 642
@@ -858,30 +664,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
858 u8 mac[6]; 664 u8 mac[6];
859 struct net_device *ndev; 665 struct net_device *ndev;
860 struct mlx4_ib_gid_entry *ge; 666 struct mlx4_ib_gid_entry *ge;
861 u64 reg_id = 0;
862
863 if (mdev->dev->caps.steering_mode ==
864 MLX4_STEERING_MODE_DEVICE_MANAGED) {
865 struct mlx4_ib_steering *ib_steering;
866 667
867 mutex_lock(&mqp->mutex); 668 err = mlx4_multicast_detach(mdev->dev,
868 list_for_each_entry(ib_steering, &mqp->steering_rules, list) { 669 &mqp->mqp, gid->raw, MLX4_PROT_IB_IPV6);
869 if (!memcmp(ib_steering->gid.raw, gid->raw, 16)) {
870 list_del(&ib_steering->list);
871 break;
872 }
873 }
874 mutex_unlock(&mqp->mutex);
875 if (&ib_steering->list == &mqp->steering_rules) {
876 pr_err("Couldn't find reg_id for mgid. Steering rule is left attached\n");
877 return -EINVAL;
878 }
879 reg_id = ib_steering->reg_id;
880 kfree(ib_steering);
881 }
882
883 err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
884 MLX4_PROT_IB_IPV6, reg_id);
885 if (err) 670 if (err)
886 return err; 671 return err;
887 672
@@ -903,7 +688,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
903 list_del(&ge->list); 688 list_del(&ge->list);
904 kfree(ge); 689 kfree(ge);
905 } else 690 } else
906 pr_warn("could not find mgid entry\n"); 691 printk(KERN_WARNING "could not find mgid entry\n");
907 692
908 mutex_unlock(&mqp->mutex); 693 mutex_unlock(&mqp->mutex);
909 694
@@ -914,7 +699,6 @@ static int init_node_data(struct mlx4_ib_dev *dev)
914{ 699{
915 struct ib_smp *in_mad = NULL; 700 struct ib_smp *in_mad = NULL;
916 struct ib_smp *out_mad = NULL; 701 struct ib_smp *out_mad = NULL;
917 int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS;
918 int err = -ENOMEM; 702 int err = -ENOMEM;
919 703
920 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); 704 in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
@@ -924,10 +708,8 @@ static int init_node_data(struct mlx4_ib_dev *dev)
924 708
925 init_query_mad(in_mad); 709 init_query_mad(in_mad);
926 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC; 710 in_mad->attr_id = IB_SMP_ATTR_NODE_DESC;
927 if (mlx4_is_master(dev->dev))
928 mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW;
929 711
930 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); 712 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
931 if (err) 713 if (err)
932 goto out; 714 goto out;
933 715
@@ -935,11 +717,10 @@ static int init_node_data(struct mlx4_ib_dev *dev)
935 717
936 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; 718 in_mad->attr_id = IB_SMP_ATTR_NODE_INFO;
937 719
938 err = mlx4_MAD_IFC(dev, mad_ifc_flags, 1, NULL, NULL, in_mad, out_mad); 720 err = mlx4_MAD_IFC(dev, 1, 1, 1, NULL, NULL, in_mad, out_mad);
939 if (err) 721 if (err)
940 goto out; 722 goto out;
941 723
942 dev->dev->rev_id = be32_to_cpup((__be32 *) (out_mad->data + 32));
943 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8); 724 memcpy(&dev->ib_dev.node_guid, out_mad->data + 12, 8);
944 725
945out: 726out:
@@ -1016,10 +797,11 @@ static void update_gids_task(struct work_struct *work)
1016 union ib_gid *gids; 797 union ib_gid *gids;
1017 int err; 798 int err;
1018 struct mlx4_dev *dev = gw->dev->dev; 799 struct mlx4_dev *dev = gw->dev->dev;
800 struct ib_event event;
1019 801
1020 mailbox = mlx4_alloc_cmd_mailbox(dev); 802 mailbox = mlx4_alloc_cmd_mailbox(dev);
1021 if (IS_ERR(mailbox)) { 803 if (IS_ERR(mailbox)) {
1022 pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox)); 804 printk(KERN_WARNING "update gid table failed %ld\n", PTR_ERR(mailbox));
1023 return; 805 return;
1024 } 806 }
1025 807
@@ -1027,13 +809,15 @@ static void update_gids_task(struct work_struct *work)
1027 memcpy(gids, gw->gids, sizeof gw->gids); 809 memcpy(gids, gw->gids, sizeof gw->gids);
1028 810
1029 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, 811 err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
1030 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, 812 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
1031 MLX4_CMD_WRAPPED);
1032 if (err) 813 if (err)
1033 pr_warn("set port command failed\n"); 814 printk(KERN_WARNING "set port command failed\n");
1034 else { 815 else {
1035 memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids); 816 memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
1036 mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE); 817 event.device = &gw->dev->ib_dev;
818 event.element.port_num = gw->port;
819 event.event = IB_EVENT_GID_CHANGE;
820 ib_dispatch_event(&event);
1037 } 821 }
1038 822
1039 mlx4_free_cmd_mailbox(dev, mailbox); 823 mlx4_free_cmd_mailbox(dev, mailbox);
@@ -1190,133 +974,16 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event
1190 return NOTIFY_DONE; 974 return NOTIFY_DONE;
1191} 975}
1192 976
1193static void init_pkeys(struct mlx4_ib_dev *ibdev)
1194{
1195 int port;
1196 int slave;
1197 int i;
1198
1199 if (mlx4_is_master(ibdev->dev)) {
1200 for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
1201 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1202 for (i = 0;
1203 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1204 ++i) {
1205 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] =
1206 /* master has the identity virt2phys pkey mapping */
1207 (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i :
1208 ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1;
1209 mlx4_sync_pkey_table(ibdev->dev, slave, port, i,
1210 ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]);
1211 }
1212 }
1213 }
1214 /* initialize pkey cache */
1215 for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
1216 for (i = 0;
1217 i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
1218 ++i)
1219 ibdev->pkeys.phys_pkey_cache[port-1][i] =
1220 (i) ? 0 : 0xFFFF;
1221 }
1222 }
1223}
1224
1225static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1226{
1227 char name[32];
1228 int eq_per_port = 0;
1229 int added_eqs = 0;
1230 int total_eqs = 0;
1231 int i, j, eq;
1232
1233 /* Legacy mode or comp_pool is not large enough */
1234 if (dev->caps.comp_pool == 0 ||
1235 dev->caps.num_ports > dev->caps.comp_pool)
1236 return;
1237
1238 eq_per_port = rounddown_pow_of_two(dev->caps.comp_pool/
1239 dev->caps.num_ports);
1240
1241 /* Init eq table */
1242 added_eqs = 0;
1243 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
1244 added_eqs += eq_per_port;
1245
1246 total_eqs = dev->caps.num_comp_vectors + added_eqs;
1247
1248 ibdev->eq_table = kzalloc(total_eqs * sizeof(int), GFP_KERNEL);
1249 if (!ibdev->eq_table)
1250 return;
1251
1252 ibdev->eq_added = added_eqs;
1253
1254 eq = 0;
1255 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
1256 for (j = 0; j < eq_per_port; j++) {
1257 sprintf(name, "mlx4-ib-%d-%d@%s",
1258 i, j, dev->pdev->bus->name);
1259 /* Set IRQ for specific name (per ring) */
1260 if (mlx4_assign_eq(dev, name, NULL,
1261 &ibdev->eq_table[eq])) {
1262 /* Use legacy (same as mlx4_en driver) */
1263 pr_warn("Can't allocate EQ %d; reverting to legacy\n", eq);
1264 ibdev->eq_table[eq] =
1265 (eq % dev->caps.num_comp_vectors);
1266 }
1267 eq++;
1268 }
1269 }
1270
1271 /* Fill the reset of the vector with legacy EQ */
1272 for (i = 0, eq = added_eqs; i < dev->caps.num_comp_vectors; i++)
1273 ibdev->eq_table[eq++] = i;
1274
1275 /* Advertise the new number of EQs to clients */
1276 ibdev->ib_dev.num_comp_vectors = total_eqs;
1277}
1278
1279static void mlx4_ib_free_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
1280{
1281 int i;
1282
1283 /* no additional eqs were added */
1284 if (!ibdev->eq_table)
1285 return;
1286
1287 /* Reset the advertised EQ number */
1288 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
1289
1290 /* Free only the added eqs */
1291 for (i = 0; i < ibdev->eq_added; i++) {
1292 /* Don't free legacy eqs if used */
1293 if (ibdev->eq_table[i] <= dev->caps.num_comp_vectors)
1294 continue;
1295 mlx4_release_eq(dev, ibdev->eq_table[i]);
1296 }
1297
1298 kfree(ibdev->eq_table);
1299}
1300
1301static void *mlx4_ib_add(struct mlx4_dev *dev) 977static void *mlx4_ib_add(struct mlx4_dev *dev)
1302{ 978{
1303 struct mlx4_ib_dev *ibdev; 979 struct mlx4_ib_dev *ibdev;
1304 int num_ports = 0; 980 int num_ports = 0;
1305 int i, j; 981 int i;
1306 int err; 982 int err;
1307 struct mlx4_ib_iboe *iboe; 983 struct mlx4_ib_iboe *iboe;
1308 984
1309 pr_info_once("%s", mlx4_ib_version); 985 printk_once(KERN_INFO "%s", mlx4_ib_version);
1310 986
1311 mlx4_foreach_non_ib_transport_port(i, dev)
1312 num_ports++;
1313
1314 if (mlx4_is_mfunc(dev) && num_ports) {
1315 dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n");
1316 return NULL;
1317 }
1318
1319 num_ports = 0;
1320 mlx4_foreach_ib_transport_port(i, dev) 987 mlx4_foreach_ib_transport_port(i, dev)
1321 num_ports++; 988 num_ports++;
1322 989
@@ -1355,11 +1022,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1355 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; 1022 ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
1356 ibdev->ib_dev.dma_device = &dev->pdev->dev; 1023 ibdev->ib_dev.dma_device = &dev->pdev->dev;
1357 1024
1358 if (dev->caps.userspace_caps) 1025 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
1359 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
1360 else
1361 ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION;
1362
1363 ibdev->ib_dev.uverbs_cmd_mask = 1026 ibdev->ib_dev.uverbs_cmd_mask =
1364 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) | 1027 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
1365 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) | 1028 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
@@ -1381,9 +1044,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1381 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) | 1044 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
1382 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) | 1045 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
1383 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) | 1046 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
1384 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) | 1047 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
1385 (1ull << IB_USER_VERBS_CMD_CREATE_XSRQ) |
1386 (1ull << IB_USER_VERBS_CMD_OPEN_QP);
1387 1048
1388 ibdev->ib_dev.query_device = mlx4_ib_query_device; 1049 ibdev->ib_dev.query_device = mlx4_ib_query_device;
1389 ibdev->ib_dev.query_port = mlx4_ib_query_port; 1050 ibdev->ib_dev.query_port = mlx4_ib_query_port;
@@ -1427,22 +1088,10 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1427 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach; 1088 ibdev->ib_dev.detach_mcast = mlx4_ib_mcg_detach;
1428 ibdev->ib_dev.process_mad = mlx4_ib_process_mad; 1089 ibdev->ib_dev.process_mad = mlx4_ib_process_mad;
1429 1090
1430 if (!mlx4_is_slave(ibdev->dev)) { 1091 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc;
1431 ibdev->ib_dev.alloc_fmr = mlx4_ib_fmr_alloc; 1092 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr;
1432 ibdev->ib_dev.map_phys_fmr = mlx4_ib_map_phys_fmr; 1093 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr;
1433 ibdev->ib_dev.unmap_fmr = mlx4_ib_unmap_fmr; 1094 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
1434 ibdev->ib_dev.dealloc_fmr = mlx4_ib_fmr_dealloc;
1435 }
1436
1437 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) {
1438 ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
1439 ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
1440 ibdev->ib_dev.uverbs_cmd_mask |=
1441 (1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
1442 (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
1443 }
1444
1445 mlx4_ib_alloc_eqs(dev, ibdev);
1446 1095
1447 spin_lock_init(&iboe->lock); 1096 spin_lock_init(&iboe->lock);
1448 1097
@@ -1468,49 +1117,28 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
1468 if (mlx4_ib_mad_init(ibdev)) 1117 if (mlx4_ib_mad_init(ibdev))
1469 goto err_reg; 1118 goto err_reg;
1470 1119
1471 if (mlx4_ib_init_sriov(ibdev))
1472 goto err_mad;
1473
1474 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { 1120 if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
1475 iboe->nb.notifier_call = mlx4_ib_netdev_event; 1121 iboe->nb.notifier_call = mlx4_ib_netdev_event;
1476 err = register_netdevice_notifier(&iboe->nb); 1122 err = register_netdevice_notifier(&iboe->nb);
1477 if (err) 1123 if (err)
1478 goto err_sriov; 1124 goto err_reg;
1479 } 1125 }
1480 1126
1481 for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { 1127 for (i = 0; i < ARRAY_SIZE(mlx4_class_attributes); ++i) {
1482 if (device_create_file(&ibdev->ib_dev.dev, 1128 if (device_create_file(&ibdev->ib_dev.dev,
1483 mlx4_class_attributes[j])) 1129 mlx4_class_attributes[i]))
1484 goto err_notif; 1130 goto err_notif;
1485 } 1131 }
1486 1132
1487 ibdev->ib_active = true; 1133 ibdev->ib_active = true;
1488 1134
1489 if (mlx4_is_mfunc(ibdev->dev))
1490 init_pkeys(ibdev);
1491
1492 /* create paravirt contexts for any VFs which are active */
1493 if (mlx4_is_master(ibdev->dev)) {
1494 for (j = 0; j < MLX4_MFUNC_MAX; j++) {
1495 if (j == mlx4_master_func_num(ibdev->dev))
1496 continue;
1497 if (mlx4_is_slave_active(ibdev->dev, j))
1498 do_slave_init(ibdev, j, 1);
1499 }
1500 }
1501 return ibdev; 1135 return ibdev;
1502 1136
1503err_notif: 1137err_notif:
1504 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 1138 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1505 pr_warn("failure unregistering notifier\n"); 1139 printk(KERN_WARNING "failure unregistering notifier\n");
1506 flush_workqueue(wq); 1140 flush_workqueue(wq);
1507 1141
1508err_sriov:
1509 mlx4_ib_close_sriov(ibdev);
1510
1511err_mad:
1512 mlx4_ib_mad_cleanup(ibdev);
1513
1514err_reg: 1142err_reg:
1515 ib_unregister_device(&ibdev->ib_dev); 1143 ib_unregister_device(&ibdev->ib_dev);
1516 1144
@@ -1539,12 +1167,11 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1539 struct mlx4_ib_dev *ibdev = ibdev_ptr; 1167 struct mlx4_ib_dev *ibdev = ibdev_ptr;
1540 int p; 1168 int p;
1541 1169
1542 mlx4_ib_close_sriov(ibdev);
1543 mlx4_ib_mad_cleanup(ibdev); 1170 mlx4_ib_mad_cleanup(ibdev);
1544 ib_unregister_device(&ibdev->ib_dev); 1171 ib_unregister_device(&ibdev->ib_dev);
1545 if (ibdev->iboe.nb.notifier_call) { 1172 if (ibdev->iboe.nb.notifier_call) {
1546 if (unregister_netdevice_notifier(&ibdev->iboe.nb)) 1173 if (unregister_netdevice_notifier(&ibdev->iboe.nb))
1547 pr_warn("failure unregistering notifier\n"); 1174 printk(KERN_WARNING "failure unregistering notifier\n");
1548 ibdev->iboe.nb.notifier_call = NULL; 1175 ibdev->iboe.nb.notifier_call = NULL;
1549 } 1176 }
1550 iounmap(ibdev->uar_map); 1177 iounmap(ibdev->uar_map);
@@ -1554,87 +1181,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
1554 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) 1181 mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB)
1555 mlx4_CLOSE_PORT(dev, p); 1182 mlx4_CLOSE_PORT(dev, p);
1556 1183
1557 mlx4_ib_free_eqs(dev, ibdev);
1558
1559 mlx4_uar_free(dev, &ibdev->priv_uar); 1184 mlx4_uar_free(dev, &ibdev->priv_uar);
1560 mlx4_pd_free(dev, ibdev->priv_pdn); 1185 mlx4_pd_free(dev, ibdev->priv_pdn);
1561 ib_dealloc_device(&ibdev->ib_dev); 1186 ib_dealloc_device(&ibdev->ib_dev);
1562} 1187}
1563 1188
1564static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init)
1565{
1566 struct mlx4_ib_demux_work **dm = NULL;
1567 struct mlx4_dev *dev = ibdev->dev;
1568 int i;
1569 unsigned long flags;
1570
1571 if (!mlx4_is_master(dev))
1572 return;
1573
1574 dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC);
1575 if (!dm) {
1576 pr_err("failed to allocate memory for tunneling qp update\n");
1577 goto out;
1578 }
1579
1580 for (i = 0; i < dev->caps.num_ports; i++) {
1581 dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC);
1582 if (!dm[i]) {
1583 pr_err("failed to allocate memory for tunneling qp update work struct\n");
1584 for (i = 0; i < dev->caps.num_ports; i++) {
1585 if (dm[i])
1586 kfree(dm[i]);
1587 }
1588 goto out;
1589 }
1590 }
1591 /* initialize or tear down tunnel QPs for the slave */
1592 for (i = 0; i < dev->caps.num_ports; i++) {
1593 INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work);
1594 dm[i]->port = i + 1;
1595 dm[i]->slave = slave;
1596 dm[i]->do_init = do_init;
1597 dm[i]->dev = ibdev;
1598 spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags);
1599 if (!ibdev->sriov.is_going_down)
1600 queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work);
1601 spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags);
1602 }
1603out:
1604 if (dm)
1605 kfree(dm);
1606 return;
1607}
1608
1609static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, 1189static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1610 enum mlx4_dev_event event, unsigned long param) 1190 enum mlx4_dev_event event, int port)
1611{ 1191{
1612 struct ib_event ibev; 1192 struct ib_event ibev;
1613 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); 1193 struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr);
1614 struct mlx4_eqe *eqe = NULL;
1615 struct ib_event_work *ew;
1616 int p = 0;
1617 1194
1618 if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) 1195 if (port > ibdev->num_ports)
1619 eqe = (struct mlx4_eqe *)param; 1196 return;
1620 else
1621 p = (int) param;
1622 1197
1623 switch (event) { 1198 switch (event) {
1624 case MLX4_DEV_EVENT_PORT_UP: 1199 case MLX4_DEV_EVENT_PORT_UP:
1625 if (p > ibdev->num_ports)
1626 return;
1627 if (mlx4_is_master(dev) &&
1628 rdma_port_get_link_layer(&ibdev->ib_dev, p) ==
1629 IB_LINK_LAYER_INFINIBAND) {
1630 mlx4_ib_invalidate_all_guid_record(ibdev, p);
1631 }
1632 ibev.event = IB_EVENT_PORT_ACTIVE; 1200 ibev.event = IB_EVENT_PORT_ACTIVE;
1633 break; 1201 break;
1634 1202
1635 case MLX4_DEV_EVENT_PORT_DOWN: 1203 case MLX4_DEV_EVENT_PORT_DOWN:
1636 if (p > ibdev->num_ports)
1637 return;
1638 ibev.event = IB_EVENT_PORT_ERR; 1204 ibev.event = IB_EVENT_PORT_ERR;
1639 break; 1205 break;
1640 1206
@@ -1643,39 +1209,12 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
1643 ibev.event = IB_EVENT_DEVICE_FATAL; 1209 ibev.event = IB_EVENT_DEVICE_FATAL;
1644 break; 1210 break;
1645 1211
1646 case MLX4_DEV_EVENT_PORT_MGMT_CHANGE:
1647 ew = kmalloc(sizeof *ew, GFP_ATOMIC);
1648 if (!ew) {
1649 pr_err("failed to allocate memory for events work\n");
1650 break;
1651 }
1652
1653 INIT_WORK(&ew->work, handle_port_mgmt_change_event);
1654 memcpy(&ew->ib_eqe, eqe, sizeof *eqe);
1655 ew->ib_dev = ibdev;
1656 /* need to queue only for port owner, which uses GEN_EQE */
1657 if (mlx4_is_master(dev))
1658 queue_work(wq, &ew->work);
1659 else
1660 handle_port_mgmt_change_event(&ew->work);
1661 return;
1662
1663 case MLX4_DEV_EVENT_SLAVE_INIT:
1664 /* here, p is the slave id */
1665 do_slave_init(ibdev, p, 1);
1666 return;
1667
1668 case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
1669 /* here, p is the slave id */
1670 do_slave_init(ibdev, p, 0);
1671 return;
1672
1673 default: 1212 default:
1674 return; 1213 return;
1675 } 1214 }
1676 1215
1677 ibev.device = ibdev_ptr; 1216 ibev.device = ibdev_ptr;
1678 ibev.element.port_num = (u8) p; 1217 ibev.element.port_num = port;
1679 1218
1680 ib_dispatch_event(&ibev); 1219 ib_dispatch_event(&ibev);
1681} 1220}
@@ -1695,28 +1234,18 @@ static int __init mlx4_ib_init(void)
1695 if (!wq) 1234 if (!wq)
1696 return -ENOMEM; 1235 return -ENOMEM;
1697 1236
1698 err = mlx4_ib_mcg_init();
1699 if (err)
1700 goto clean_wq;
1701
1702 err = mlx4_register_interface(&mlx4_ib_interface); 1237 err = mlx4_register_interface(&mlx4_ib_interface);
1703 if (err) 1238 if (err) {
1704 goto clean_mcg; 1239 destroy_workqueue(wq);
1240 return err;
1241 }
1705 1242
1706 return 0; 1243 return 0;
1707
1708clean_mcg:
1709 mlx4_ib_mcg_destroy();
1710
1711clean_wq:
1712 destroy_workqueue(wq);
1713 return err;
1714} 1244}
1715 1245
1716static void __exit mlx4_ib_cleanup(void) 1246static void __exit mlx4_ib_cleanup(void)
1717{ 1247{
1718 mlx4_unregister_interface(&mlx4_ib_interface); 1248 mlx4_unregister_interface(&mlx4_ib_interface);
1719 mlx4_ib_mcg_destroy();
1720 destroy_workqueue(wq); 1249 destroy_workqueue(wq);
1721} 1250}
1722 1251
diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c
deleted file mode 100644
index 25b2cdff00f..00000000000
--- a/drivers/infiniband/hw/mlx4/mcg.c
+++ /dev/null
@@ -1,1256 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <rdma/ib_mad.h>
34#include <rdma/ib_smi.h>
35#include <rdma/ib_cache.h>
36#include <rdma/ib_sa.h>
37
38#include <linux/mlx4/cmd.h>
39#include <linux/rbtree.h>
40#include <linux/delay.h>
41
42#include "mlx4_ib.h"
43
/* Tunables for the multicast-group (MCG) paravirtual demux layer. */
#define MAX_VFS			80	/* max virtual functions tracked per group */
#define MAX_PEND_REQS_PER_FUNC	4	/* per-VF cap on queued join/leave MADs */
#define MAD_TIMEOUT_MS		2000	/* how long we wait for an SA response */

/* Logging helpers: plain warning/error, plus group-scoped variants that
 * tag the message with the group's MGID string (and port, for warnings). */
#define mcg_warn(fmt, arg...)	pr_warn("MCG WARNING: " fmt, ##arg)
#define mcg_error(fmt, arg...)	pr_err(fmt, ##arg)
#define mcg_warn_group(group, format, arg...) \
	pr_warn("%s-%d: %16s (port %d): WARNING: " format, __func__, __LINE__,\
	(group)->name, group->demux->port, ## arg)

#define mcg_error_group(group, format, arg...) \
	pr_err(" %16s: " format, (group)->name, ## arg)
56
57
58static union ib_gid mgid0;
59
60static struct workqueue_struct *clean_wq;
61
62enum mcast_state {
63 MCAST_NOT_MEMBER = 0,
64 MCAST_MEMBER,
65};
66
67enum mcast_group_state {
68 MCAST_IDLE,
69 MCAST_JOIN_SENT,
70 MCAST_LEAVE_SENT,
71 MCAST_RESP_READY
72};
73
74struct mcast_member {
75 enum mcast_state state;
76 uint8_t join_state;
77 int num_pend_reqs;
78 struct list_head pending;
79};
80
81struct ib_sa_mcmember_data {
82 union ib_gid mgid;
83 union ib_gid port_gid;
84 __be32 qkey;
85 __be16 mlid;
86 u8 mtusel_mtu;
87 u8 tclass;
88 __be16 pkey;
89 u8 ratesel_rate;
90 u8 lifetmsel_lifetm;
91 __be32 sl_flowlabel_hoplimit;
92 u8 scope_join_state;
93 u8 proxy_join;
94 u8 reserved[2];
95};
96
97struct mcast_group {
98 struct ib_sa_mcmember_data rec;
99 struct rb_node node;
100 struct list_head mgid0_list;
101 struct mlx4_ib_demux_ctx *demux;
102 struct mcast_member func[MAX_VFS];
103 struct mutex lock;
104 struct work_struct work;
105 struct list_head pending_list;
106 int members[3];
107 enum mcast_group_state state;
108 enum mcast_group_state prev_state;
109 struct ib_sa_mad response_sa_mad;
110 __be64 last_req_tid;
111
112 char name[33]; /* MGID string */
113 struct device_attribute dentry;
114
115 /* refcount is the reference count for the following:
116 1. Each queued request
117 2. Each invocation of the worker thread
118 3. Membership of the port at the SA
119 */
120 atomic_t refcount;
121
122 /* delayed work to clean pending SM request */
123 struct delayed_work timeout_work;
124 struct list_head cleanup_list;
125};
126
127struct mcast_req {
128 int func;
129 struct ib_sa_mad sa_mad;
130 struct list_head group_list;
131 struct list_head func_list;
132 struct mcast_group *group;
133 int clean;
134};
135
136
/* Drop a group reference that must never be the last one on this path;
 * hitting zero here indicates a refcounting bug, so warn (relies on a
 * 'group' variable being in scope at the expansion site). */
#define safe_atomic_dec(ref) \
	do {\
		if (atomic_dec_and_test(ref)) \
			mcg_warn_group(group, "did not expect to reach zero\n"); \
	} while (0)
142
143static const char *get_state_string(enum mcast_group_state state)
144{
145 switch (state) {
146 case MCAST_IDLE:
147 return "MCAST_IDLE";
148 case MCAST_JOIN_SENT:
149 return "MCAST_JOIN_SENT";
150 case MCAST_LEAVE_SENT:
151 return "MCAST_LEAVE_SENT";
152 case MCAST_RESP_READY:
153 return "MCAST_RESP_READY";
154 }
155 return "Invalid State";
156}
157
158static struct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx,
159 union ib_gid *mgid)
160{
161 struct rb_node *node = ctx->mcg_table.rb_node;
162 struct mcast_group *group;
163 int ret;
164
165 while (node) {
166 group = rb_entry(node, struct mcast_group, node);
167 ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid);
168 if (!ret)
169 return group;
170
171 if (ret < 0)
172 node = node->rb_left;
173 else
174 node = node->rb_right;
175 }
176 return NULL;
177}
178
179static struct mcast_group *mcast_insert(struct mlx4_ib_demux_ctx *ctx,
180 struct mcast_group *group)
181{
182 struct rb_node **link = &ctx->mcg_table.rb_node;
183 struct rb_node *parent = NULL;
184 struct mcast_group *cur_group;
185 int ret;
186
187 while (*link) {
188 parent = *link;
189 cur_group = rb_entry(parent, struct mcast_group, node);
190
191 ret = memcmp(group->rec.mgid.raw, cur_group->rec.mgid.raw,
192 sizeof group->rec.mgid);
193 if (ret < 0)
194 link = &(*link)->rb_left;
195 else if (ret > 0)
196 link = &(*link)->rb_right;
197 else
198 return cur_group;
199 }
200 rb_link_node(&group->node, parent, link);
201 rb_insert_color(&group->node, &ctx->mcg_table);
202 return NULL;
203}
204
205static int send_mad_to_wire(struct mlx4_ib_demux_ctx *ctx, struct ib_mad *mad)
206{
207 struct mlx4_ib_dev *dev = ctx->dev;
208 struct ib_ah_attr ah_attr;
209
210 spin_lock(&dev->sm_lock);
211 if (!dev->sm_ah[ctx->port - 1]) {
212 /* port is not yet Active, sm_ah not ready */
213 spin_unlock(&dev->sm_lock);
214 return -EAGAIN;
215 }
216 mlx4_ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
217 spin_unlock(&dev->sm_lock);
218 return mlx4_ib_send_to_wire(dev, mlx4_master_func_num(dev->dev), ctx->port,
219 IB_QPT_GSI, 0, 1, IB_QP1_QKEY, &ah_attr, mad);
220}
221
222static int send_mad_to_slave(int slave, struct mlx4_ib_demux_ctx *ctx,
223 struct ib_mad *mad)
224{
225 struct mlx4_ib_dev *dev = ctx->dev;
226 struct ib_mad_agent *agent = dev->send_agent[ctx->port - 1][1];
227 struct ib_wc wc;
228 struct ib_ah_attr ah_attr;
229
230 /* Our agent might not yet be registered when mads start to arrive */
231 if (!agent)
232 return -EAGAIN;
233
234 ib_query_ah(dev->sm_ah[ctx->port - 1], &ah_attr);
235
236 if (ib_find_cached_pkey(&dev->ib_dev, ctx->port, IB_DEFAULT_PKEY_FULL, &wc.pkey_index))
237 return -EINVAL;
238 wc.sl = 0;
239 wc.dlid_path_bits = 0;
240 wc.port_num = ctx->port;
241 wc.slid = ah_attr.dlid; /* opensm lid */
242 wc.src_qp = 1;
243 return mlx4_ib_send_to_slave(dev, slave, ctx->port, IB_QPT_GSI, &wc, NULL, mad);
244}
245
246static int send_join_to_wire(struct mcast_group *group, struct ib_sa_mad *sa_mad)
247{
248 struct ib_sa_mad mad;
249 struct ib_sa_mcmember_data *sa_mad_data = (struct ib_sa_mcmember_data *)&mad.data;
250 int ret;
251
252 /* we rely on a mad request as arrived from a VF */
253 memcpy(&mad, sa_mad, sizeof mad);
254
255 /* fix port GID to be the real one (slave 0) */
256 sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
257
258 /* assign our own TID */
259 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
260 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
261
262 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
263 /* set timeout handler */
264 if (!ret) {
265 /* calls mlx4_ib_mcg_timeout_handler */
266 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
267 msecs_to_jiffies(MAD_TIMEOUT_MS));
268 }
269
270 return ret;
271}
272
273static int send_leave_to_wire(struct mcast_group *group, u8 join_state)
274{
275 struct ib_sa_mad mad;
276 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
277 int ret;
278
279 memset(&mad, 0, sizeof mad);
280 mad.mad_hdr.base_version = 1;
281 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
282 mad.mad_hdr.class_version = 2;
283 mad.mad_hdr.method = IB_SA_METHOD_DELETE;
284 mad.mad_hdr.status = cpu_to_be16(0);
285 mad.mad_hdr.class_specific = cpu_to_be16(0);
286 mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
287 group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
288 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
289 mad.mad_hdr.attr_mod = cpu_to_be32(0);
290 mad.sa_hdr.sm_key = 0x0;
291 mad.sa_hdr.attr_offset = cpu_to_be16(7);
292 mad.sa_hdr.comp_mask = IB_SA_MCMEMBER_REC_MGID |
293 IB_SA_MCMEMBER_REC_PORT_GID | IB_SA_MCMEMBER_REC_JOIN_STATE;
294
295 *sa_data = group->rec;
296 sa_data->scope_join_state = join_state;
297
298 ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad);
299 if (ret)
300 group->state = MCAST_IDLE;
301
302 /* set timeout handler */
303 if (!ret) {
304 /* calls mlx4_ib_mcg_timeout_handler */
305 queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
306 msecs_to_jiffies(MAD_TIMEOUT_MS));
307 }
308
309 return ret;
310}
311
312static int send_reply_to_slave(int slave, struct mcast_group *group,
313 struct ib_sa_mad *req_sa_mad, u16 status)
314{
315 struct ib_sa_mad mad;
316 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)&mad.data;
317 struct ib_sa_mcmember_data *req_sa_data = (struct ib_sa_mcmember_data *)&req_sa_mad->data;
318 int ret;
319
320 memset(&mad, 0, sizeof mad);
321 mad.mad_hdr.base_version = 1;
322 mad.mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
323 mad.mad_hdr.class_version = 2;
324 mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
325 mad.mad_hdr.status = cpu_to_be16(status);
326 mad.mad_hdr.class_specific = cpu_to_be16(0);
327 mad.mad_hdr.tid = req_sa_mad->mad_hdr.tid;
328 *(u8 *)&mad.mad_hdr.tid = 0; /* resetting tid to 0 */
329 mad.mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
330 mad.mad_hdr.attr_mod = cpu_to_be32(0);
331 mad.sa_hdr.sm_key = req_sa_mad->sa_hdr.sm_key;
332 mad.sa_hdr.attr_offset = cpu_to_be16(7);
333 mad.sa_hdr.comp_mask = 0; /* ignored on responses, see IBTA spec */
334
335 *sa_data = group->rec;
336
337 /* reconstruct VF's requested join_state and port_gid */
338 sa_data->scope_join_state &= 0xf0;
339 sa_data->scope_join_state |= (group->func[slave].join_state & 0x0f);
340 memcpy(&sa_data->port_gid, &req_sa_data->port_gid, sizeof req_sa_data->port_gid);
341
342 ret = send_mad_to_slave(slave, group->demux, (struct ib_mad *)&mad);
343 return ret;
344}
345
346static int check_selector(ib_sa_comp_mask comp_mask,
347 ib_sa_comp_mask selector_mask,
348 ib_sa_comp_mask value_mask,
349 u8 src_value, u8 dst_value)
350{
351 int err;
352 u8 selector = dst_value >> 6;
353 dst_value &= 0x3f;
354 src_value &= 0x3f;
355
356 if (!(comp_mask & selector_mask) || !(comp_mask & value_mask))
357 return 0;
358
359 switch (selector) {
360 case IB_SA_GT:
361 err = (src_value <= dst_value);
362 break;
363 case IB_SA_LT:
364 err = (src_value >= dst_value);
365 break;
366 case IB_SA_EQ:
367 err = (src_value != dst_value);
368 break;
369 default:
370 err = 0;
371 break;
372 }
373
374 return err;
375}
376
377static u16 cmp_rec(struct ib_sa_mcmember_data *src,
378 struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
379{
380 /* src is group record, dst is request record */
381 /* MGID must already match */
382 /* Port_GID we always replace to our Port_GID, so it is a match */
383
384#define MAD_STATUS_REQ_INVALID 0x0200
385 if (comp_mask & IB_SA_MCMEMBER_REC_QKEY && src->qkey != dst->qkey)
386 return MAD_STATUS_REQ_INVALID;
387 if (comp_mask & IB_SA_MCMEMBER_REC_MLID && src->mlid != dst->mlid)
388 return MAD_STATUS_REQ_INVALID;
389 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_MTU_SELECTOR,
390 IB_SA_MCMEMBER_REC_MTU,
391 src->mtusel_mtu, dst->mtusel_mtu))
392 return MAD_STATUS_REQ_INVALID;
393 if (comp_mask & IB_SA_MCMEMBER_REC_TRAFFIC_CLASS &&
394 src->tclass != dst->tclass)
395 return MAD_STATUS_REQ_INVALID;
396 if (comp_mask & IB_SA_MCMEMBER_REC_PKEY && src->pkey != dst->pkey)
397 return MAD_STATUS_REQ_INVALID;
398 if (check_selector(comp_mask, IB_SA_MCMEMBER_REC_RATE_SELECTOR,
399 IB_SA_MCMEMBER_REC_RATE,
400 src->ratesel_rate, dst->ratesel_rate))
401 return MAD_STATUS_REQ_INVALID;
402 if (check_selector(comp_mask,
403 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR,
404 IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME,
405 src->lifetmsel_lifetm, dst->lifetmsel_lifetm))
406 return MAD_STATUS_REQ_INVALID;
407 if (comp_mask & IB_SA_MCMEMBER_REC_SL &&
408 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0xf0000000) !=
409 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0xf0000000))
410 return MAD_STATUS_REQ_INVALID;
411 if (comp_mask & IB_SA_MCMEMBER_REC_FLOW_LABEL &&
412 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x0fffff00) !=
413 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x0fffff00))
414 return MAD_STATUS_REQ_INVALID;
415 if (comp_mask & IB_SA_MCMEMBER_REC_HOP_LIMIT &&
416 (be32_to_cpu(src->sl_flowlabel_hoplimit) & 0x000000ff) !=
417 (be32_to_cpu(dst->sl_flowlabel_hoplimit) & 0x000000ff))
418 return MAD_STATUS_REQ_INVALID;
419 if (comp_mask & IB_SA_MCMEMBER_REC_SCOPE &&
420 (src->scope_join_state & 0xf0) !=
421 (dst->scope_join_state & 0xf0))
422 return MAD_STATUS_REQ_INVALID;
423
424 /* join_state checked separately, proxy_join ignored */
425
426 return 0;
427}
428
429/* release group, return 1 if this was last release and group is destroyed
430 * timout work is canceled sync */
431static int release_group(struct mcast_group *group, int from_timeout_handler)
432{
433 struct mlx4_ib_demux_ctx *ctx = group->demux;
434 int nzgroup;
435
436 mutex_lock(&ctx->mcg_table_lock);
437 mutex_lock(&group->lock);
438 if (atomic_dec_and_test(&group->refcount)) {
439 if (!from_timeout_handler) {
440 if (group->state != MCAST_IDLE &&
441 !cancel_delayed_work(&group->timeout_work)) {
442 atomic_inc(&group->refcount);
443 mutex_unlock(&group->lock);
444 mutex_unlock(&ctx->mcg_table_lock);
445 return 0;
446 }
447 }
448
449 nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
450 if (nzgroup)
451 del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
452 if (!list_empty(&group->pending_list))
453 mcg_warn_group(group, "releasing a group with non empty pending list\n");
454 if (nzgroup)
455 rb_erase(&group->node, &ctx->mcg_table);
456 list_del_init(&group->mgid0_list);
457 mutex_unlock(&group->lock);
458 mutex_unlock(&ctx->mcg_table_lock);
459 kfree(group);
460 return 1;
461 } else {
462 mutex_unlock(&group->lock);
463 mutex_unlock(&ctx->mcg_table_lock);
464 }
465 return 0;
466}
467
468static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
469{
470 int i;
471
472 for (i = 0; i < 3; i++, join_state >>= 1)
473 if (join_state & 0x1)
474 group->members[i] += inc;
475}
476
477static u8 get_leave_state(struct mcast_group *group)
478{
479 u8 leave_state = 0;
480 int i;
481
482 for (i = 0; i < 3; i++)
483 if (!group->members[i])
484 leave_state |= (1 << i);
485
486 return leave_state & (group->rec.scope_join_state & 7);
487}
488
489static int join_group(struct mcast_group *group, int slave, u8 join_mask)
490{
491 int ret = 0;
492 u8 join_state;
493
494 /* remove bits that slave is already member of, and adjust */
495 join_state = join_mask & (~group->func[slave].join_state);
496 adjust_membership(group, join_state, 1);
497 group->func[slave].join_state |= join_state;
498 if (group->func[slave].state != MCAST_MEMBER && join_state) {
499 group->func[slave].state = MCAST_MEMBER;
500 ret = 1;
501 }
502 return ret;
503}
504
505static int leave_group(struct mcast_group *group, int slave, u8 leave_state)
506{
507 int ret = 0;
508
509 adjust_membership(group, leave_state, -1);
510 group->func[slave].join_state &= ~leave_state;
511 if (!group->func[slave].join_state) {
512 group->func[slave].state = MCAST_NOT_MEMBER;
513 ret = 1;
514 }
515 return ret;
516}
517
518static int check_leave(struct mcast_group *group, int slave, u8 leave_mask)
519{
520 if (group->func[slave].state != MCAST_MEMBER)
521 return MAD_STATUS_REQ_INVALID;
522
523 /* make sure we're not deleting unset bits */
524 if (~group->func[slave].join_state & leave_mask)
525 return MAD_STATUS_REQ_INVALID;
526
527 if (!leave_mask)
528 return MAD_STATUS_REQ_INVALID;
529
530 return 0;
531}
532
533static void mlx4_ib_mcg_timeout_handler(struct work_struct *work)
534{
535 struct delayed_work *delay = to_delayed_work(work);
536 struct mcast_group *group;
537 struct mcast_req *req = NULL;
538
539 group = container_of(delay, typeof(*group), timeout_work);
540
541 mutex_lock(&group->lock);
542 if (group->state == MCAST_JOIN_SENT) {
543 if (!list_empty(&group->pending_list)) {
544 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
545 list_del(&req->group_list);
546 list_del(&req->func_list);
547 --group->func[req->func].num_pend_reqs;
548 mutex_unlock(&group->lock);
549 kfree(req);
550 if (memcmp(&group->rec.mgid, &mgid0, sizeof mgid0)) {
551 if (release_group(group, 1))
552 return;
553 } else {
554 kfree(group);
555 return;
556 }
557 mutex_lock(&group->lock);
558 } else
559 mcg_warn_group(group, "DRIVER BUG\n");
560 } else if (group->state == MCAST_LEAVE_SENT) {
561 if (group->rec.scope_join_state & 7)
562 group->rec.scope_join_state &= 0xf8;
563 group->state = MCAST_IDLE;
564 mutex_unlock(&group->lock);
565 if (release_group(group, 1))
566 return;
567 mutex_lock(&group->lock);
568 } else
569 mcg_warn_group(group, "invalid state %s\n", get_state_string(group->state));
570 group->state = MCAST_IDLE;
571 atomic_inc(&group->refcount);
572 if (!queue_work(group->demux->mcg_wq, &group->work))
573 safe_atomic_dec(&group->refcount);
574
575 mutex_unlock(&group->lock);
576}
577
578static int handle_leave_req(struct mcast_group *group, u8 leave_mask,
579 struct mcast_req *req)
580{
581 u16 status;
582
583 if (req->clean)
584 leave_mask = group->func[req->func].join_state;
585
586 status = check_leave(group, req->func, leave_mask);
587 if (!status)
588 leave_group(group, req->func, leave_mask);
589
590 if (!req->clean)
591 send_reply_to_slave(req->func, group, &req->sa_mad, status);
592 --group->func[req->func].num_pend_reqs;
593 list_del(&req->group_list);
594 list_del(&req->func_list);
595 kfree(req);
596 return 1;
597}
598
599static int handle_join_req(struct mcast_group *group, u8 join_mask,
600 struct mcast_req *req)
601{
602 u8 group_join_state = group->rec.scope_join_state & 7;
603 int ref = 0;
604 u16 status;
605 struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
606
607 if (join_mask == (group_join_state & join_mask)) {
608 /* port's membership need not change */
609 status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask);
610 if (!status)
611 join_group(group, req->func, join_mask);
612
613 --group->func[req->func].num_pend_reqs;
614 send_reply_to_slave(req->func, group, &req->sa_mad, status);
615 list_del(&req->group_list);
616 list_del(&req->func_list);
617 kfree(req);
618 ++ref;
619 } else {
620 /* port's membership needs to be updated */
621 group->prev_state = group->state;
622 if (send_join_to_wire(group, &req->sa_mad)) {
623 --group->func[req->func].num_pend_reqs;
624 list_del(&req->group_list);
625 list_del(&req->func_list);
626 kfree(req);
627 ref = 1;
628 group->state = group->prev_state;
629 } else
630 group->state = MCAST_JOIN_SENT;
631 }
632
633 return ref;
634}
635
636static void mlx4_ib_mcg_work_handler(struct work_struct *work)
637{
638 struct mcast_group *group;
639 struct mcast_req *req = NULL;
640 struct ib_sa_mcmember_data *sa_data;
641 u8 req_join_state;
642 int rc = 1; /* release_count - this is for the scheduled work */
643 u16 status;
644 u8 method;
645
646 group = container_of(work, typeof(*group), work);
647
648 mutex_lock(&group->lock);
649
650 /* First, let's see if a response from SM is waiting regarding this group.
651 * If so, we need to update the group's REC. If this is a bad response, we
652 * may need to send a bad response to a VF waiting for it. If VF is waiting
653 * and this is a good response, the VF will be answered later in this func. */
654 if (group->state == MCAST_RESP_READY) {
655 /* cancels mlx4_ib_mcg_timeout_handler */
656 cancel_delayed_work(&group->timeout_work);
657 status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
658 method = group->response_sa_mad.mad_hdr.method;
659 if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
660 mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
661 be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
662 be64_to_cpu(group->last_req_tid));
663 group->state = group->prev_state;
664 goto process_requests;
665 }
666 if (status) {
667 if (!list_empty(&group->pending_list))
668 req = list_first_entry(&group->pending_list,
669 struct mcast_req, group_list);
670 if ((method == IB_MGMT_METHOD_GET_RESP)) {
671 if (req) {
672 send_reply_to_slave(req->func, group, &req->sa_mad, status);
673 --group->func[req->func].num_pend_reqs;
674 list_del(&req->group_list);
675 list_del(&req->func_list);
676 kfree(req);
677 ++rc;
678 } else
679 mcg_warn_group(group, "no request for failed join\n");
680 } else if (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
681 ++rc;
682 } else {
683 u8 resp_join_state;
684 u8 cur_join_state;
685
686 resp_join_state = ((struct ib_sa_mcmember_data *)
687 group->response_sa_mad.data)->scope_join_state & 7;
688 cur_join_state = group->rec.scope_join_state & 7;
689
690 if (method == IB_MGMT_METHOD_GET_RESP) {
691 /* successfull join */
692 if (!cur_join_state && resp_join_state)
693 --rc;
694 } else if (!resp_join_state)
695 ++rc;
696 memcpy(&group->rec, group->response_sa_mad.data, sizeof group->rec);
697 }
698 group->state = MCAST_IDLE;
699 }
700
701process_requests:
702 /* We should now go over pending join/leave requests, as long as we are idle. */
703 while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
704 req = list_first_entry(&group->pending_list, struct mcast_req,
705 group_list);
706 sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
707 req_join_state = sa_data->scope_join_state & 0x7;
708
709 /* For a leave request, we will immediately answer the VF, and
710 * update our internal counters. The actual leave will be sent
711 * to SM later, if at all needed. We dequeue the request now. */
712 if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
713 rc += handle_leave_req(group, req_join_state, req);
714 else
715 rc += handle_join_req(group, req_join_state, req);
716 }
717
718 /* Handle leaves */
719 if (group->state == MCAST_IDLE) {
720 req_join_state = get_leave_state(group);
721 if (req_join_state) {
722 group->rec.scope_join_state &= ~req_join_state;
723 group->prev_state = group->state;
724 if (send_leave_to_wire(group, req_join_state)) {
725 group->state = group->prev_state;
726 ++rc;
727 } else
728 group->state = MCAST_LEAVE_SENT;
729 }
730 }
731
732 if (!list_empty(&group->pending_list) && group->state == MCAST_IDLE)
733 goto process_requests;
734 mutex_unlock(&group->lock);
735
736 while (rc--)
737 release_group(group, 0);
738}
739
740static struct mcast_group *search_relocate_mgid0_group(struct mlx4_ib_demux_ctx *ctx,
741 __be64 tid,
742 union ib_gid *new_mgid)
743{
744 struct mcast_group *group = NULL, *cur_group;
745 struct mcast_req *req;
746 struct list_head *pos;
747 struct list_head *n;
748
749 mutex_lock(&ctx->mcg_table_lock);
750 list_for_each_safe(pos, n, &ctx->mcg_mgid0_list) {
751 group = list_entry(pos, struct mcast_group, mgid0_list);
752 mutex_lock(&group->lock);
753 if (group->last_req_tid == tid) {
754 if (memcmp(new_mgid, &mgid0, sizeof mgid0)) {
755 group->rec.mgid = *new_mgid;
756 sprintf(group->name, "%016llx%016llx",
757 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
758 be64_to_cpu(group->rec.mgid.global.interface_id));
759 list_del_init(&group->mgid0_list);
760 cur_group = mcast_insert(ctx, group);
761 if (cur_group) {
762 /* A race between our code and SM. Silently cleaning the new one */
763 req = list_first_entry(&group->pending_list,
764 struct mcast_req, group_list);
765 --group->func[req->func].num_pend_reqs;
766 list_del(&req->group_list);
767 list_del(&req->func_list);
768 kfree(req);
769 mutex_unlock(&group->lock);
770 mutex_unlock(&ctx->mcg_table_lock);
771 release_group(group, 0);
772 return NULL;
773 }
774
775 atomic_inc(&group->refcount);
776 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
777 mutex_unlock(&group->lock);
778 mutex_unlock(&ctx->mcg_table_lock);
779 return group;
780 } else {
781 struct mcast_req *tmp1, *tmp2;
782
783 list_del(&group->mgid0_list);
784 if (!list_empty(&group->pending_list) && group->state != MCAST_IDLE)
785 cancel_delayed_work_sync(&group->timeout_work);
786
787 list_for_each_entry_safe(tmp1, tmp2, &group->pending_list, group_list) {
788 list_del(&tmp1->group_list);
789 kfree(tmp1);
790 }
791 mutex_unlock(&group->lock);
792 mutex_unlock(&ctx->mcg_table_lock);
793 kfree(group);
794 return NULL;
795 }
796 }
797 mutex_unlock(&group->lock);
798 }
799 mutex_unlock(&ctx->mcg_table_lock);
800
801 return NULL;
802}
803
804static ssize_t sysfs_show_group(struct device *dev,
805 struct device_attribute *attr, char *buf);
806
807static struct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx,
808 union ib_gid *mgid, int create,
809 gfp_t gfp_mask)
810{
811 struct mcast_group *group, *cur_group;
812 int is_mgid0;
813 int i;
814
815 is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0);
816 if (!is_mgid0) {
817 group = mcast_find(ctx, mgid);
818 if (group)
819 goto found;
820 }
821
822 if (!create)
823 return ERR_PTR(-ENOENT);
824
825 group = kzalloc(sizeof *group, gfp_mask);
826 if (!group)
827 return ERR_PTR(-ENOMEM);
828
829 group->demux = ctx;
830 group->rec.mgid = *mgid;
831 INIT_LIST_HEAD(&group->pending_list);
832 INIT_LIST_HEAD(&group->mgid0_list);
833 for (i = 0; i < MAX_VFS; ++i)
834 INIT_LIST_HEAD(&group->func[i].pending);
835 INIT_WORK(&group->work, mlx4_ib_mcg_work_handler);
836 INIT_DELAYED_WORK(&group->timeout_work, mlx4_ib_mcg_timeout_handler);
837 mutex_init(&group->lock);
838 sprintf(group->name, "%016llx%016llx",
839 be64_to_cpu(group->rec.mgid.global.subnet_prefix),
840 be64_to_cpu(group->rec.mgid.global.interface_id));
841 sysfs_attr_init(&group->dentry.attr);
842 group->dentry.show = sysfs_show_group;
843 group->dentry.store = NULL;
844 group->dentry.attr.name = group->name;
845 group->dentry.attr.mode = 0400;
846 group->state = MCAST_IDLE;
847
848 if (is_mgid0) {
849 list_add(&group->mgid0_list, &ctx->mcg_mgid0_list);
850 goto found;
851 }
852
853 cur_group = mcast_insert(ctx, group);
854 if (cur_group) {
855 mcg_warn("group just showed up %s - confused\n", cur_group->name);
856 kfree(group);
857 return ERR_PTR(-EINVAL);
858 }
859
860 add_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
861
862found:
863 atomic_inc(&group->refcount);
864 return group;
865}
866
867static void queue_req(struct mcast_req *req)
868{
869 struct mcast_group *group = req->group;
870
871 atomic_inc(&group->refcount); /* for the request */
872 atomic_inc(&group->refcount); /* for scheduling the work */
873 list_add_tail(&req->group_list, &group->pending_list);
874 list_add_tail(&req->func_list, &group->func[req->func].pending);
875 /* calls mlx4_ib_mcg_work_handler */
876 if (!queue_work(group->demux->mcg_wq, &group->work))
877 safe_atomic_dec(&group->refcount);
878}
879
880int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
881 struct ib_sa_mad *mad)
882{
883 struct mlx4_ib_dev *dev = to_mdev(ibdev);
884 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)mad->data;
885 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
886 struct mcast_group *group;
887
888 switch (mad->mad_hdr.method) {
889 case IB_MGMT_METHOD_GET_RESP:
890 case IB_SA_METHOD_DELETE_RESP:
891 mutex_lock(&ctx->mcg_table_lock);
892 group = acquire_group(ctx, &rec->mgid, 0, GFP_KERNEL);
893 mutex_unlock(&ctx->mcg_table_lock);
894 if (IS_ERR(group)) {
895 if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
896 __be64 tid = mad->mad_hdr.tid;
897 *(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
898 group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
899 } else
900 group = NULL;
901 }
902
903 if (!group)
904 return 1;
905
906 mutex_lock(&group->lock);
907 group->response_sa_mad = *mad;
908 group->prev_state = group->state;
909 group->state = MCAST_RESP_READY;
910 /* calls mlx4_ib_mcg_work_handler */
911 atomic_inc(&group->refcount);
912 if (!queue_work(ctx->mcg_wq, &group->work))
913 safe_atomic_dec(&group->refcount);
914 mutex_unlock(&group->lock);
915 release_group(group, 0);
916 return 1; /* consumed */
917 case IB_MGMT_METHOD_SET:
918 case IB_SA_METHOD_GET_TABLE:
919 case IB_SA_METHOD_GET_TABLE_RESP:
920 case IB_SA_METHOD_DELETE:
921 return 0; /* not consumed, pass-through to guest over tunnel */
922 default:
923 mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
924 port, mad->mad_hdr.method);
925 return 1; /* consumed */
926 }
927}
928
929int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port,
930 int slave, struct ib_sa_mad *sa_mad)
931{
932 struct mlx4_ib_dev *dev = to_mdev(ibdev);
933 struct ib_sa_mcmember_data *rec = (struct ib_sa_mcmember_data *)sa_mad->data;
934 struct mlx4_ib_demux_ctx *ctx = &dev->sriov.demux[port - 1];
935 struct mcast_group *group;
936 struct mcast_req *req;
937 int may_create = 0;
938
939 if (ctx->flushing)
940 return -EAGAIN;
941
942 switch (sa_mad->mad_hdr.method) {
943 case IB_MGMT_METHOD_SET:
944 may_create = 1;
945 case IB_SA_METHOD_DELETE:
946 req = kzalloc(sizeof *req, GFP_KERNEL);
947 if (!req)
948 return -ENOMEM;
949
950 req->func = slave;
951 req->sa_mad = *sa_mad;
952
953 mutex_lock(&ctx->mcg_table_lock);
954 group = acquire_group(ctx, &rec->mgid, may_create, GFP_KERNEL);
955 mutex_unlock(&ctx->mcg_table_lock);
956 if (IS_ERR(group)) {
957 kfree(req);
958 return PTR_ERR(group);
959 }
960 mutex_lock(&group->lock);
961 if (group->func[slave].num_pend_reqs > MAX_PEND_REQS_PER_FUNC) {
962 mutex_unlock(&group->lock);
963 mcg_warn_group(group, "Port %d, Func %d has too many pending requests (%d), dropping\n",
964 port, slave, MAX_PEND_REQS_PER_FUNC);
965 release_group(group, 0);
966 kfree(req);
967 return -ENOMEM;
968 }
969 ++group->func[slave].num_pend_reqs;
970 req->group = group;
971 queue_req(req);
972 mutex_unlock(&group->lock);
973 release_group(group, 0);
974 return 1; /* consumed */
975 case IB_SA_METHOD_GET_TABLE:
976 case IB_MGMT_METHOD_GET_RESP:
977 case IB_SA_METHOD_GET_TABLE_RESP:
978 case IB_SA_METHOD_DELETE_RESP:
979 return 0; /* not consumed, pass-through */
980 default:
981 mcg_warn("In multiplex, port %d, func %d: unexpected MCMember method: 0x%x, dropping\n",
982 port, slave, sa_mad->mad_hdr.method);
983 return 1; /* consumed */
984 }
985}
986
987static ssize_t sysfs_show_group(struct device *dev,
988 struct device_attribute *attr, char *buf)
989{
990 struct mcast_group *group =
991 container_of(attr, struct mcast_group, dentry);
992 struct mcast_req *req = NULL;
993 char pending_str[40];
994 char state_str[40];
995 ssize_t len = 0;
996 int f;
997
998 if (group->state == MCAST_IDLE)
999 sprintf(state_str, "%s", get_state_string(group->state));
1000 else
1001 sprintf(state_str, "%s(TID=0x%llx)",
1002 get_state_string(group->state),
1003 be64_to_cpu(group->last_req_tid));
1004 if (list_empty(&group->pending_list)) {
1005 sprintf(pending_str, "No");
1006 } else {
1007 req = list_first_entry(&group->pending_list, struct mcast_req, group_list);
1008 sprintf(pending_str, "Yes(TID=0x%llx)",
1009 be64_to_cpu(req->sa_mad.mad_hdr.tid));
1010 }
1011 len += sprintf(buf + len, "%1d [%02d,%02d,%02d] %4d %4s %5s ",
1012 group->rec.scope_join_state & 0xf,
1013 group->members[2], group->members[1], group->members[0],
1014 atomic_read(&group->refcount),
1015 pending_str,
1016 state_str);
1017 for (f = 0; f < MAX_VFS; ++f)
1018 if (group->func[f].state == MCAST_MEMBER)
1019 len += sprintf(buf + len, "%d[%1x] ",
1020 f, group->func[f].join_state);
1021
1022 len += sprintf(buf + len, "\t\t(%4hx %4x %2x %2x %2x %2x %2x "
1023 "%4x %4x %2x %2x)\n",
1024 be16_to_cpu(group->rec.pkey),
1025 be32_to_cpu(group->rec.qkey),
1026 (group->rec.mtusel_mtu & 0xc0) >> 6,
1027 group->rec.mtusel_mtu & 0x3f,
1028 group->rec.tclass,
1029 (group->rec.ratesel_rate & 0xc0) >> 6,
1030 group->rec.ratesel_rate & 0x3f,
1031 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0xf0000000) >> 28,
1032 (be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x0fffff00) >> 8,
1033 be32_to_cpu(group->rec.sl_flowlabel_hoplimit) & 0x000000ff,
1034 group->rec.proxy_join);
1035
1036 return len;
1037}
1038
1039int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx)
1040{
1041 char name[20];
1042
1043 atomic_set(&ctx->tid, 0);
1044 sprintf(name, "mlx4_ib_mcg%d", ctx->port);
1045 ctx->mcg_wq = create_singlethread_workqueue(name);
1046 if (!ctx->mcg_wq)
1047 return -ENOMEM;
1048
1049 mutex_init(&ctx->mcg_table_lock);
1050 ctx->mcg_table = RB_ROOT;
1051 INIT_LIST_HEAD(&ctx->mcg_mgid0_list);
1052 ctx->flushing = 0;
1053
1054 return 0;
1055}
1056
1057static void force_clean_group(struct mcast_group *group)
1058{
1059 struct mcast_req *req, *tmp
1060 ;
1061 list_for_each_entry_safe(req, tmp, &group->pending_list, group_list) {
1062 list_del(&req->group_list);
1063 kfree(req);
1064 }
1065 del_sysfs_port_mcg_attr(group->demux->dev, group->demux->port, &group->dentry.attr);
1066 rb_erase(&group->node, &group->demux->mcg_table);
1067 kfree(group);
1068}
1069
1070static void _mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1071{
1072 int i;
1073 struct rb_node *p;
1074 struct mcast_group *group;
1075 unsigned long end;
1076 int count;
1077
1078 for (i = 0; i < MAX_VFS; ++i)
1079 clean_vf_mcast(ctx, i);
1080
1081 end = jiffies + msecs_to_jiffies(MAD_TIMEOUT_MS + 3000);
1082 do {
1083 count = 0;
1084 mutex_lock(&ctx->mcg_table_lock);
1085 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p))
1086 ++count;
1087 mutex_unlock(&ctx->mcg_table_lock);
1088 if (!count)
1089 break;
1090
1091 msleep(1);
1092 } while (time_after(end, jiffies));
1093
1094 flush_workqueue(ctx->mcg_wq);
1095 if (destroy_wq)
1096 destroy_workqueue(ctx->mcg_wq);
1097
1098 mutex_lock(&ctx->mcg_table_lock);
1099 while ((p = rb_first(&ctx->mcg_table)) != NULL) {
1100 group = rb_entry(p, struct mcast_group, node);
1101 if (atomic_read(&group->refcount))
1102 mcg_warn_group(group, "group refcount %d!!! (pointer %p)\n", atomic_read(&group->refcount), group);
1103
1104 force_clean_group(group);
1105 }
1106 mutex_unlock(&ctx->mcg_table_lock);
1107}
1108
/*
 * Deferred port-cleanup request, allocated by mlx4_ib_mcg_port_cleanup()
 * and freed by mcg_clean_task() after the cleanup runs on clean_wq.
 */
struct clean_work {
	struct work_struct work;	/* queued on the module-wide clean_wq */
	struct mlx4_ib_demux_ctx *ctx;	/* port demux context to clean */
	int destroy_wq;			/* forwarded to _mlx4_ib_mcg_port_cleanup() */
};
1114
1115static void mcg_clean_task(struct work_struct *work)
1116{
1117 struct clean_work *cw = container_of(work, struct clean_work, work);
1118
1119 _mlx4_ib_mcg_port_cleanup(cw->ctx, cw->destroy_wq);
1120 cw->ctx->flushing = 0;
1121 kfree(cw);
1122}
1123
1124void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq)
1125{
1126 struct clean_work *work;
1127
1128 if (ctx->flushing)
1129 return;
1130
1131 ctx->flushing = 1;
1132
1133 if (destroy_wq) {
1134 _mlx4_ib_mcg_port_cleanup(ctx, destroy_wq);
1135 ctx->flushing = 0;
1136 return;
1137 }
1138
1139 work = kmalloc(sizeof *work, GFP_KERNEL);
1140 if (!work) {
1141 ctx->flushing = 0;
1142 mcg_warn("failed allocating work for cleanup\n");
1143 return;
1144 }
1145
1146 work->ctx = ctx;
1147 work->destroy_wq = destroy_wq;
1148 INIT_WORK(&work->work, mcg_clean_task);
1149 queue_work(clean_wq, &work->work);
1150}
1151
1152static void build_leave_mad(struct mcast_req *req)
1153{
1154 struct ib_sa_mad *mad = &req->sa_mad;
1155
1156 mad->mad_hdr.method = IB_SA_METHOD_DELETE;
1157}
1158
1159
/*
 * Drop every pending request belonging to VF 'vf' on this group.
 *
 * Walks the per-function pending list (linked via req->func_list).  A
 * request that is both the head of the group-wide pending list and
 * currently on the wire (JOIN_SENT/LEAVE_SENT) may only be freed if its
 * timeout work could still be cancelled; otherwise the timeout handler
 * owns it and we must leave it alone ('pend' records that case).
 * Caller must hold group->lock.
 */
static void clear_pending_reqs(struct mcast_group *group, int vf)
{
	struct mcast_req *req, *tmp, *group_first = NULL;
	int clear;
	int pend = 0;

	if (!list_empty(&group->pending_list))
		group_first = list_first_entry(&group->pending_list, struct mcast_req, group_list);

	list_for_each_entry_safe(req, tmp, &group->func[vf].pending, func_list) {
		clear = 1;
		if (group_first == req &&
		    (group->state == MCAST_JOIN_SENT ||
		     group->state == MCAST_LEAVE_SENT)) {
			/* in flight: safe to free only if the timeout work
			 * had not started running yet */
			clear = cancel_delayed_work(&group->timeout_work);
			pend = !clear;
			group->state = MCAST_IDLE;
		}
		if (clear) {
			/* unlink from both lists and drop the group ref the
			 * request was holding */
			--group->func[vf].num_pend_reqs;
			list_del(&req->group_list);
			list_del(&req->func_list);
			kfree(req);
			atomic_dec(&group->refcount);
		}
	}

	/* unless one request was left to its running timeout handler, the
	 * VF's list and counter must both be empty now */
	if (!pend && (!list_empty(&group->func[vf].pending) || group->func[vf].num_pend_reqs)) {
		mcg_warn_group(group, "DRIVER BUG: list_empty %d, num_pend_reqs %d\n",
			       list_empty(&group->func[vf].pending), group->func[vf].num_pend_reqs);
	}
}
1192
1193static int push_deleteing_req(struct mcast_group *group, int slave)
1194{
1195 struct mcast_req *req;
1196 struct mcast_req *pend_req;
1197
1198 if (!group->func[slave].join_state)
1199 return 0;
1200
1201 req = kzalloc(sizeof *req, GFP_KERNEL);
1202 if (!req) {
1203 mcg_warn_group(group, "failed allocation - may leave stall groups\n");
1204 return -ENOMEM;
1205 }
1206
1207 if (!list_empty(&group->func[slave].pending)) {
1208 pend_req = list_entry(group->func[slave].pending.prev, struct mcast_req, group_list);
1209 if (pend_req->clean) {
1210 kfree(req);
1211 return 0;
1212 }
1213 }
1214
1215 req->clean = 1;
1216 req->func = slave;
1217 req->group = group;
1218 ++group->func[slave].num_pend_reqs;
1219 build_leave_mad(req);
1220 queue_req(req);
1221 return 0;
1222}
1223
1224void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave)
1225{
1226 struct mcast_group *group;
1227 struct rb_node *p;
1228
1229 mutex_lock(&ctx->mcg_table_lock);
1230 for (p = rb_first(&ctx->mcg_table); p; p = rb_next(p)) {
1231 group = rb_entry(p, struct mcast_group, node);
1232 mutex_lock(&group->lock);
1233 if (atomic_read(&group->refcount)) {
1234 /* clear pending requests of this VF */
1235 clear_pending_reqs(group, slave);
1236 push_deleteing_req(group, slave);
1237 }
1238 mutex_unlock(&group->lock);
1239 }
1240 mutex_unlock(&ctx->mcg_table_lock);
1241}
1242
1243
1244int mlx4_ib_mcg_init(void)
1245{
1246 clean_wq = create_singlethread_workqueue("mlx4_ib_mcg");
1247 if (!clean_wq)
1248 return -ENOMEM;
1249
1250 return 0;
1251}
1252
/* Module-level teardown: destroy the cleanup work queue created by
 * mlx4_ib_mcg_init(). */
void mlx4_ib_mcg_destroy(void)
{
	destroy_workqueue(clean_wq);
}
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index dcd845bc30f..e4bf2cff866 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -37,37 +37,13 @@
37#include <linux/compiler.h> 37#include <linux/compiler.h>
38#include <linux/list.h> 38#include <linux/list.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/idr.h>
41 40
42#include <rdma/ib_verbs.h> 41#include <rdma/ib_verbs.h>
43#include <rdma/ib_umem.h> 42#include <rdma/ib_umem.h>
44#include <rdma/ib_mad.h>
45#include <rdma/ib_sa.h>
46 43
47#include <linux/mlx4/device.h> 44#include <linux/mlx4/device.h>
48#include <linux/mlx4/doorbell.h> 45#include <linux/mlx4/doorbell.h>
49 46
50#define MLX4_IB_DRV_NAME "mlx4_ib"
51
52#ifdef pr_fmt
53#undef pr_fmt
54#endif
55#define pr_fmt(fmt) "<" MLX4_IB_DRV_NAME "> %s: " fmt, __func__
56
57#define mlx4_ib_warn(ibdev, format, arg...) \
58 dev_warn((ibdev)->dma_device, MLX4_IB_DRV_NAME ": " format, ## arg)
59
60enum {
61 MLX4_IB_SQ_MIN_WQE_SHIFT = 6,
62 MLX4_IB_MAX_HEADROOM = 2048
63};
64
65#define MLX4_IB_SQ_HEADROOM(shift) ((MLX4_IB_MAX_HEADROOM >> (shift)) + 1)
66#define MLX4_IB_SQ_MAX_SPARE (MLX4_IB_SQ_HEADROOM(MLX4_IB_SQ_MIN_WQE_SHIFT))
67
68/*module param to indicate if SM assigns the alias_GUID*/
69extern int mlx4_ib_sm_guid_assign;
70
71struct mlx4_ib_ucontext { 47struct mlx4_ib_ucontext {
72 struct ib_ucontext ibucontext; 48 struct ib_ucontext ibucontext;
73 struct mlx4_uar uar; 49 struct mlx4_uar uar;
@@ -80,17 +56,9 @@ struct mlx4_ib_pd {
80 u32 pdn; 56 u32 pdn;
81}; 57};
82 58
83struct mlx4_ib_xrcd {
84 struct ib_xrcd ibxrcd;
85 u32 xrcdn;
86 struct ib_pd *pd;
87 struct ib_cq *cq;
88};
89
90struct mlx4_ib_cq_buf { 59struct mlx4_ib_cq_buf {
91 struct mlx4_buf buf; 60 struct mlx4_buf buf;
92 struct mlx4_mtt mtt; 61 struct mlx4_mtt mtt;
93 int entry_size;
94}; 62};
95 63
96struct mlx4_ib_cq_resize { 64struct mlx4_ib_cq_resize {
@@ -140,10 +108,8 @@ struct mlx4_ib_wq {
140}; 108};
141 109
142enum mlx4_ib_qp_flags { 110enum mlx4_ib_qp_flags {
143 MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, 111 MLX4_IB_QP_LSO = 1 << 0,
144 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, 112 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
145 MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
146 MLX4_IB_SRIOV_SQP = 1 << 31,
147}; 113};
148 114
149struct mlx4_ib_gid_entry { 115struct mlx4_ib_gid_entry {
@@ -153,80 +119,6 @@ struct mlx4_ib_gid_entry {
153 u8 port; 119 u8 port;
154}; 120};
155 121
156enum mlx4_ib_qp_type {
157 /*
158 * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries
159 * here (and in that order) since the MAD layer uses them as
160 * indices into a 2-entry table.
161 */
162 MLX4_IB_QPT_SMI = IB_QPT_SMI,
163 MLX4_IB_QPT_GSI = IB_QPT_GSI,
164
165 MLX4_IB_QPT_RC = IB_QPT_RC,
166 MLX4_IB_QPT_UC = IB_QPT_UC,
167 MLX4_IB_QPT_UD = IB_QPT_UD,
168 MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6,
169 MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE,
170 MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET,
171 MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI,
172 MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT,
173
174 MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16,
175 MLX4_IB_QPT_PROXY_SMI = 1 << 17,
176 MLX4_IB_QPT_PROXY_GSI = 1 << 18,
177 MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19,
178 MLX4_IB_QPT_TUN_SMI = 1 << 20,
179 MLX4_IB_QPT_TUN_GSI = 1 << 21,
180};
181
182#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \
183 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \
184 MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI)
185
186enum mlx4_ib_mad_ifc_flags {
187 MLX4_MAD_IFC_IGNORE_MKEY = 1,
188 MLX4_MAD_IFC_IGNORE_BKEY = 2,
189 MLX4_MAD_IFC_IGNORE_KEYS = (MLX4_MAD_IFC_IGNORE_MKEY |
190 MLX4_MAD_IFC_IGNORE_BKEY),
191 MLX4_MAD_IFC_NET_VIEW = 4,
192};
193
194enum {
195 MLX4_NUM_TUNNEL_BUFS = 256,
196};
197
198struct mlx4_ib_tunnel_header {
199 struct mlx4_av av;
200 __be32 remote_qpn;
201 __be32 qkey;
202 __be16 vlan;
203 u8 mac[6];
204 __be16 pkey_index;
205 u8 reserved[6];
206};
207
208struct mlx4_ib_buf {
209 void *addr;
210 dma_addr_t map;
211};
212
213struct mlx4_rcv_tunnel_hdr {
214 __be32 flags_src_qp; /* flags[6:5] is defined for VLANs:
215 * 0x0 - no vlan was in the packet
216 * 0x01 - C-VLAN was in the packet */
217 u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */
218 u8 reserved;
219 __be16 pkey_index;
220 __be16 sl_vid;
221 __be16 slid_mac_47_32;
222 __be32 mac_31_0;
223};
224
225struct mlx4_ib_proxy_sqp_hdr {
226 struct ib_grh grh;
227 struct mlx4_rcv_tunnel_hdr tun;
228} __packed;
229
230struct mlx4_ib_qp { 122struct mlx4_ib_qp {
231 struct ib_qp ibqp; 123 struct ib_qp ibqp;
232 struct mlx4_qp mqp; 124 struct mlx4_qp mqp;
@@ -242,12 +134,10 @@ struct mlx4_ib_qp {
242 int sq_spare_wqes; 134 int sq_spare_wqes;
243 struct mlx4_ib_wq sq; 135 struct mlx4_ib_wq sq;
244 136
245 enum mlx4_ib_qp_type mlx4_ib_qp_type;
246 struct ib_umem *umem; 137 struct ib_umem *umem;
247 struct mlx4_mtt mtt; 138 struct mlx4_mtt mtt;
248 int buf_size; 139 int buf_size;
249 struct mutex mutex; 140 struct mutex mutex;
250 u16 xrcdn;
251 u32 flags; 141 u32 flags;
252 u8 port; 142 u8 port;
253 u8 alt_port; 143 u8 alt_port;
@@ -257,9 +147,6 @@ struct mlx4_ib_qp {
257 u8 state; 147 u8 state;
258 int mlx_type; 148 int mlx_type;
259 struct list_head gid_list; 149 struct list_head gid_list;
260 struct list_head steering_rules;
261 struct mlx4_ib_buf *sqp_proxy_rcv;
262
263}; 150};
264 151
265struct mlx4_ib_srq { 152struct mlx4_ib_srq {
@@ -282,138 +169,6 @@ struct mlx4_ib_ah {
282 union mlx4_ext_av av; 169 union mlx4_ext_av av;
283}; 170};
284 171
285/****************************************/
286/* alias guid support */
287/****************************************/
288#define NUM_PORT_ALIAS_GUID 2
289#define NUM_ALIAS_GUID_IN_REC 8
290#define NUM_ALIAS_GUID_REC_IN_PORT 16
291#define GUID_REC_SIZE 8
292#define NUM_ALIAS_GUID_PER_PORT 128
293#define MLX4_NOT_SET_GUID (0x00LL)
294#define MLX4_GUID_FOR_DELETE_VAL (~(0x00LL))
295
296enum mlx4_guid_alias_rec_status {
297 MLX4_GUID_INFO_STATUS_IDLE,
298 MLX4_GUID_INFO_STATUS_SET,
299 MLX4_GUID_INFO_STATUS_PENDING,
300};
301
302enum mlx4_guid_alias_rec_ownership {
303 MLX4_GUID_DRIVER_ASSIGN,
304 MLX4_GUID_SYSADMIN_ASSIGN,
305 MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
306};
307
308enum mlx4_guid_alias_rec_method {
309 MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
310 MLX4_GUID_INFO_RECORD_DELETE = IB_SA_METHOD_DELETE,
311};
312
313struct mlx4_sriov_alias_guid_info_rec_det {
314 u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
315 ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
316 enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
317 u8 method; /*set or delete*/
318 enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
319};
320
321struct mlx4_sriov_alias_guid_port_rec_det {
322 struct mlx4_sriov_alias_guid_info_rec_det all_rec_per_port[NUM_ALIAS_GUID_REC_IN_PORT];
323 struct workqueue_struct *wq;
324 struct delayed_work alias_guid_work;
325 u8 port;
326 struct mlx4_sriov_alias_guid *parent;
327 struct list_head cb_list;
328};
329
330struct mlx4_sriov_alias_guid {
331 struct mlx4_sriov_alias_guid_port_rec_det ports_guid[MLX4_MAX_PORTS];
332 spinlock_t ag_work_lock;
333 struct ib_sa_client *sa_client;
334};
335
336struct mlx4_ib_demux_work {
337 struct work_struct work;
338 struct mlx4_ib_dev *dev;
339 int slave;
340 int do_init;
341 u8 port;
342
343};
344
345struct mlx4_ib_tun_tx_buf {
346 struct mlx4_ib_buf buf;
347 struct ib_ah *ah;
348};
349
350struct mlx4_ib_demux_pv_qp {
351 struct ib_qp *qp;
352 enum ib_qp_type proxy_qpt;
353 struct mlx4_ib_buf *ring;
354 struct mlx4_ib_tun_tx_buf *tx_ring;
355 spinlock_t tx_lock;
356 unsigned tx_ix_head;
357 unsigned tx_ix_tail;
358};
359
360enum mlx4_ib_demux_pv_state {
361 DEMUX_PV_STATE_DOWN,
362 DEMUX_PV_STATE_STARTING,
363 DEMUX_PV_STATE_ACTIVE,
364 DEMUX_PV_STATE_DOWNING,
365};
366
367struct mlx4_ib_demux_pv_ctx {
368 int port;
369 int slave;
370 enum mlx4_ib_demux_pv_state state;
371 int has_smi;
372 struct ib_device *ib_dev;
373 struct ib_cq *cq;
374 struct ib_pd *pd;
375 struct ib_mr *mr;
376 struct work_struct work;
377 struct workqueue_struct *wq;
378 struct mlx4_ib_demux_pv_qp qp[2];
379};
380
381struct mlx4_ib_demux_ctx {
382 struct ib_device *ib_dev;
383 int port;
384 struct workqueue_struct *wq;
385 struct workqueue_struct *ud_wq;
386 spinlock_t ud_lock;
387 __be64 subnet_prefix;
388 __be64 guid_cache[128];
389 struct mlx4_ib_dev *dev;
390 /* the following lock protects both mcg_table and mcg_mgid0_list */
391 struct mutex mcg_table_lock;
392 struct rb_root mcg_table;
393 struct list_head mcg_mgid0_list;
394 struct workqueue_struct *mcg_wq;
395 struct mlx4_ib_demux_pv_ctx **tun;
396 atomic_t tid;
397 int flushing; /* flushing the work queue */
398};
399
400struct mlx4_ib_sriov {
401 struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS];
402 struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS];
403 /* when using this spinlock you should use "irq" because
404 * it may be called from interrupt context.*/
405 spinlock_t going_down_lock;
406 int is_going_down;
407
408 struct mlx4_sriov_alias_guid alias_guid;
409
410 /* CM paravirtualization fields */
411 struct list_head cm_list;
412 spinlock_t id_map_lock;
413 struct rb_root sl_id_map;
414 struct idr pv_id_table;
415};
416
417struct mlx4_ib_iboe { 172struct mlx4_ib_iboe {
418 spinlock_t lock; 173 spinlock_t lock;
419 struct net_device *netdevs[MLX4_MAX_PORTS]; 174 struct net_device *netdevs[MLX4_MAX_PORTS];
@@ -421,42 +176,6 @@ struct mlx4_ib_iboe {
421 union ib_gid gid_table[MLX4_MAX_PORTS][128]; 176 union ib_gid gid_table[MLX4_MAX_PORTS][128];
422}; 177};
423 178
424struct pkey_mgt {
425 u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
426 u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS];
427 struct list_head pkey_port_list[MLX4_MFUNC_MAX];
428 struct kobject *device_parent[MLX4_MFUNC_MAX];
429};
430
431struct mlx4_ib_iov_sysfs_attr {
432 void *ctx;
433 struct kobject *kobj;
434 unsigned long data;
435 u32 entry_num;
436 char name[15];
437 struct device_attribute dentry;
438 struct device *dev;
439};
440
441struct mlx4_ib_iov_sysfs_attr_ar {
442 struct mlx4_ib_iov_sysfs_attr dentries[3 * NUM_ALIAS_GUID_PER_PORT + 1];
443};
444
445struct mlx4_ib_iov_port {
446 char name[100];
447 u8 num;
448 struct mlx4_ib_dev *dev;
449 struct list_head list;
450 struct mlx4_ib_iov_sysfs_attr_ar *dentr_ar;
451 struct ib_port_attr attr;
452 struct kobject *cur_port;
453 struct kobject *admin_alias_parent;
454 struct kobject *gids_parent;
455 struct kobject *pkeys_parent;
456 struct kobject *mcgs_parent;
457 struct mlx4_ib_iov_sysfs_attr mcg_dentry;
458};
459
460struct mlx4_ib_dev { 179struct mlx4_ib_dev {
461 struct ib_device ib_dev; 180 struct ib_device ib_dev;
462 struct mlx4_dev *dev; 181 struct mlx4_dev *dev;
@@ -470,32 +189,11 @@ struct mlx4_ib_dev {
470 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; 189 struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2];
471 struct ib_ah *sm_ah[MLX4_MAX_PORTS]; 190 struct ib_ah *sm_ah[MLX4_MAX_PORTS];
472 spinlock_t sm_lock; 191 spinlock_t sm_lock;
473 struct mlx4_ib_sriov sriov;
474 192
475 struct mutex cap_mask_mutex; 193 struct mutex cap_mask_mutex;
476 bool ib_active; 194 bool ib_active;
477 struct mlx4_ib_iboe iboe; 195 struct mlx4_ib_iboe iboe;
478 int counters[MLX4_MAX_PORTS]; 196 int counters[MLX4_MAX_PORTS];
479 int *eq_table;
480 int eq_added;
481 struct kobject *iov_parent;
482 struct kobject *ports_parent;
483 struct kobject *dev_ports_parent[MLX4_MFUNC_MAX];
484 struct mlx4_ib_iov_port iov_ports[MLX4_MAX_PORTS];
485 struct pkey_mgt pkeys;
486};
487
488struct ib_event_work {
489 struct work_struct work;
490 struct mlx4_ib_dev *ib_dev;
491 struct mlx4_eqe ib_eqe;
492};
493
494struct mlx4_ib_qp_tunnel_init_attr {
495 struct ib_qp_init_attr init_attr;
496 int slave;
497 enum ib_qp_type proxy_qp_type;
498 u8 port;
499}; 197};
500 198
501static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) 199static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev)
@@ -513,11 +211,6 @@ static inline struct mlx4_ib_pd *to_mpd(struct ib_pd *ibpd)
513 return container_of(ibpd, struct mlx4_ib_pd, ibpd); 211 return container_of(ibpd, struct mlx4_ib_pd, ibpd);
514} 212}
515 213
516static inline struct mlx4_ib_xrcd *to_mxrcd(struct ib_xrcd *ibxrcd)
517{
518 return container_of(ibxrcd, struct mlx4_ib_xrcd, ibxrcd);
519}
520
521static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq) 214static inline struct mlx4_ib_cq *to_mcq(struct ib_cq *ibcq)
522{ 215{
523 return container_of(ibcq, struct mlx4_ib_cq, ibcq); 216 return container_of(ibcq, struct mlx4_ib_cq, ibcq);
@@ -567,9 +260,6 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah)
567 return container_of(ibah, struct mlx4_ib_ah, ibah); 260 return container_of(ibah, struct mlx4_ib_ah, ibah);
568} 261}
569 262
570int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev);
571void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev);
572
573int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, 263int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt,
574 struct mlx4_db *db); 264 struct mlx4_db *db);
575void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); 265void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db);
@@ -626,7 +316,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
626int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, 316int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
627 struct ib_recv_wr **bad_wr); 317 struct ib_recv_wr **bad_wr);
628 318
629int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags, 319int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
630 int port, struct ib_wc *in_wc, struct ib_grh *in_grh, 320 int port, struct ib_wc *in_wc, struct ib_grh *in_grh,
631 void *in_mad, void *response_mad); 321 void *in_mad, void *response_mad);
632int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, 322int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -641,13 +331,6 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list, int npages,
641 u64 iova); 331 u64 iova);
642int mlx4_ib_unmap_fmr(struct list_head *fmr_list); 332int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
643int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr); 333int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
644int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
645 struct ib_port_attr *props, int netw_view);
646int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
647 u16 *pkey, int netw_view);
648
649int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
650 union ib_gid *gid, int netw_view);
651 334
652int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr, 335int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
653 u8 *mac, int *is_mcast, u8 port); 336 u8 *mac, int *is_mcast, u8 port);
@@ -662,69 +345,7 @@ static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
662 return !!(ah->av.ib.g_slid & 0x80); 345 return !!(ah->av.ib.g_slid & 0x80);
663} 346}
664 347
665int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx);
666void mlx4_ib_mcg_port_cleanup(struct mlx4_ib_demux_ctx *ctx, int destroy_wq);
667void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave);
668int mlx4_ib_mcg_init(void);
669void mlx4_ib_mcg_destroy(void);
670
671int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid);
672
673int mlx4_ib_mcg_multiplex_handler(struct ib_device *ibdev, int port, int slave,
674 struct ib_sa_mad *sa_mad);
675int mlx4_ib_mcg_demux_handler(struct ib_device *ibdev, int port, int slave,
676 struct ib_sa_mad *mad);
677
678int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 348int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
679 union ib_gid *gid); 349 union ib_gid *gid);
680 350
681void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
682 enum ib_event_type type);
683
684void mlx4_ib_tunnels_update_work(struct work_struct *work);
685
686int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
687 enum ib_qp_type qpt, struct ib_wc *wc,
688 struct ib_grh *grh, struct ib_mad *mad);
689int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
690 enum ib_qp_type dest_qpt, u16 pkey_index, u32 remote_qpn,
691 u32 qkey, struct ib_ah_attr *attr, struct ib_mad *mad);
692__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx);
693
694int mlx4_ib_demux_cm_handler(struct ib_device *ibdev, int port, int *slave,
695 struct ib_mad *mad);
696
697int mlx4_ib_multiplex_cm_handler(struct ib_device *ibdev, int port, int slave_id,
698 struct ib_mad *mad);
699
700void mlx4_ib_cm_paravirt_init(struct mlx4_ib_dev *dev);
701void mlx4_ib_cm_paravirt_clean(struct mlx4_ib_dev *dev, int slave_id);
702
703/* alias guid support */
704void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port);
705int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev);
706void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev);
707void mlx4_ib_invalidate_all_guid_record(struct mlx4_ib_dev *dev, int port);
708
709void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
710 int block_num,
711 u8 port_num, u8 *p_data);
712
713void mlx4_ib_update_cache_on_guid_change(struct mlx4_ib_dev *dev,
714 int block_num, u8 port_num,
715 u8 *p_data);
716
717int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
718 struct attribute *attr);
719void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
720 struct attribute *attr);
721ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
722
723int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
724
725void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
726
727__be64 mlx4_ib_gen_node_guid(void);
728
729
730#endif /* MLX4_IB_H */ 351#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index bbaf6176f20..dca55b19a6f 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -338,7 +338,7 @@ int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
338 338
339 err = mlx4_SYNC_TPT(mdev); 339 err = mlx4_SYNC_TPT(mdev);
340 if (err) 340 if (err)
341 pr_warn("SYNC_TPT error %d when " 341 printk(KERN_WARNING "mlx4_ib: SYNC_TPT error %d when "
342 "unmapping FMRs\n", err); 342 "unmapping FMRs\n", err);
343 343
344 return 0; 344 return 0;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 19e0637220b..5c22514b871 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -38,7 +38,6 @@
38#include <rdma/ib_cache.h> 38#include <rdma/ib_cache.h>
39#include <rdma/ib_pack.h> 39#include <rdma/ib_pack.h>
40#include <rdma/ib_addr.h> 40#include <rdma/ib_addr.h>
41#include <rdma/ib_mad.h>
42 41
43#include <linux/mlx4/qp.h> 42#include <linux/mlx4/qp.h>
44 43
@@ -85,11 +84,6 @@ enum {
85 MLX4_IB_CACHE_LINE_SIZE = 64, 84 MLX4_IB_CACHE_LINE_SIZE = 64,
86}; 85};
87 86
88enum {
89 MLX4_RAW_QP_MTU = 7,
90 MLX4_RAW_QP_MSGMAX = 31,
91};
92
93static const __be32 mlx4_ib_opcode[] = { 87static const __be32 mlx4_ib_opcode[] = {
94 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND), 88 [IB_WR_SEND] = cpu_to_be32(MLX4_OPCODE_SEND),
95 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO), 89 [IB_WR_LSO] = cpu_to_be32(MLX4_OPCODE_LSO),
@@ -111,62 +105,16 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
111 return container_of(mqp, struct mlx4_ib_sqp, qp); 105 return container_of(mqp, struct mlx4_ib_sqp, qp);
112} 106}
113 107
114static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
115{
116 if (!mlx4_is_master(dev->dev))
117 return 0;
118
119 return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn &&
120 qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn +
121 8 * MLX4_MFUNC_MAX;
122}
123
124static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 108static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
125{ 109{
126 int proxy_sqp = 0; 110 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
127 int real_sqp = 0; 111 qp->mqp.qpn <= dev->dev->caps.sqp_start + 3;
128 int i;
129 /* PPF or Native -- real SQP */
130 real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
131 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
132 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3);
133 if (real_sqp)
134 return 1;
135 /* VF or PF -- proxy SQP */
136 if (mlx4_is_mfunc(dev->dev)) {
137 for (i = 0; i < dev->dev->caps.num_ports; i++) {
138 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] ||
139 qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) {
140 proxy_sqp = 1;
141 break;
142 }
143 }
144 }
145 return proxy_sqp;
146} 112}
147 113
148/* used for INIT/CLOSE port logic */
149static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) 114static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
150{ 115{
151 int proxy_qp0 = 0; 116 return qp->mqp.qpn >= dev->dev->caps.sqp_start &&
152 int real_qp0 = 0; 117 qp->mqp.qpn <= dev->dev->caps.sqp_start + 1;
153 int i;
154 /* PPF or Native -- real QP0 */
155 real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) &&
156 qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn &&
157 qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1);
158 if (real_qp0)
159 return 1;
160 /* VF or PF -- proxy QP0 */
161 if (mlx4_is_mfunc(dev->dev)) {
162 for (i = 0; i < dev->dev->caps.num_ports; i++) {
163 if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) {
164 proxy_qp0 = 1;
165 break;
166 }
167 }
168 }
169 return proxy_qp0;
170} 118}
171 119
172static void *get_wqe(struct mlx4_ib_qp *qp, int offset) 120static void *get_wqe(struct mlx4_ib_qp *qp, int offset)
@@ -308,7 +256,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
308 event.event = IB_EVENT_QP_ACCESS_ERR; 256 event.event = IB_EVENT_QP_ACCESS_ERR;
309 break; 257 break;
310 default: 258 default:
311 pr_warn("Unexpected event type %d " 259 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
312 "on QP %06x\n", type, qp->qpn); 260 "on QP %06x\n", type, qp->qpn);
313 return; 261 return;
314 } 262 }
@@ -317,7 +265,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
317 } 265 }
318} 266}
319 267
320static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) 268static int send_wqe_overhead(enum ib_qp_type type, u32 flags)
321{ 269{
322 /* 270 /*
323 * UD WQEs must have a datagram segment. 271 * UD WQEs must have a datagram segment.
@@ -326,29 +274,19 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
326 * header and space for the ICRC). 274 * header and space for the ICRC).
327 */ 275 */
328 switch (type) { 276 switch (type) {
329 case MLX4_IB_QPT_UD: 277 case IB_QPT_UD:
330 return sizeof (struct mlx4_wqe_ctrl_seg) + 278 return sizeof (struct mlx4_wqe_ctrl_seg) +
331 sizeof (struct mlx4_wqe_datagram_seg) + 279 sizeof (struct mlx4_wqe_datagram_seg) +
332 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); 280 ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0);
333 case MLX4_IB_QPT_PROXY_SMI_OWNER: 281 case IB_QPT_UC:
334 case MLX4_IB_QPT_PROXY_SMI:
335 case MLX4_IB_QPT_PROXY_GSI:
336 return sizeof (struct mlx4_wqe_ctrl_seg) +
337 sizeof (struct mlx4_wqe_datagram_seg) + 64;
338 case MLX4_IB_QPT_TUN_SMI_OWNER:
339 case MLX4_IB_QPT_TUN_GSI:
340 return sizeof (struct mlx4_wqe_ctrl_seg) +
341 sizeof (struct mlx4_wqe_datagram_seg);
342
343 case MLX4_IB_QPT_UC:
344 return sizeof (struct mlx4_wqe_ctrl_seg) + 282 return sizeof (struct mlx4_wqe_ctrl_seg) +
345 sizeof (struct mlx4_wqe_raddr_seg); 283 sizeof (struct mlx4_wqe_raddr_seg);
346 case MLX4_IB_QPT_RC: 284 case IB_QPT_RC:
347 return sizeof (struct mlx4_wqe_ctrl_seg) + 285 return sizeof (struct mlx4_wqe_ctrl_seg) +
348 sizeof (struct mlx4_wqe_atomic_seg) + 286 sizeof (struct mlx4_wqe_atomic_seg) +
349 sizeof (struct mlx4_wqe_raddr_seg); 287 sizeof (struct mlx4_wqe_raddr_seg);
350 case MLX4_IB_QPT_SMI: 288 case IB_QPT_SMI:
351 case MLX4_IB_QPT_GSI: 289 case IB_QPT_GSI:
352 return sizeof (struct mlx4_wqe_ctrl_seg) + 290 return sizeof (struct mlx4_wqe_ctrl_seg) +
353 ALIGN(MLX4_IB_UD_HEADER_SIZE + 291 ALIGN(MLX4_IB_UD_HEADER_SIZE +
354 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, 292 DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE,
@@ -364,14 +302,15 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
364} 302}
365 303
366static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 304static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
367 int is_user, int has_rq, struct mlx4_ib_qp *qp) 305 int is_user, int has_srq, struct mlx4_ib_qp *qp)
368{ 306{
369 /* Sanity check RQ size before proceeding */ 307 /* Sanity check RQ size before proceeding */
370 if (cap->max_recv_wr > dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE || 308 if (cap->max_recv_wr > dev->dev->caps.max_wqes ||
371 cap->max_recv_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg)) 309 cap->max_recv_sge > dev->dev->caps.max_rq_sg)
372 return -EINVAL; 310 return -EINVAL;
373 311
374 if (!has_rq) { 312 if (has_srq) {
313 /* QPs attached to an SRQ should have no RQ */
375 if (cap->max_recv_wr) 314 if (cap->max_recv_wr)
376 return -EINVAL; 315 return -EINVAL;
377 316
@@ -386,29 +325,20 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
386 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg)); 325 qp->rq.wqe_shift = ilog2(qp->rq.max_gs * sizeof (struct mlx4_wqe_data_seg));
387 } 326 }
388 327
389 /* leave userspace return values as they were, so as not to break ABI */ 328 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
390 if (is_user) { 329 cap->max_recv_sge = qp->rq.max_gs;
391 cap->max_recv_wr = qp->rq.max_post = qp->rq.wqe_cnt;
392 cap->max_recv_sge = qp->rq.max_gs;
393 } else {
394 cap->max_recv_wr = qp->rq.max_post =
395 min(dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE, qp->rq.wqe_cnt);
396 cap->max_recv_sge = min(qp->rq.max_gs,
397 min(dev->dev->caps.max_sq_sg,
398 dev->dev->caps.max_rq_sg));
399 }
400 330
401 return 0; 331 return 0;
402} 332}
403 333
404static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, 334static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
405 enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) 335 enum ib_qp_type type, struct mlx4_ib_qp *qp)
406{ 336{
407 int s; 337 int s;
408 338
409 /* Sanity check SQ size before proceeding */ 339 /* Sanity check SQ size before proceeding */
410 if (cap->max_send_wr > (dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE) || 340 if (cap->max_send_wr > dev->dev->caps.max_wqes ||
411 cap->max_send_sge > min(dev->dev->caps.max_sq_sg, dev->dev->caps.max_rq_sg) || 341 cap->max_send_sge > dev->dev->caps.max_sq_sg ||
412 cap->max_inline_data + send_wqe_overhead(type, qp->flags) + 342 cap->max_inline_data + send_wqe_overhead(type, qp->flags) +
413 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz) 343 sizeof (struct mlx4_wqe_inline_seg) > dev->dev->caps.max_sq_desc_sz)
414 return -EINVAL; 344 return -EINVAL;
@@ -417,8 +347,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
417 * For MLX transport we need 2 extra S/G entries: 347 * For MLX transport we need 2 extra S/G entries:
418 * one for the header and one for the checksum at the end 348 * one for the header and one for the checksum at the end
419 */ 349 */
420 if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || 350 if ((type == IB_QPT_SMI || type == IB_QPT_GSI) &&
421 type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) &&
422 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) 351 cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg)
423 return -EINVAL; 352 return -EINVAL;
424 353
@@ -462,9 +391,7 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap,
462 */ 391 */
463 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && 392 if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC &&
464 qp->sq_signal_bits && BITS_PER_LONG == 64 && 393 qp->sq_signal_bits && BITS_PER_LONG == 64 &&
465 type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && 394 type != IB_QPT_SMI && type != IB_QPT_GSI)
466 !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI |
467 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER)))
468 qp->sq.wqe_shift = ilog2(64); 395 qp->sq.wqe_shift = ilog2(64);
469 else 396 else
470 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); 397 qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s));
@@ -536,137 +463,23 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev,
536 return 0; 463 return 0;
537} 464}
538 465
539static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
540{
541 int i;
542
543 qp->sqp_proxy_rcv =
544 kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt,
545 GFP_KERNEL);
546 if (!qp->sqp_proxy_rcv)
547 return -ENOMEM;
548 for (i = 0; i < qp->rq.wqe_cnt; i++) {
549 qp->sqp_proxy_rcv[i].addr =
550 kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr),
551 GFP_KERNEL);
552 if (!qp->sqp_proxy_rcv[i].addr)
553 goto err;
554 qp->sqp_proxy_rcv[i].map =
555 ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
556 sizeof (struct mlx4_ib_proxy_sqp_hdr),
557 DMA_FROM_DEVICE);
558 }
559 return 0;
560
561err:
562 while (i > 0) {
563 --i;
564 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
565 sizeof (struct mlx4_ib_proxy_sqp_hdr),
566 DMA_FROM_DEVICE);
567 kfree(qp->sqp_proxy_rcv[i].addr);
568 }
569 kfree(qp->sqp_proxy_rcv);
570 qp->sqp_proxy_rcv = NULL;
571 return -ENOMEM;
572}
573
574static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
575{
576 int i;
577
578 for (i = 0; i < qp->rq.wqe_cnt; i++) {
579 ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map,
580 sizeof (struct mlx4_ib_proxy_sqp_hdr),
581 DMA_FROM_DEVICE);
582 kfree(qp->sqp_proxy_rcv[i].addr);
583 }
584 kfree(qp->sqp_proxy_rcv);
585}
586
587static int qp_has_rq(struct ib_qp_init_attr *attr)
588{
589 if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT)
590 return 0;
591
592 return !attr->srq;
593}
594
595static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, 466static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
596 struct ib_qp_init_attr *init_attr, 467 struct ib_qp_init_attr *init_attr,
597 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) 468 struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp)
598{ 469{
599 int qpn; 470 int qpn;
600 int err; 471 int err;
601 struct mlx4_ib_sqp *sqp;
602 struct mlx4_ib_qp *qp;
603 enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type;
604
605 /* When tunneling special qps, we use a plain UD qp */
606 if (sqpn) {
607 if (mlx4_is_mfunc(dev->dev) &&
608 (!mlx4_is_master(dev->dev) ||
609 !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) {
610 if (init_attr->qp_type == IB_QPT_GSI)
611 qp_type = MLX4_IB_QPT_PROXY_GSI;
612 else if (mlx4_is_master(dev->dev))
613 qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER;
614 else
615 qp_type = MLX4_IB_QPT_PROXY_SMI;
616 }
617 qpn = sqpn;
618 /* add extra sg entry for tunneling */
619 init_attr->cap.max_recv_sge++;
620 } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) {
621 struct mlx4_ib_qp_tunnel_init_attr *tnl_init =
622 container_of(init_attr,
623 struct mlx4_ib_qp_tunnel_init_attr, init_attr);
624 if ((tnl_init->proxy_qp_type != IB_QPT_SMI &&
625 tnl_init->proxy_qp_type != IB_QPT_GSI) ||
626 !mlx4_is_master(dev->dev))
627 return -EINVAL;
628 if (tnl_init->proxy_qp_type == IB_QPT_GSI)
629 qp_type = MLX4_IB_QPT_TUN_GSI;
630 else if (tnl_init->slave == mlx4_master_func_num(dev->dev))
631 qp_type = MLX4_IB_QPT_TUN_SMI_OWNER;
632 else
633 qp_type = MLX4_IB_QPT_TUN_SMI;
634 /* we are definitely in the PPF here, since we are creating
635 * tunnel QPs. base_tunnel_sqpn is therefore valid. */
636 qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave
637 + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1;
638 sqpn = qpn;
639 }
640
641 if (!*caller_qp) {
642 if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI ||
643 (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER |
644 MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) {
645 sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL);
646 if (!sqp)
647 return -ENOMEM;
648 qp = &sqp->qp;
649 } else {
650 qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL);
651 if (!qp)
652 return -ENOMEM;
653 }
654 } else
655 qp = *caller_qp;
656
657 qp->mlx4_ib_qp_type = qp_type;
658 472
659 mutex_init(&qp->mutex); 473 mutex_init(&qp->mutex);
660 spin_lock_init(&qp->sq.lock); 474 spin_lock_init(&qp->sq.lock);
661 spin_lock_init(&qp->rq.lock); 475 spin_lock_init(&qp->rq.lock);
662 INIT_LIST_HEAD(&qp->gid_list); 476 INIT_LIST_HEAD(&qp->gid_list);
663 INIT_LIST_HEAD(&qp->steering_rules);
664 477
665 qp->state = IB_QPS_RESET; 478 qp->state = IB_QPS_RESET;
666 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) 479 if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
667 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); 480 qp->sq_signal_bits = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
668 481
669 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, qp_has_rq(init_attr), qp); 482 err = set_rq_size(dev, &init_attr->cap, !!pd->uobject, !!init_attr->srq, qp);
670 if (err) 483 if (err)
671 goto err; 484 goto err;
672 485
@@ -700,7 +513,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
700 if (err) 513 if (err)
701 goto err_mtt; 514 goto err_mtt;
702 515
703 if (qp_has_rq(init_attr)) { 516 if (!init_attr->srq) {
704 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context), 517 err = mlx4_ib_db_map_user(to_mucontext(pd->uobject->context),
705 ucmd.db_addr, &qp->db); 518 ucmd.db_addr, &qp->db);
706 if (err) 519 if (err)
@@ -715,11 +528,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
715 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) 528 if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
716 qp->flags |= MLX4_IB_QP_LSO; 529 qp->flags |= MLX4_IB_QP_LSO;
717 530
718 err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); 531 err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp);
719 if (err) 532 if (err)
720 goto err; 533 goto err;
721 534
722 if (qp_has_rq(init_attr)) { 535 if (!init_attr->srq) {
723 err = mlx4_db_alloc(dev->dev, &qp->db, 0); 536 err = mlx4_db_alloc(dev->dev, &qp->db, 0);
724 if (err) 537 if (err)
725 goto err; 538 goto err;
@@ -751,31 +564,17 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
751 } 564 }
752 565
753 if (sqpn) { 566 if (sqpn) {
754 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | 567 qpn = sqpn;
755 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
756 if (alloc_proxy_bufs(pd->device, qp)) {
757 err = -ENOMEM;
758 goto err_wrid;
759 }
760 }
761 } else { 568 } else {
762 /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE 569 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
763 * BlueFlame setup flow wrongly causes VLAN insertion. */
764 if (init_attr->qp_type == IB_QPT_RAW_PACKET)
765 err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
766 else
767 err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
768 if (err) 570 if (err)
769 goto err_proxy; 571 goto err_wrid;
770 } 572 }
771 573
772 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); 574 err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp);
773 if (err) 575 if (err)
774 goto err_qpn; 576 goto err_qpn;
775 577
776 if (init_attr->qp_type == IB_QPT_XRC_TGT)
777 qp->mqp.qpn |= (1 << 23);
778
779 /* 578 /*
780 * Hardware wants QPN written in big-endian order (after 579 * Hardware wants QPN written in big-endian order (after
781 * shifting) for send doorbell. Precompute this value to save 580 * shifting) for send doorbell. Precompute this value to save
@@ -784,20 +583,18 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
784 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); 583 qp->doorbell_qpn = swab32(qp->mqp.qpn << 8);
785 584
786 qp->mqp.event = mlx4_ib_qp_event; 585 qp->mqp.event = mlx4_ib_qp_event;
787 if (!*caller_qp) 586
788 *caller_qp = qp;
789 return 0; 587 return 0;
790 588
791err_qpn: 589err_qpn:
792 if (!sqpn) 590 if (!sqpn)
793 mlx4_qp_release_range(dev->dev, qpn, 1); 591 mlx4_qp_release_range(dev->dev, qpn, 1);
794err_proxy: 592
795 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
796 free_proxy_bufs(pd->device, qp);
797err_wrid: 593err_wrid:
798 if (pd->uobject) { 594 if (pd->uobject) {
799 if (qp_has_rq(init_attr)) 595 if (!init_attr->srq)
800 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context), &qp->db); 596 mlx4_ib_db_unmap_user(to_mucontext(pd->uobject->context),
597 &qp->db);
801 } else { 598 } else {
802 kfree(qp->sq.wrid); 599 kfree(qp->sq.wrid);
803 kfree(qp->rq.wrid); 600 kfree(qp->rq.wrid);
@@ -813,12 +610,10 @@ err_buf:
813 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 610 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
814 611
815err_db: 612err_db:
816 if (!pd->uobject && qp_has_rq(init_attr)) 613 if (!pd->uobject && !init_attr->srq)
817 mlx4_db_free(dev->dev, &qp->db); 614 mlx4_db_free(dev->dev, &qp->db);
818 615
819err: 616err:
820 if (!*caller_qp)
821 kfree(qp);
822 return err; 617 return err;
823} 618}
824 619
@@ -876,33 +671,6 @@ static void del_gid_entries(struct mlx4_ib_qp *qp)
876 } 671 }
877} 672}
878 673
879static struct mlx4_ib_pd *get_pd(struct mlx4_ib_qp *qp)
880{
881 if (qp->ibqp.qp_type == IB_QPT_XRC_TGT)
882 return to_mpd(to_mxrcd(qp->ibqp.xrcd)->pd);
883 else
884 return to_mpd(qp->ibqp.pd);
885}
886
887static void get_cqs(struct mlx4_ib_qp *qp,
888 struct mlx4_ib_cq **send_cq, struct mlx4_ib_cq **recv_cq)
889{
890 switch (qp->ibqp.qp_type) {
891 case IB_QPT_XRC_TGT:
892 *send_cq = to_mcq(to_mxrcd(qp->ibqp.xrcd)->cq);
893 *recv_cq = *send_cq;
894 break;
895 case IB_QPT_XRC_INI:
896 *send_cq = to_mcq(qp->ibqp.send_cq);
897 *recv_cq = *send_cq;
898 break;
899 default:
900 *send_cq = to_mcq(qp->ibqp.send_cq);
901 *recv_cq = to_mcq(qp->ibqp.recv_cq);
902 break;
903 }
904}
905
906static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, 674static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
907 int is_user) 675 int is_user)
908{ 676{
@@ -911,10 +679,11 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
911 if (qp->state != IB_QPS_RESET) 679 if (qp->state != IB_QPS_RESET)
912 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), 680 if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state),
913 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp)) 681 MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
914 pr_warn("modify QP %06x to RESET failed.\n", 682 printk(KERN_WARNING "mlx4_ib: modify QP %06x to RESET failed.\n",
915 qp->mqp.qpn); 683 qp->mqp.qpn);
916 684
917 get_cqs(qp, &send_cq, &recv_cq); 685 send_cq = to_mcq(qp->ibqp.send_cq);
686 recv_cq = to_mcq(qp->ibqp.recv_cq);
918 687
919 mlx4_ib_lock_cqs(send_cq, recv_cq); 688 mlx4_ib_lock_cqs(send_cq, recv_cq);
920 689
@@ -931,99 +700,64 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
931 700
932 mlx4_qp_free(dev->dev, &qp->mqp); 701 mlx4_qp_free(dev->dev, &qp->mqp);
933 702
934 if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) 703 if (!is_sqp(dev, qp))
935 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); 704 mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
936 705
937 mlx4_mtt_cleanup(dev->dev, &qp->mtt); 706 mlx4_mtt_cleanup(dev->dev, &qp->mtt);
938 707
939 if (is_user) { 708 if (is_user) {
940 if (qp->rq.wqe_cnt) 709 if (!qp->ibqp.srq)
941 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context), 710 mlx4_ib_db_unmap_user(to_mucontext(qp->ibqp.uobject->context),
942 &qp->db); 711 &qp->db);
943 ib_umem_release(qp->umem); 712 ib_umem_release(qp->umem);
944 } else { 713 } else {
945 kfree(qp->sq.wrid); 714 kfree(qp->sq.wrid);
946 kfree(qp->rq.wrid); 715 kfree(qp->rq.wrid);
947 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
948 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI))
949 free_proxy_bufs(&dev->ib_dev, qp);
950 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); 716 mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf);
951 if (qp->rq.wqe_cnt) 717 if (!qp->ibqp.srq)
952 mlx4_db_free(dev->dev, &qp->db); 718 mlx4_db_free(dev->dev, &qp->db);
953 } 719 }
954 720
955 del_gid_entries(qp); 721 del_gid_entries(qp);
956} 722}
957 723
958static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr)
959{
960 /* Native or PPF */
961 if (!mlx4_is_mfunc(dev->dev) ||
962 (mlx4_is_master(dev->dev) &&
963 attr->create_flags & MLX4_IB_SRIOV_SQP)) {
964 return dev->dev->phys_caps.base_sqpn +
965 (attr->qp_type == IB_QPT_SMI ? 0 : 2) +
966 attr->port_num - 1;
967 }
968 /* PF or VF -- creating proxies */
969 if (attr->qp_type == IB_QPT_SMI)
970 return dev->dev->caps.qp0_proxy[attr->port_num - 1];
971 else
972 return dev->dev->caps.qp1_proxy[attr->port_num - 1];
973}
974
975struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, 724struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
976 struct ib_qp_init_attr *init_attr, 725 struct ib_qp_init_attr *init_attr,
977 struct ib_udata *udata) 726 struct ib_udata *udata)
978{ 727{
979 struct mlx4_ib_qp *qp = NULL; 728 struct mlx4_ib_dev *dev = to_mdev(pd->device);
729 struct mlx4_ib_sqp *sqp;
730 struct mlx4_ib_qp *qp;
980 int err; 731 int err;
981 u16 xrcdn = 0;
982 732
983 /* 733 /*
984 * We only support LSO, vendor flag1, and multicast loopback blocking, 734 * We only support LSO and multicast loopback blocking, and
985 * and only for kernel UD QPs. 735 * only for kernel UD QPs.
986 */ 736 */
987 if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | 737 if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO |
988 MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | 738 IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
989 MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
990 return ERR_PTR(-EINVAL); 739 return ERR_PTR(-EINVAL);
991 740
992 if (init_attr->create_flags && 741 if (init_attr->create_flags &&
993 (udata || 742 (pd->uobject || init_attr->qp_type != IB_QPT_UD))
994 ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
995 init_attr->qp_type != IB_QPT_UD) ||
996 ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) &&
997 init_attr->qp_type > IB_QPT_GSI)))
998 return ERR_PTR(-EINVAL); 743 return ERR_PTR(-EINVAL);
999 744
1000 switch (init_attr->qp_type) { 745 switch (init_attr->qp_type) {
1001 case IB_QPT_XRC_TGT:
1002 pd = to_mxrcd(init_attr->xrcd)->pd;
1003 xrcdn = to_mxrcd(init_attr->xrcd)->xrcdn;
1004 init_attr->send_cq = to_mxrcd(init_attr->xrcd)->cq;
1005 /* fall through */
1006 case IB_QPT_XRC_INI:
1007 if (!(to_mdev(pd->device)->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
1008 return ERR_PTR(-ENOSYS);
1009 init_attr->recv_cq = init_attr->send_cq;
1010 /* fall through */
1011 case IB_QPT_RC: 746 case IB_QPT_RC:
1012 case IB_QPT_UC: 747 case IB_QPT_UC:
1013 case IB_QPT_RAW_PACKET: 748 case IB_QPT_UD:
749 {
1014 qp = kzalloc(sizeof *qp, GFP_KERNEL); 750 qp = kzalloc(sizeof *qp, GFP_KERNEL);
1015 if (!qp) 751 if (!qp)
1016 return ERR_PTR(-ENOMEM); 752 return ERR_PTR(-ENOMEM);
1017 /* fall through */ 753
1018 case IB_QPT_UD: 754 err = create_qp_common(dev, pd, init_attr, udata, 0, qp);
1019 { 755 if (err) {
1020 err = create_qp_common(to_mdev(pd->device), pd, init_attr, 756 kfree(qp);
1021 udata, 0, &qp);
1022 if (err)
1023 return ERR_PTR(err); 757 return ERR_PTR(err);
758 }
1024 759
1025 qp->ibqp.qp_num = qp->mqp.qpn; 760 qp->ibqp.qp_num = qp->mqp.qpn;
1026 qp->xrcdn = xrcdn;
1027 761
1028 break; 762 break;
1029 } 763 }
@@ -1031,14 +765,24 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
1031 case IB_QPT_GSI: 765 case IB_QPT_GSI:
1032 { 766 {
1033 /* Userspace is not allowed to create special QPs: */ 767 /* Userspace is not allowed to create special QPs: */
1034 if (udata) 768 if (pd->uobject)
1035 return ERR_PTR(-EINVAL); 769 return ERR_PTR(-EINVAL);
1036 770
1037 err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 771 sqp = kzalloc(sizeof *sqp, GFP_KERNEL);
1038 get_sqp_num(to_mdev(pd->device), init_attr), 772 if (!sqp)
1039 &qp); 773 return ERR_PTR(-ENOMEM);
1040 if (err) 774
775 qp = &sqp->qp;
776
777 err = create_qp_common(dev, pd, init_attr, udata,
778 dev->dev->caps.sqp_start +
779 (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) +
780 init_attr->port_num - 1,
781 qp);
782 if (err) {
783 kfree(sqp);
1041 return ERR_PTR(err); 784 return ERR_PTR(err);
785 }
1042 786
1043 qp->port = init_attr->port_num; 787 qp->port = init_attr->port_num;
1044 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; 788 qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
@@ -1057,13 +801,11 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
1057{ 801{
1058 struct mlx4_ib_dev *dev = to_mdev(qp->device); 802 struct mlx4_ib_dev *dev = to_mdev(qp->device);
1059 struct mlx4_ib_qp *mqp = to_mqp(qp); 803 struct mlx4_ib_qp *mqp = to_mqp(qp);
1060 struct mlx4_ib_pd *pd;
1061 804
1062 if (is_qp0(dev, mqp)) 805 if (is_qp0(dev, mqp))
1063 mlx4_CLOSE_PORT(dev->dev, mqp->port); 806 mlx4_CLOSE_PORT(dev->dev, mqp->port);
1064 807
1065 pd = get_pd(mqp); 808 destroy_qp_common(dev, mqp, !!qp->pd->uobject);
1066 destroy_qp_common(dev, mqp, !!pd->ibpd.uobject);
1067 809
1068 if (is_sqp(dev, mqp)) 810 if (is_sqp(dev, mqp))
1069 kfree(to_msqp(mqp)); 811 kfree(to_msqp(mqp));
@@ -1073,27 +815,15 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp)
1073 return 0; 815 return 0;
1074} 816}
1075 817
1076static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) 818static int to_mlx4_st(enum ib_qp_type type)
1077{ 819{
1078 switch (type) { 820 switch (type) {
1079 case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; 821 case IB_QPT_RC: return MLX4_QP_ST_RC;
1080 case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; 822 case IB_QPT_UC: return MLX4_QP_ST_UC;
1081 case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; 823 case IB_QPT_UD: return MLX4_QP_ST_UD;
1082 case MLX4_IB_QPT_XRC_INI: 824 case IB_QPT_SMI:
1083 case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; 825 case IB_QPT_GSI: return MLX4_QP_ST_MLX;
1084 case MLX4_IB_QPT_SMI: 826 default: return -1;
1085 case MLX4_IB_QPT_GSI:
1086 case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX;
1087
1088 case MLX4_IB_QPT_PROXY_SMI_OWNER:
1089 case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ?
1090 MLX4_QP_ST_MLX : -1);
1091 case MLX4_IB_QPT_PROXY_SMI:
1092 case MLX4_IB_QPT_TUN_SMI:
1093 case MLX4_IB_QPT_PROXY_GSI:
1094 case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ?
1095 MLX4_QP_ST_UD : -1);
1096 default: return -1;
1097 } 827 }
1098} 828}
1099 829
@@ -1166,7 +896,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1166 896
1167 if (ah->ah_flags & IB_AH_GRH) { 897 if (ah->ah_flags & IB_AH_GRH) {
1168 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) { 898 if (ah->grh.sgid_index >= dev->dev->caps.gid_table_len[port]) {
1169 pr_err("sgid_index (%u) too large. max is %d\n", 899 printk(KERN_ERR "sgid_index (%u) too large. max is %d\n",
1170 ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1); 900 ah->grh.sgid_index, dev->dev->caps.gid_table_len[port] - 1);
1171 return -1; 901 return -1;
1172 } 902 }
@@ -1182,7 +912,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
1182 912
1183 if (is_eth) { 913 if (is_eth) {
1184 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | 914 path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
1185 ((port - 1) << 6) | ((ah->sl & 7) << 3); 915 ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1);
1186 916
1187 if (!(ah->ah_flags & IB_AH_GRH)) 917 if (!(ah->ah_flags & IB_AH_GRH))
1188 return -1; 918 return -1;
@@ -1229,8 +959,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1229{ 959{
1230 struct mlx4_ib_dev *dev = to_mdev(ibqp->device); 960 struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
1231 struct mlx4_ib_qp *qp = to_mqp(ibqp); 961 struct mlx4_ib_qp *qp = to_mqp(ibqp);
1232 struct mlx4_ib_pd *pd;
1233 struct mlx4_ib_cq *send_cq, *recv_cq;
1234 struct mlx4_qp_context *context; 962 struct mlx4_qp_context *context;
1235 enum mlx4_qp_optpar optpar = 0; 963 enum mlx4_qp_optpar optpar = 0;
1236 int sqd_event; 964 int sqd_event;
@@ -1241,7 +969,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1241 return -ENOMEM; 969 return -ENOMEM;
1242 970
1243 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | 971 context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) |
1244 (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); 972 (to_mlx4_st(ibqp->qp_type) << 16));
1245 973
1246 if (!(attr_mask & IB_QP_PATH_MIG_STATE)) 974 if (!(attr_mask & IB_QP_PATH_MIG_STATE))
1247 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); 975 context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11);
@@ -1262,8 +990,6 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1262 990
1263 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) 991 if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI)
1264 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11; 992 context->mtu_msgmax = (IB_MTU_4096 << 5) | 11;
1265 else if (ibqp->qp_type == IB_QPT_RAW_PACKET)
1266 context->mtu_msgmax = (MLX4_RAW_QP_MTU << 5) | MLX4_RAW_QP_MSGMAX;
1267 else if (ibqp->qp_type == IB_QPT_UD) { 993 else if (ibqp->qp_type == IB_QPT_UD) {
1268 if (qp->flags & MLX4_IB_QP_LSO) 994 if (qp->flags & MLX4_IB_QP_LSO)
1269 context->mtu_msgmax = (IB_MTU_4096 << 5) | 995 context->mtu_msgmax = (IB_MTU_4096 << 5) |
@@ -1272,7 +998,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1272 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12; 998 context->mtu_msgmax = (IB_MTU_4096 << 5) | 12;
1273 } else if (attr_mask & IB_QP_PATH_MTU) { 999 } else if (attr_mask & IB_QP_PATH_MTU) {
1274 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) { 1000 if (attr->path_mtu < IB_MTU_256 || attr->path_mtu > IB_MTU_4096) {
1275 pr_err("path MTU (%u) is invalid\n", 1001 printk(KERN_ERR "path MTU (%u) is invalid\n",
1276 attr->path_mtu); 1002 attr->path_mtu);
1277 goto out; 1003 goto out;
1278 } 1004 }
@@ -1288,10 +1014,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1288 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3; 1014 context->sq_size_stride = ilog2(qp->sq.wqe_cnt) << 3;
1289 context->sq_size_stride |= qp->sq.wqe_shift - 4; 1015 context->sq_size_stride |= qp->sq.wqe_shift - 4;
1290 1016
1291 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { 1017 if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1292 context->sq_size_stride |= !!qp->sq_no_prefetch << 7; 1018 context->sq_size_stride |= !!qp->sq_no_prefetch << 7;
1293 context->xrcd = cpu_to_be32((u32) qp->xrcdn);
1294 }
1295 1019
1296 if (qp->ibqp.uobject) 1020 if (qp->ibqp.uobject)
1297 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index); 1021 context->usr_page = cpu_to_be32(to_mucontext(ibqp->uobject->context)->uar.index);
@@ -1319,16 +1043,13 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1319 } 1043 }
1320 1044
1321 if (attr_mask & IB_QP_PKEY_INDEX) { 1045 if (attr_mask & IB_QP_PKEY_INDEX) {
1322 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1323 context->pri_path.disable_pkey_check = 0x40;
1324 context->pri_path.pkey_index = attr->pkey_index; 1046 context->pri_path.pkey_index = attr->pkey_index;
1325 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; 1047 optpar |= MLX4_QP_OPTPAR_PKEY_INDEX;
1326 } 1048 }
1327 1049
1328 if (attr_mask & IB_QP_AV) { 1050 if (attr_mask & IB_QP_AV) {
1329 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, 1051 if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
1330 attr_mask & IB_QP_PORT ? 1052 attr_mask & IB_QP_PORT ? attr->port_num : qp->port))
1331 attr->port_num : qp->port))
1332 goto out; 1053 goto out;
1333 1054
1334 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | 1055 optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH |
@@ -1358,12 +1079,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1358 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH; 1079 optpar |= MLX4_QP_OPTPAR_ALT_ADDR_PATH;
1359 } 1080 }
1360 1081
1361 pd = get_pd(qp); 1082 context->pd = cpu_to_be32(to_mpd(ibqp->pd)->pdn);
1362 get_cqs(qp, &send_cq, &recv_cq); 1083 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
1363 context->pd = cpu_to_be32(pd->pdn);
1364 context->cqn_send = cpu_to_be32(send_cq->mcq.cqn);
1365 context->cqn_recv = cpu_to_be32(recv_cq->mcq.cqn);
1366 context->params1 = cpu_to_be32(MLX4_IB_ACK_REQ_FREQ << 28);
1367 1084
1368 /* Set "fast registration enabled" for all kernel QPs */ 1085 /* Set "fast registration enabled" for all kernel QPs */
1369 if (!qp->ibqp.uobject) 1086 if (!qp->ibqp.uobject)
@@ -1389,6 +1106,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1389 if (attr_mask & IB_QP_SQ_PSN) 1106 if (attr_mask & IB_QP_SQ_PSN)
1390 context->next_send_psn = cpu_to_be32(attr->sq_psn); 1107 context->next_send_psn = cpu_to_be32(attr->sq_psn);
1391 1108
1109 context->cqn_send = cpu_to_be32(to_mcq(ibqp->send_cq)->mcq.cqn);
1110
1392 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { 1111 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
1393 if (attr->max_dest_rd_atomic) 1112 if (attr->max_dest_rd_atomic)
1394 context->params2 |= 1113 context->params2 |=
@@ -1411,50 +1130,28 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1411 if (attr_mask & IB_QP_RQ_PSN) 1130 if (attr_mask & IB_QP_RQ_PSN)
1412 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); 1131 context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn);
1413 1132
1414 /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ 1133 context->cqn_recv = cpu_to_be32(to_mcq(ibqp->recv_cq)->mcq.cqn);
1134
1415 if (attr_mask & IB_QP_QKEY) { 1135 if (attr_mask & IB_QP_QKEY) {
1416 if (qp->mlx4_ib_qp_type & 1136 context->qkey = cpu_to_be32(attr->qkey);
1417 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))
1418 context->qkey = cpu_to_be32(IB_QP_SET_QKEY);
1419 else {
1420 if (mlx4_is_mfunc(dev->dev) &&
1421 !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) &&
1422 (attr->qkey & MLX4_RESERVED_QKEY_MASK) ==
1423 MLX4_RESERVED_QKEY_BASE) {
1424 pr_err("Cannot use reserved QKEY"
1425 " 0x%x (range 0xffff0000..0xffffffff"
1426 " is reserved)\n", attr->qkey);
1427 err = -EINVAL;
1428 goto out;
1429 }
1430 context->qkey = cpu_to_be32(attr->qkey);
1431 }
1432 optpar |= MLX4_QP_OPTPAR_Q_KEY; 1137 optpar |= MLX4_QP_OPTPAR_Q_KEY;
1433 } 1138 }
1434 1139
1435 if (ibqp->srq) 1140 if (ibqp->srq)
1436 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn); 1141 context->srqn = cpu_to_be32(1 << 24 | to_msrq(ibqp->srq)->msrq.srqn);
1437 1142
1438 if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) 1143 if (!ibqp->srq && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1439 context->db_rec_addr = cpu_to_be64(qp->db.dma); 1144 context->db_rec_addr = cpu_to_be64(qp->db.dma);
1440 1145
1441 if (cur_state == IB_QPS_INIT && 1146 if (cur_state == IB_QPS_INIT &&
1442 new_state == IB_QPS_RTR && 1147 new_state == IB_QPS_RTR &&
1443 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI || 1148 (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI ||
1444 ibqp->qp_type == IB_QPT_UD || 1149 ibqp->qp_type == IB_QPT_UD)) {
1445 ibqp->qp_type == IB_QPT_RAW_PACKET)) {
1446 context->pri_path.sched_queue = (qp->port - 1) << 6; 1150 context->pri_path.sched_queue = (qp->port - 1) << 6;
1447 if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || 1151 if (is_qp0(dev, qp))
1448 qp->mlx4_ib_qp_type &
1449 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) {
1450 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; 1152 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE;
1451 if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) 1153 else
1452 context->pri_path.fl = 0x80;
1453 } else {
1454 if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV)
1455 context->pri_path.fl = 0x80;
1456 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; 1154 context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE;
1457 }
1458 } 1155 }
1459 1156
1460 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && 1157 if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD &&
@@ -1515,7 +1212,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1515 if (is_qp0(dev, qp)) { 1212 if (is_qp0(dev, qp)) {
1516 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR) 1213 if (cur_state != IB_QPS_RTR && new_state == IB_QPS_RTR)
1517 if (mlx4_INIT_PORT(dev->dev, qp->port)) 1214 if (mlx4_INIT_PORT(dev->dev, qp->port))
1518 pr_warn("INIT_PORT failed for port %d\n", 1215 printk(KERN_WARNING "INIT_PORT failed for port %d\n",
1519 qp->port); 1216 qp->port);
1520 1217
1521 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR && 1218 if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
@@ -1528,17 +1225,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
1528 * entries and reinitialize the QP. 1225 * entries and reinitialize the QP.
1529 */ 1226 */
1530 if (new_state == IB_QPS_RESET && !ibqp->uobject) { 1227 if (new_state == IB_QPS_RESET && !ibqp->uobject) {
1531 mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, 1228 mlx4_ib_cq_clean(to_mcq(ibqp->recv_cq), qp->mqp.qpn,
1532 ibqp->srq ? to_msrq(ibqp->srq): NULL); 1229 ibqp->srq ? to_msrq(ibqp->srq): NULL);
1533 if (send_cq != recv_cq) 1230 if (ibqp->send_cq != ibqp->recv_cq)
1534 mlx4_ib_cq_clean(send_cq, qp->mqp.qpn, NULL); 1231 mlx4_ib_cq_clean(to_mcq(ibqp->send_cq), qp->mqp.qpn, NULL);
1535 1232
1536 qp->rq.head = 0; 1233 qp->rq.head = 0;
1537 qp->rq.tail = 0; 1234 qp->rq.tail = 0;
1538 qp->sq.head = 0; 1235 qp->sq.head = 0;
1539 qp->sq.tail = 0; 1236 qp->sq.tail = 0;
1540 qp->sq_next_wqe = 0; 1237 qp->sq_next_wqe = 0;
1541 if (qp->rq.wqe_cnt) 1238 if (!ibqp->srq)
1542 *qp->db.db = 0; 1239 *qp->db.db = 0;
1543 } 1240 }
1544 1241
@@ -1560,55 +1257,27 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1560 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state; 1257 cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
1561 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state; 1258 new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1562 1259
1563 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) { 1260 if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
1564 pr_debug("qpn 0x%x: invalid attribute mask specified "
1565 "for transition %d to %d. qp_type %d,"
1566 " attr_mask 0x%x\n",
1567 ibqp->qp_num, cur_state, new_state,
1568 ibqp->qp_type, attr_mask);
1569 goto out; 1261 goto out;
1570 }
1571 1262
1572 if ((attr_mask & IB_QP_PORT) && 1263 if ((attr_mask & IB_QP_PORT) &&
1573 (attr->port_num == 0 || attr->port_num > dev->num_ports)) { 1264 (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) {
1574 pr_debug("qpn 0x%x: invalid port number (%d) specified "
1575 "for transition %d to %d. qp_type %d\n",
1576 ibqp->qp_num, attr->port_num, cur_state,
1577 new_state, ibqp->qp_type);
1578 goto out; 1265 goto out;
1579 } 1266 }
1580 1267
1581 if ((attr_mask & IB_QP_PORT) && (ibqp->qp_type == IB_QPT_RAW_PACKET) &&
1582 (rdma_port_get_link_layer(&dev->ib_dev, attr->port_num) !=
1583 IB_LINK_LAYER_ETHERNET))
1584 goto out;
1585
1586 if (attr_mask & IB_QP_PKEY_INDEX) { 1268 if (attr_mask & IB_QP_PKEY_INDEX) {
1587 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port; 1269 int p = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1588 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p]) { 1270 if (attr->pkey_index >= dev->dev->caps.pkey_table_len[p])
1589 pr_debug("qpn 0x%x: invalid pkey index (%d) specified "
1590 "for transition %d to %d. qp_type %d\n",
1591 ibqp->qp_num, attr->pkey_index, cur_state,
1592 new_state, ibqp->qp_type);
1593 goto out; 1271 goto out;
1594 }
1595 } 1272 }
1596 1273
1597 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && 1274 if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
1598 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) { 1275 attr->max_rd_atomic > dev->dev->caps.max_qp_init_rdma) {
1599 pr_debug("qpn 0x%x: max_rd_atomic (%d) too large. "
1600 "Transition %d to %d. qp_type %d\n",
1601 ibqp->qp_num, attr->max_rd_atomic, cur_state,
1602 new_state, ibqp->qp_type);
1603 goto out; 1276 goto out;
1604 } 1277 }
1605 1278
1606 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && 1279 if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
1607 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) { 1280 attr->max_dest_rd_atomic > dev->dev->caps.max_qp_dest_rdma) {
1608 pr_debug("qpn 0x%x: max_dest_rd_atomic (%d) too large. "
1609 "Transition %d to %d. qp_type %d\n",
1610 ibqp->qp_num, attr->max_dest_rd_atomic, cur_state,
1611 new_state, ibqp->qp_type);
1612 goto out; 1281 goto out;
1613 } 1282 }
1614 1283
@@ -1624,114 +1293,6 @@ out:
1624 return err; 1293 return err;
1625} 1294}
1626 1295
1627static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
1628 struct ib_send_wr *wr,
1629 void *wqe, unsigned *mlx_seg_len)
1630{
1631 struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device);
1632 struct ib_device *ib_dev = &mdev->ib_dev;
1633 struct mlx4_wqe_mlx_seg *mlx = wqe;
1634 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1635 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1636 u16 pkey;
1637 u32 qkey;
1638 int send_size;
1639 int header_size;
1640 int spc;
1641 int i;
1642
1643 if (wr->opcode != IB_WR_SEND)
1644 return -EINVAL;
1645
1646 send_size = 0;
1647
1648 for (i = 0; i < wr->num_sge; ++i)
1649 send_size += wr->sg_list[i].length;
1650
1651 /* for proxy-qp0 sends, need to add in size of tunnel header */
1652 /* for tunnel-qp0 sends, tunnel header is already in s/g list */
1653 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER)
1654 send_size += sizeof (struct mlx4_ib_tunnel_header);
1655
1656 ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header);
1657
1658 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) {
1659 sqp->ud_header.lrh.service_level =
1660 be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28;
1661 sqp->ud_header.lrh.destination_lid =
1662 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1663 sqp->ud_header.lrh.source_lid =
1664 cpu_to_be16(ah->av.ib.g_slid & 0x7f);
1665 }
1666
1667 mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE);
1668
1669 /* force loopback */
1670 mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR);
1671 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1672
1673 sqp->ud_header.lrh.virtual_lane = 0;
1674 sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED);
1675 ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey);
1676 sqp->ud_header.bth.pkey = cpu_to_be16(pkey);
1677 if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER)
1678 sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
1679 else
1680 sqp->ud_header.bth.destination_qpn =
1681 cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]);
1682
1683 sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1));
1684 if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey))
1685 return -EINVAL;
1686 sqp->ud_header.deth.qkey = cpu_to_be32(qkey);
1687 sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn);
1688
1689 sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
1690 sqp->ud_header.immediate_present = 0;
1691
1692 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1693
1694 /*
1695 * Inline data segments may not cross a 64 byte boundary. If
1696 * our UD header is bigger than the space available up to the
1697 * next 64 byte boundary in the WQE, use two inline data
1698 * segments to hold the UD header.
1699 */
1700 spc = MLX4_INLINE_ALIGN -
1701 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
1702 if (header_size <= spc) {
1703 inl->byte_count = cpu_to_be32(1 << 31 | header_size);
1704 memcpy(inl + 1, sqp->header_buf, header_size);
1705 i = 1;
1706 } else {
1707 inl->byte_count = cpu_to_be32(1 << 31 | spc);
1708 memcpy(inl + 1, sqp->header_buf, spc);
1709
1710 inl = (void *) (inl + 1) + spc;
1711 memcpy(inl + 1, sqp->header_buf + spc, header_size - spc);
1712 /*
1713 * Need a barrier here to make sure all the data is
1714 * visible before the byte_count field is set.
1715 * Otherwise the HCA prefetcher could grab the 64-byte
1716 * chunk with this inline segment and get a valid (!=
1717 * 0xffffffff) byte count but stale data, and end up
1718 * generating a packet with bad headers.
1719 *
1720 * The first inline segment's byte_count field doesn't
1721 * need a barrier, because it comes after a
1722 * control/MLX segment and therefore is at an offset
1723 * of 16 mod 64.
1724 */
1725 wmb();
1726 inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc));
1727 i = 2;
1728 }
1729
1730 *mlx_seg_len =
1731 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16);
1732 return 0;
1733}
1734
1735static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, 1296static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1736 void *wqe, unsigned *mlx_seg_len) 1297 void *wqe, unsigned *mlx_seg_len)
1737{ 1298{
@@ -1739,7 +1300,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1739 struct mlx4_wqe_mlx_seg *mlx = wqe; 1300 struct mlx4_wqe_mlx_seg *mlx = wqe;
1740 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; 1301 struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
1741 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); 1302 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
1742 struct net_device *ndev;
1743 union ib_gid sgid; 1303 union ib_gid sgid;
1744 u16 pkey; 1304 u16 pkey;
1745 int send_size; 1305 int send_size;
@@ -1749,8 +1309,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1749 int is_eth; 1309 int is_eth;
1750 int is_vlan = 0; 1310 int is_vlan = 0;
1751 int is_grh; 1311 int is_grh;
1752 u16 vlan; 1312 u16 vlan = 0;
1753 int err = 0;
1754 1313
1755 send_size = 0; 1314 send_size = 0;
1756 for (i = 0; i < wr->num_sge; ++i) 1315 for (i = 0; i < wr->num_sge; ++i)
@@ -1759,24 +1318,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1759 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; 1318 is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
1760 is_grh = mlx4_ib_ah_grh_present(ah); 1319 is_grh = mlx4_ib_ah_grh_present(ah);
1761 if (is_eth) { 1320 if (is_eth) {
1762 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 1321 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
1763 /* When multi-function is enabled, the ib_core gid 1322 ah->av.ib.gid_index, &sgid);
1764 * indexes don't necessarily match the hw ones, so
1765 * we must use our own cache */
1766 sgid.global.subnet_prefix =
1767 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1768 subnet_prefix;
1769 sgid.global.interface_id =
1770 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1771 guid_cache[ah->av.ib.gid_index];
1772 } else {
1773 err = ib_get_cached_gid(ib_dev,
1774 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1775 ah->av.ib.gid_index, &sgid);
1776 if (err)
1777 return err;
1778 }
1779
1780 vlan = rdma_get_vlan_id(&sgid); 1323 vlan = rdma_get_vlan_id(&sgid);
1781 is_vlan = vlan < 0x1000; 1324 is_vlan = vlan < 0x1000;
1782 } 1325 }
@@ -1795,21 +1338,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1795 sqp->ud_header.grh.flow_label = 1338 sqp->ud_header.grh.flow_label =
1796 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); 1339 ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff);
1797 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; 1340 sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit;
1798 if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { 1341 ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24,
1799 /* When multi-function is enabled, the ib_core gid 1342 ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid);
1800 * indexes don't necessarily match the hw ones, so
1801 * we must use our own cache */
1802 sqp->ud_header.grh.source_gid.global.subnet_prefix =
1803 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1804 subnet_prefix;
1805 sqp->ud_header.grh.source_gid.global.interface_id =
1806 to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
1807 guid_cache[ah->av.ib.gid_index];
1808 } else
1809 ib_get_cached_gid(ib_dev,
1810 be32_to_cpu(ah->av.ib.port_pd) >> 24,
1811 ah->av.ib.gid_index,
1812 &sqp->ud_header.grh.source_gid);
1813 memcpy(sqp->ud_header.grh.destination_gid.raw, 1343 memcpy(sqp->ud_header.grh.destination_gid.raw,
1814 ah->av.ib.dgid, 16); 1344 ah->av.ib.dgid, 16);
1815 } 1345 }
@@ -1821,8 +1351,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1821 (sqp->ud_header.lrh.destination_lid == 1351 (sqp->ud_header.lrh.destination_lid ==
1822 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | 1352 IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) |
1823 (sqp->ud_header.lrh.service_level << 8)); 1353 (sqp->ud_header.lrh.service_level << 8));
1824 if (ah->av.ib.port_pd & cpu_to_be32(0x80000000))
1825 mlx->flags |= cpu_to_be32(0x1); /* force loopback */
1826 mlx->rlid = sqp->ud_header.lrh.destination_lid; 1354 mlx->rlid = sqp->ud_header.lrh.destination_lid;
1827 } 1355 }
1828 1356
@@ -1842,23 +1370,20 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1842 1370
1843 if (is_eth) { 1371 if (is_eth) {
1844 u8 *smac; 1372 u8 *smac;
1845 u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
1846
1847 mlx->sched_prio = cpu_to_be16(pcp);
1848 1373
1849 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6); 1374 memcpy(sqp->ud_header.eth.dmac_h, ah->av.eth.mac, 6);
1850 /* FIXME: cache smac value? */ 1375 /* FIXME: cache smac value? */
1851 ndev = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]; 1376 smac = to_mdev(sqp->qp.ibqp.device)->iboe.netdevs[sqp->qp.port - 1]->dev_addr;
1852 if (!ndev)
1853 return -ENODEV;
1854 smac = ndev->dev_addr;
1855 memcpy(sqp->ud_header.eth.smac_h, smac, 6); 1377 memcpy(sqp->ud_header.eth.smac_h, smac, 6);
1856 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6)) 1378 if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
1857 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK); 1379 mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
1858 if (!is_vlan) { 1380 if (!is_vlan) {
1859 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); 1381 sqp->ud_header.eth.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1860 } else { 1382 } else {
1383 u16 pcp;
1384
1861 sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); 1385 sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE);
1386 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13;
1862 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); 1387 sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp);
1863 } 1388 }
1864 } else { 1389 } else {
@@ -1881,16 +1406,16 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
1881 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); 1406 header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf);
1882 1407
1883 if (0) { 1408 if (0) {
1884 pr_err("built UD header of size %d:\n", header_size); 1409 printk(KERN_ERR "built UD header of size %d:\n", header_size);
1885 for (i = 0; i < header_size / 4; ++i) { 1410 for (i = 0; i < header_size / 4; ++i) {
1886 if (i % 8 == 0) 1411 if (i % 8 == 0)
1887 pr_err(" [%02x] ", i * 4); 1412 printk(" [%02x] ", i * 4);
1888 pr_cont(" %08x", 1413 printk(" %08x",
1889 be32_to_cpu(((__be32 *) sqp->header_buf)[i])); 1414 be32_to_cpu(((__be32 *) sqp->header_buf)[i]));
1890 if ((i + 1) % 8 == 0) 1415 if ((i + 1) % 8 == 0)
1891 pr_cont("\n"); 1416 printk("\n");
1892 } 1417 }
1893 pr_err("\n"); 1418 printk("\n");
1894 } 1419 }
1895 1420
1896 /* 1421 /*
@@ -2022,70 +1547,14 @@ static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
2022} 1547}
2023 1548
2024static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, 1549static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
2025 struct ib_send_wr *wr) 1550 struct ib_send_wr *wr, __be16 *vlan)
2026{ 1551{
2027 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av)); 1552 memcpy(dseg->av, &to_mah(wr->wr.ud.ah)->av, sizeof (struct mlx4_av));
2028 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn); 1553 dseg->dqpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2029 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey); 1554 dseg->qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2030 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan; 1555 dseg->vlan = to_mah(wr->wr.ud.ah)->av.eth.vlan;
2031 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); 1556 memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6);
2032} 1557 *vlan = dseg->vlan;
2033
2034static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev,
2035 struct mlx4_wqe_datagram_seg *dseg,
2036 struct ib_send_wr *wr, enum ib_qp_type qpt)
2037{
2038 union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av;
2039 struct mlx4_av sqp_av = {0};
2040 int port = *((u8 *) &av->ib.port_pd) & 0x3;
2041
2042 /* force loopback */
2043 sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000);
2044 sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */
2045 sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel &
2046 cpu_to_be32(0xf0000000);
2047
2048 memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av));
2049 /* This function used only for sending on QP1 proxies */
2050 dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]);
2051 /* Use QKEY from the QP context, which is set by master */
2052 dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY);
2053}
2054
2055static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len)
2056{
2057 struct mlx4_wqe_inline_seg *inl = wqe;
2058 struct mlx4_ib_tunnel_header hdr;
2059 struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah);
2060 int spc;
2061 int i;
2062
2063 memcpy(&hdr.av, &ah->av, sizeof hdr.av);
2064 hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn);
2065 hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index);
2066 hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey);
2067
2068 spc = MLX4_INLINE_ALIGN -
2069 ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1));
2070 if (sizeof (hdr) <= spc) {
2071 memcpy(inl + 1, &hdr, sizeof (hdr));
2072 wmb();
2073 inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr));
2074 i = 1;
2075 } else {
2076 memcpy(inl + 1, &hdr, spc);
2077 wmb();
2078 inl->byte_count = cpu_to_be32(1 << 31 | spc);
2079
2080 inl = (void *) (inl + 1) + spc;
2081 memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc);
2082 wmb();
2083 inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc));
2084 i = 2;
2085 }
2086
2087 *mlx_seg_len =
2088 ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16);
2089} 1558}
2090 1559
2091static void set_mlx_icrc_seg(void *dseg) 1560static void set_mlx_icrc_seg(void *dseg)
@@ -2169,13 +1638,6 @@ static __be32 send_ieth(struct ib_send_wr *wr)
2169 } 1638 }
2170} 1639}
2171 1640
2172static void add_zero_len_inline(void *wqe)
2173{
2174 struct mlx4_wqe_inline_seg *inl = wqe;
2175 memset(wqe, 0, 16);
2176 inl->byte_count = cpu_to_be32(1 << 31);
2177}
2178
2179int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, 1641int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2180 struct ib_send_wr **bad_wr) 1642 struct ib_send_wr **bad_wr)
2181{ 1643{
@@ -2195,6 +1657,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2195 __be32 uninitialized_var(lso_hdr_sz); 1657 __be32 uninitialized_var(lso_hdr_sz);
2196 __be32 blh; 1658 __be32 blh;
2197 int i; 1659 int i;
1660 __be16 vlan = cpu_to_be16(0xffff);
2198 1661
2199 spin_lock_irqsave(&qp->sq.lock, flags); 1662 spin_lock_irqsave(&qp->sq.lock, flags);
2200 1663
@@ -2234,9 +1697,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2234 wqe += sizeof *ctrl; 1697 wqe += sizeof *ctrl;
2235 size = sizeof *ctrl / 16; 1698 size = sizeof *ctrl / 16;
2236 1699
2237 switch (qp->mlx4_ib_qp_type) { 1700 switch (ibqp->qp_type) {
2238 case MLX4_IB_QPT_RC: 1701 case IB_QPT_RC:
2239 case MLX4_IB_QPT_UC: 1702 case IB_QPT_UC:
2240 switch (wr->opcode) { 1703 switch (wr->opcode) {
2241 case IB_WR_ATOMIC_CMP_AND_SWP: 1704 case IB_WR_ATOMIC_CMP_AND_SWP:
2242 case IB_WR_ATOMIC_FETCH_AND_ADD: 1705 case IB_WR_ATOMIC_FETCH_AND_ADD:
@@ -2297,26 +1760,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2297 } 1760 }
2298 break; 1761 break;
2299 1762
2300 case MLX4_IB_QPT_TUN_SMI_OWNER: 1763 case IB_QPT_UD:
2301 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); 1764 set_datagram_seg(wqe, wr, &vlan);
2302 if (unlikely(err)) {
2303 *bad_wr = wr;
2304 goto out;
2305 }
2306 wqe += seglen;
2307 size += seglen / 16;
2308 break;
2309 case MLX4_IB_QPT_TUN_SMI:
2310 case MLX4_IB_QPT_TUN_GSI:
2311 /* this is a UD qp used in MAD responses to slaves. */
2312 set_datagram_seg(wqe, wr);
2313 /* set the forced-loopback bit in the data seg av */
2314 *(__be32 *) wqe |= cpu_to_be32(0x80000000);
2315 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2316 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2317 break;
2318 case MLX4_IB_QPT_UD:
2319 set_datagram_seg(wqe, wr);
2320 wqe += sizeof (struct mlx4_wqe_datagram_seg); 1765 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2321 size += sizeof (struct mlx4_wqe_datagram_seg) / 16; 1766 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2322 1767
@@ -2332,47 +1777,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2332 } 1777 }
2333 break; 1778 break;
2334 1779
2335 case MLX4_IB_QPT_PROXY_SMI_OWNER: 1780 case IB_QPT_SMI:
2336 if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { 1781 case IB_QPT_GSI:
2337 err = -ENOSYS;
2338 *bad_wr = wr;
2339 goto out;
2340 }
2341 err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen);
2342 if (unlikely(err)) {
2343 *bad_wr = wr;
2344 goto out;
2345 }
2346 wqe += seglen;
2347 size += seglen / 16;
2348 /* to start tunnel header on a cache-line boundary */
2349 add_zero_len_inline(wqe);
2350 wqe += 16;
2351 size++;
2352 build_tunnel_header(wr, wqe, &seglen);
2353 wqe += seglen;
2354 size += seglen / 16;
2355 break;
2356 case MLX4_IB_QPT_PROXY_SMI:
2357 /* don't allow QP0 sends on guests */
2358 err = -ENOSYS;
2359 *bad_wr = wr;
2360 goto out;
2361 case MLX4_IB_QPT_PROXY_GSI:
2362 /* If we are tunneling special qps, this is a UD qp.
2363 * In this case we first add a UD segment targeting
2364 * the tunnel qp, and then add a header with address
2365 * information */
2366 set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type);
2367 wqe += sizeof (struct mlx4_wqe_datagram_seg);
2368 size += sizeof (struct mlx4_wqe_datagram_seg) / 16;
2369 build_tunnel_header(wr, wqe, &seglen);
2370 wqe += seglen;
2371 size += seglen / 16;
2372 break;
2373
2374 case MLX4_IB_QPT_SMI:
2375 case MLX4_IB_QPT_GSI:
2376 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); 1782 err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen);
2377 if (unlikely(err)) { 1783 if (unlikely(err)) {
2378 *bad_wr = wr; 1784 *bad_wr = wr;
@@ -2398,10 +1804,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2398 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); 1804 size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16);
2399 1805
2400 /* Add one more inline data segment for ICRC for MLX sends */ 1806 /* Add one more inline data segment for ICRC for MLX sends */
2401 if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || 1807 if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI ||
2402 qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || 1808 qp->ibqp.qp_type == IB_QPT_GSI)) {
2403 qp->mlx4_ib_qp_type &
2404 (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) {
2405 set_mlx_icrc_seg(dseg + 1); 1809 set_mlx_icrc_seg(dseg + 1);
2406 size += sizeof (struct mlx4_wqe_data_seg) / 16; 1810 size += sizeof (struct mlx4_wqe_data_seg) / 16;
2407 } 1811 }
@@ -2420,6 +1824,11 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2420 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? 1824 ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
2421 MLX4_WQE_CTRL_FENCE : 0) | size; 1825 MLX4_WQE_CTRL_FENCE : 0) | size;
2422 1826
1827 if (be16_to_cpu(vlan) < 0x1000) {
1828 ctrl->ins_vlan = 1 << 6;
1829 ctrl->vlan_tag = vlan;
1830 }
1831
2423 /* 1832 /*
2424 * Make sure descriptor is fully written before 1833 * Make sure descriptor is fully written before
2425 * setting ownership bit (because HW can start 1834 * setting ownership bit (because HW can start
@@ -2428,7 +1837,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2428 wmb(); 1837 wmb();
2429 1838
2430 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { 1839 if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) {
2431 *bad_wr = wr;
2432 err = -EINVAL; 1840 err = -EINVAL;
2433 goto out; 1841 goto out;
2434 } 1842 }
@@ -2493,10 +1901,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2493 int err = 0; 1901 int err = 0;
2494 int nreq; 1902 int nreq;
2495 int ind; 1903 int ind;
2496 int max_gs;
2497 int i; 1904 int i;
2498 1905
2499 max_gs = qp->rq.max_gs;
2500 spin_lock_irqsave(&qp->rq.lock, flags); 1906 spin_lock_irqsave(&qp->rq.lock, flags);
2501 1907
2502 ind = qp->rq.head & (qp->rq.wqe_cnt - 1); 1908 ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -2516,25 +1922,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
2516 1922
2517 scat = get_recv_wqe(qp, ind); 1923 scat = get_recv_wqe(qp, ind);
2518 1924
2519 if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER |
2520 MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) {
2521 ib_dma_sync_single_for_device(ibqp->device,
2522 qp->sqp_proxy_rcv[ind].map,
2523 sizeof (struct mlx4_ib_proxy_sqp_hdr),
2524 DMA_FROM_DEVICE);
2525 scat->byte_count =
2526 cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr));
2527 /* use dma lkey from upper layer entry */
2528 scat->lkey = cpu_to_be32(wr->sg_list->lkey);
2529 scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map);
2530 scat++;
2531 max_gs--;
2532 }
2533
2534 for (i = 0; i < wr->num_sge; ++i) 1925 for (i = 0; i < wr->num_sge; ++i)
2535 __set_data_seg(scat + i, wr->sg_list + i); 1926 __set_data_seg(scat + i, wr->sg_list + i);
2536 1927
2537 if (i < max_gs) { 1928 if (i < qp->rq.max_gs) {
2538 scat[i].byte_count = 0; 1929 scat[i].byte_count = 0;
2539 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); 1930 scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY);
2540 scat[i].addr = 0; 1931 scat[i].addr = 0;
@@ -2729,10 +2120,6 @@ done:
2729 if (qp->flags & MLX4_IB_QP_LSO) 2120 if (qp->flags & MLX4_IB_QP_LSO)
2730 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO; 2121 qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
2731 2122
2732 qp_init_attr->sq_sig_type =
2733 qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
2734 IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
2735
2736out: 2123out:
2737 mutex_unlock(&qp->mutex); 2124 mutex_unlock(&qp->mutex);
2738 return err; 2125 return err;
diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c
index 60c5fb025fc..818b7ecace5 100644
--- a/drivers/infiniband/hw/mlx4/srq.c
+++ b/drivers/infiniband/hw/mlx4/srq.c
@@ -59,7 +59,7 @@ static void mlx4_ib_srq_event(struct mlx4_srq *srq, enum mlx4_event type)
59 event.event = IB_EVENT_SRQ_ERR; 59 event.event = IB_EVENT_SRQ_ERR;
60 break; 60 break;
61 default: 61 default:
62 pr_warn("Unexpected event type %d " 62 printk(KERN_WARNING "mlx4_ib: Unexpected event type %d "
63 "on SRQ %06x\n", type, srq->srqn); 63 "on SRQ %06x\n", type, srq->srqn);
64 return; 64 return;
65 } 65 }
@@ -76,8 +76,6 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
76 struct mlx4_ib_srq *srq; 76 struct mlx4_ib_srq *srq;
77 struct mlx4_wqe_srq_next_seg *next; 77 struct mlx4_wqe_srq_next_seg *next;
78 struct mlx4_wqe_data_seg *scatter; 78 struct mlx4_wqe_data_seg *scatter;
79 u32 cqn;
80 u16 xrcdn;
81 int desc_size; 79 int desc_size;
82 int buf_size; 80 int buf_size;
83 int err; 81 int err;
@@ -176,18 +174,12 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
176 } 174 }
177 } 175 }
178 176
179 cqn = (init_attr->srq_type == IB_SRQT_XRC) ? 177 err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, &srq->mtt,
180 to_mcq(init_attr->ext.xrc.cq)->mcq.cqn : 0;
181 xrcdn = (init_attr->srq_type == IB_SRQT_XRC) ?
182 to_mxrcd(init_attr->ext.xrc.xrcd)->xrcdn :
183 (u16) dev->dev->caps.reserved_xrcds;
184 err = mlx4_srq_alloc(dev->dev, to_mpd(pd)->pdn, cqn, xrcdn, &srq->mtt,
185 srq->db.dma, &srq->msrq); 178 srq->db.dma, &srq->msrq);
186 if (err) 179 if (err)
187 goto err_wrid; 180 goto err_wrid;
188 181
189 srq->msrq.event = mlx4_ib_srq_event; 182 srq->msrq.event = mlx4_ib_srq_event;
190 srq->ibsrq.ext.xrc.srq_num = srq->msrq.srqn;
191 183
192 if (pd->uobject) 184 if (pd->uobject)
193 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) { 185 if (ib_copy_to_udata(udata, &srq->msrq.srqn, sizeof (__u32))) {
diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c
deleted file mode 100644
index 5b2a01dfb90..00000000000
--- a/drivers/infiniband/hw/mlx4/sysfs.c
+++ /dev/null
@@ -1,794 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33/*#include "core_priv.h"*/
34#include "mlx4_ib.h"
35#include <linux/slab.h>
36#include <linux/string.h>
37#include <linux/stat.h>
38
39#include <rdma/ib_mad.h>
40/*show_admin_alias_guid returns the administratively assigned value of that GUID.
41 * Values returned in buf parameter string:
42 * 0 - requests opensm to assign a value.
43 * ffffffffffffffff - delete this entry.
44 * other - value assigned by administrator.
45 */
46static ssize_t show_admin_alias_guid(struct device *dev,
47 struct device_attribute *attr, char *buf)
48{
49 int record_num;/*0-15*/
50 int guid_index_in_rec; /*0 - 7*/
51 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
52 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
53 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
54 struct mlx4_ib_dev *mdev = port->dev;
55
56 record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
57 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
58
59 return sprintf(buf, "%llx\n",
60 be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
61 ports_guid[port->num - 1].
62 all_rec_per_port[record_num].
63 all_recs[8 * guid_index_in_rec]));
64}
65
66/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
67 * Values in buf parameter string:
68 * 0 - requests opensm to assign a value.
69 * 0xffffffffffffffff - delete this entry.
70 * other - guid value assigned by the administrator.
71 */
72static ssize_t store_admin_alias_guid(struct device *dev,
73 struct device_attribute *attr,
74 const char *buf, size_t count)
75{
76 int record_num;/*0-15*/
77 int guid_index_in_rec; /*0 - 7*/
78 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
79 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
80 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
81 struct mlx4_ib_dev *mdev = port->dev;
82 u64 sysadmin_ag_val;
83
84 record_num = mlx4_ib_iov_dentry->entry_num / 8;
85 guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
86 if (0 == record_num && 0 == guid_index_in_rec) {
87 pr_err("GUID 0 block 0 is RO\n");
88 return count;
89 }
90 sscanf(buf, "%llx", &sysadmin_ag_val);
91 *(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
92 all_rec_per_port[record_num].
93 all_recs[GUID_REC_SIZE * guid_index_in_rec] =
94 cpu_to_be64(sysadmin_ag_val);
95
96 /* Change the state to be pending for update */
97 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
98 = MLX4_GUID_INFO_STATUS_IDLE ;
99
100 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
101 = MLX4_GUID_INFO_RECORD_SET;
102
103 switch (sysadmin_ag_val) {
104 case MLX4_GUID_FOR_DELETE_VAL:
105 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
106 = MLX4_GUID_INFO_RECORD_DELETE;
107 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
108 = MLX4_GUID_SYSADMIN_ASSIGN;
109 break;
110 /* The sysadmin requests the SM to re-assign */
111 case MLX4_NOT_SET_GUID:
112 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
113 = MLX4_GUID_DRIVER_ASSIGN;
114 break;
115 /* The sysadmin requests a specific value.*/
116 default:
117 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
118 = MLX4_GUID_SYSADMIN_ASSIGN;
119 break;
120 }
121
122 /* set the record index */
123 mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
124 = mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
125
126 mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
127
128 return count;
129}
130
131static ssize_t show_port_gid(struct device *dev,
132 struct device_attribute *attr,
133 char *buf)
134{
135 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
136 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
137 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
138 struct mlx4_ib_dev *mdev = port->dev;
139 union ib_gid gid;
140 ssize_t ret;
141
142 ret = __mlx4_ib_query_gid(&mdev->ib_dev, port->num,
143 mlx4_ib_iov_dentry->entry_num, &gid, 1);
144 if (ret)
145 return ret;
146 ret = sprintf(buf, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
147 be16_to_cpu(((__be16 *) gid.raw)[0]),
148 be16_to_cpu(((__be16 *) gid.raw)[1]),
149 be16_to_cpu(((__be16 *) gid.raw)[2]),
150 be16_to_cpu(((__be16 *) gid.raw)[3]),
151 be16_to_cpu(((__be16 *) gid.raw)[4]),
152 be16_to_cpu(((__be16 *) gid.raw)[5]),
153 be16_to_cpu(((__be16 *) gid.raw)[6]),
154 be16_to_cpu(((__be16 *) gid.raw)[7]));
155 return ret;
156}
157
158static ssize_t show_phys_port_pkey(struct device *dev,
159 struct device_attribute *attr,
160 char *buf)
161{
162 struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
163 container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
164 struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
165 struct mlx4_ib_dev *mdev = port->dev;
166 u16 pkey;
167 ssize_t ret;
168
169 ret = __mlx4_ib_query_pkey(&mdev->ib_dev, port->num,
170 mlx4_ib_iov_dentry->entry_num, &pkey, 1);
171 if (ret)
172 return ret;
173
174 return sprintf(buf, "0x%04x\n", pkey);
175}
176
177#define DENTRY_REMOVE(_dentry) \
178do { \
179 sysfs_remove_file((_dentry)->kobj, &(_dentry)->dentry.attr); \
180} while (0);
181
182static int create_sysfs_entry(void *_ctx, struct mlx4_ib_iov_sysfs_attr *_dentry,
183 char *_name, struct kobject *_kobj,
184 ssize_t (*show)(struct device *dev,
185 struct device_attribute *attr,
186 char *buf),
187 ssize_t (*store)(struct device *dev,
188 struct device_attribute *attr,
189 const char *buf, size_t count)
190 )
191{
192 int ret = 0;
193 struct mlx4_ib_iov_sysfs_attr *vdentry = _dentry;
194
195 vdentry->ctx = _ctx;
196 vdentry->dentry.show = show;
197 vdentry->dentry.store = store;
198 sysfs_attr_init(&vdentry->dentry.attr);
199 vdentry->dentry.attr.name = vdentry->name;
200 vdentry->dentry.attr.mode = 0;
201 vdentry->kobj = _kobj;
202 snprintf(vdentry->name, 15, "%s", _name);
203
204 if (vdentry->dentry.store)
205 vdentry->dentry.attr.mode |= S_IWUSR;
206
207 if (vdentry->dentry.show)
208 vdentry->dentry.attr.mode |= S_IRUGO;
209
210 ret = sysfs_create_file(vdentry->kobj, &vdentry->dentry.attr);
211 if (ret) {
212 pr_err("failed to create %s\n", vdentry->dentry.attr.name);
213 vdentry->ctx = NULL;
214 return ret;
215 }
216
217 return ret;
218}
219
220int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
221 struct attribute *attr)
222{
223 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
224 int ret;
225
226 ret = sysfs_create_file(port->mcgs_parent, attr);
227 if (ret)
228 pr_err("failed to create %s\n", attr->name);
229
230 return ret;
231}
232
233void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
234 struct attribute *attr)
235{
236 struct mlx4_ib_iov_port *port = &device->iov_ports[port_num - 1];
237
238 sysfs_remove_file(port->mcgs_parent, attr);
239}
240
241static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
242{
243 int i;
244 char buff[10];
245 struct mlx4_ib_iov_port *port = NULL;
246 int ret = 0 ;
247 struct ib_port_attr attr;
248
249 /* get the physical gid and pkey table sizes.*/
250 ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
251 if (ret)
252 goto err;
253
254 port = &device->iov_ports[port_num - 1];
255 port->dev = device;
256 port->num = port_num;
257 /* Directory structure:
258 * iov -
259 * port num -
260 * admin_guids
261 * gids (operational)
262 * mcg_table
263 */
264 port->dentr_ar = kzalloc(sizeof (struct mlx4_ib_iov_sysfs_attr_ar),
265 GFP_KERNEL);
266 if (!port->dentr_ar) {
267 ret = -ENOMEM;
268 goto err;
269 }
270 sprintf(buff, "%d", port_num);
271 port->cur_port = kobject_create_and_add(buff,
272 kobject_get(device->ports_parent));
273 if (!port->cur_port) {
274 ret = -ENOMEM;
275 goto kobj_create_err;
276 }
277 /* admin GUIDs */
278 port->admin_alias_parent = kobject_create_and_add("admin_guids",
279 kobject_get(port->cur_port));
280 if (!port->admin_alias_parent) {
281 ret = -ENOMEM;
282 goto err_admin_guids;
283 }
284 for (i = 0 ; i < attr.gid_tbl_len; i++) {
285 sprintf(buff, "%d", i);
286 port->dentr_ar->dentries[i].entry_num = i;
287 ret = create_sysfs_entry(port, &port->dentr_ar->dentries[i],
288 buff, port->admin_alias_parent,
289 show_admin_alias_guid, store_admin_alias_guid);
290 if (ret)
291 goto err_admin_alias_parent;
292 }
293
294 /* gids subdirectory (operational gids) */
295 port->gids_parent = kobject_create_and_add("gids",
296 kobject_get(port->cur_port));
297 if (!port->gids_parent) {
298 ret = -ENOMEM;
299 goto err_gids;
300 }
301
302 for (i = 0 ; i < attr.gid_tbl_len; i++) {
303 sprintf(buff, "%d", i);
304 port->dentr_ar->dentries[attr.gid_tbl_len + i].entry_num = i;
305 ret = create_sysfs_entry(port,
306 &port->dentr_ar->dentries[attr.gid_tbl_len + i],
307 buff,
308 port->gids_parent, show_port_gid, NULL);
309 if (ret)
310 goto err_gids_parent;
311 }
312
313 /* physical port pkey table */
314 port->pkeys_parent =
315 kobject_create_and_add("pkeys", kobject_get(port->cur_port));
316 if (!port->pkeys_parent) {
317 ret = -ENOMEM;
318 goto err_pkeys;
319 }
320
321 for (i = 0 ; i < attr.pkey_tbl_len; i++) {
322 sprintf(buff, "%d", i);
323 port->dentr_ar->dentries[2 * attr.gid_tbl_len + i].entry_num = i;
324 ret = create_sysfs_entry(port,
325 &port->dentr_ar->dentries[2 * attr.gid_tbl_len + i],
326 buff, port->pkeys_parent,
327 show_phys_port_pkey, NULL);
328 if (ret)
329 goto err_pkeys_parent;
330 }
331
332 /* MCGs table */
333 port->mcgs_parent =
334 kobject_create_and_add("mcgs", kobject_get(port->cur_port));
335 if (!port->mcgs_parent) {
336 ret = -ENOMEM;
337 goto err_mcgs;
338 }
339 return 0;
340
341err_mcgs:
342 kobject_put(port->cur_port);
343
344err_pkeys_parent:
345 kobject_put(port->pkeys_parent);
346
347err_pkeys:
348 kobject_put(port->cur_port);
349
350err_gids_parent:
351 kobject_put(port->gids_parent);
352
353err_gids:
354 kobject_put(port->cur_port);
355
356err_admin_alias_parent:
357 kobject_put(port->admin_alias_parent);
358
359err_admin_guids:
360 kobject_put(port->cur_port);
361 kobject_put(port->cur_port); /* once more for create_and_add buff */
362
363kobj_create_err:
364 kobject_put(device->ports_parent);
365 kfree(port->dentr_ar);
366
367err:
368 pr_err("add_port_entries FAILED: for port:%d, error: %d\n",
369 port_num, ret);
370 return ret;
371}
372
373static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
374{
375 char base_name[9];
376
377 /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
378 strlcpy(name, pci_name(dev->dev->pdev), max);
379 strncpy(base_name, name, 8); /*till xxxx:yy:*/
380 base_name[8] = '\0';
381 /* with no ARI only 3 last bits are used so when the fn is higher than 8
382 * need to add it to the dev num, so count in the last number will be
383 * modulo 8 */
384 sprintf(name, "%s%.2d.%d", base_name, (i/8), (i%8));
385}
386
387struct mlx4_port {
388 struct kobject kobj;
389 struct mlx4_ib_dev *dev;
390 struct attribute_group pkey_group;
391 struct attribute_group gid_group;
392 u8 port_num;
393 int slave;
394};
395
396
397static void mlx4_port_release(struct kobject *kobj)
398{
399 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
400 struct attribute *a;
401 int i;
402
403 for (i = 0; (a = p->pkey_group.attrs[i]); ++i)
404 kfree(a);
405 kfree(p->pkey_group.attrs);
406 for (i = 0; (a = p->gid_group.attrs[i]); ++i)
407 kfree(a);
408 kfree(p->gid_group.attrs);
409 kfree(p);
410}
411
412struct port_attribute {
413 struct attribute attr;
414 ssize_t (*show)(struct mlx4_port *, struct port_attribute *, char *buf);
415 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
416 const char *buf, size_t count);
417};
418
419static ssize_t port_attr_show(struct kobject *kobj,
420 struct attribute *attr, char *buf)
421{
422 struct port_attribute *port_attr =
423 container_of(attr, struct port_attribute, attr);
424 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
425
426 if (!port_attr->show)
427 return -EIO;
428 return port_attr->show(p, port_attr, buf);
429}
430
431static ssize_t port_attr_store(struct kobject *kobj,
432 struct attribute *attr,
433 const char *buf, size_t size)
434{
435 struct port_attribute *port_attr =
436 container_of(attr, struct port_attribute, attr);
437 struct mlx4_port *p = container_of(kobj, struct mlx4_port, kobj);
438
439 if (!port_attr->store)
440 return -EIO;
441 return port_attr->store(p, port_attr, buf, size);
442}
443
444static const struct sysfs_ops port_sysfs_ops = {
445 .show = port_attr_show,
446 .store = port_attr_store,
447};
448
449static struct kobj_type port_type = {
450 .release = mlx4_port_release,
451 .sysfs_ops = &port_sysfs_ops,
452};
453
454struct port_table_attribute {
455 struct port_attribute attr;
456 char name[8];
457 int index;
458};
459
460static ssize_t show_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
461 char *buf)
462{
463 struct port_table_attribute *tab_attr =
464 container_of(attr, struct port_table_attribute, attr);
465 ssize_t ret = -ENODEV;
466
467 if (p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1][tab_attr->index] >=
468 (p->dev->dev->caps.pkey_table_len[p->port_num]))
469 ret = sprintf(buf, "none\n");
470 else
471 ret = sprintf(buf, "%d\n",
472 p->dev->pkeys.virt2phys_pkey[p->slave]
473 [p->port_num - 1][tab_attr->index]);
474 return ret;
475}
476
477static ssize_t store_port_pkey(struct mlx4_port *p, struct port_attribute *attr,
478 const char *buf, size_t count)
479{
480 struct port_table_attribute *tab_attr =
481 container_of(attr, struct port_table_attribute, attr);
482 int idx;
483 int err;
484
485 /* do not allow remapping Dom0 virtual pkey table */
486 if (p->slave == mlx4_master_func_num(p->dev->dev))
487 return -EINVAL;
488
489 if (!strncasecmp(buf, "no", 2))
490 idx = p->dev->dev->phys_caps.pkey_phys_table_len[p->port_num] - 1;
491 else if (sscanf(buf, "%i", &idx) != 1 ||
492 idx >= p->dev->dev->caps.pkey_table_len[p->port_num] ||
493 idx < 0)
494 return -EINVAL;
495
496 p->dev->pkeys.virt2phys_pkey[p->slave][p->port_num - 1]
497 [tab_attr->index] = idx;
498 mlx4_sync_pkey_table(p->dev->dev, p->slave, p->port_num,
499 tab_attr->index, idx);
500 err = mlx4_gen_pkey_eqe(p->dev->dev, p->slave, p->port_num);
501 if (err) {
502 pr_err("mlx4_gen_pkey_eqe failed for slave %d,"
503 " port %d, index %d\n", p->slave, p->port_num, idx);
504 return err;
505 }
506 return count;
507}
508
509static ssize_t show_port_gid_idx(struct mlx4_port *p,
510 struct port_attribute *attr, char *buf)
511{
512 return sprintf(buf, "%d\n", p->slave);
513}
514
515static struct attribute **
516alloc_group_attrs(ssize_t (*show)(struct mlx4_port *,
517 struct port_attribute *, char *buf),
518 ssize_t (*store)(struct mlx4_port *, struct port_attribute *,
519 const char *buf, size_t count),
520 int len)
521{
522 struct attribute **tab_attr;
523 struct port_table_attribute *element;
524 int i;
525
526 tab_attr = kcalloc(1 + len, sizeof (struct attribute *), GFP_KERNEL);
527 if (!tab_attr)
528 return NULL;
529
530 for (i = 0; i < len; i++) {
531 element = kzalloc(sizeof (struct port_table_attribute),
532 GFP_KERNEL);
533 if (!element)
534 goto err;
535 if (snprintf(element->name, sizeof (element->name),
536 "%d", i) >= sizeof (element->name)) {
537 kfree(element);
538 goto err;
539 }
540 sysfs_attr_init(&element->attr.attr);
541 element->attr.attr.name = element->name;
542 if (store) {
543 element->attr.attr.mode = S_IWUSR | S_IRUGO;
544 element->attr.store = store;
545 } else
546 element->attr.attr.mode = S_IRUGO;
547
548 element->attr.show = show;
549 element->index = i;
550 tab_attr[i] = &element->attr.attr;
551 }
552 return tab_attr;
553
554err:
555 while (--i >= 0)
556 kfree(tab_attr[i]);
557 kfree(tab_attr);
558 return NULL;
559}
560
561static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
562{
563 struct mlx4_port *p;
564 int i;
565 int ret;
566
567 p = kzalloc(sizeof *p, GFP_KERNEL);
568 if (!p)
569 return -ENOMEM;
570
571 p->dev = dev;
572 p->port_num = port_num;
573 p->slave = slave;
574
575 ret = kobject_init_and_add(&p->kobj, &port_type,
576 kobject_get(dev->dev_ports_parent[slave]),
577 "%d", port_num);
578 if (ret)
579 goto err_alloc;
580
581 p->pkey_group.name = "pkey_idx";
582 p->pkey_group.attrs =
583 alloc_group_attrs(show_port_pkey, store_port_pkey,
584 dev->dev->caps.pkey_table_len[port_num]);
585 if (!p->pkey_group.attrs)
586 goto err_alloc;
587
588 ret = sysfs_create_group(&p->kobj, &p->pkey_group);
589 if (ret)
590 goto err_free_pkey;
591
592 p->gid_group.name = "gid_idx";
593 p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
594 if (!p->gid_group.attrs)
595 goto err_free_pkey;
596
597 ret = sysfs_create_group(&p->kobj, &p->gid_group);
598 if (ret)
599 goto err_free_gid;
600
601 list_add_tail(&p->kobj.entry, &dev->pkeys.pkey_port_list[slave]);
602 return 0;
603
604err_free_gid:
605 kfree(p->gid_group.attrs[0]);
606 kfree(p->gid_group.attrs);
607
608err_free_pkey:
609 for (i = 0; i < dev->dev->caps.pkey_table_len[port_num]; ++i)
610 kfree(p->pkey_group.attrs[i]);
611 kfree(p->pkey_group.attrs);
612
613err_alloc:
614 kobject_put(dev->dev_ports_parent[slave]);
615 kfree(p);
616 return ret;
617}
618
619static int register_one_pkey_tree(struct mlx4_ib_dev *dev, int slave)
620{
621 char name[32];
622 int err;
623 int port;
624 struct kobject *p, *t;
625 struct mlx4_port *mport;
626
627 get_name(dev, name, slave, sizeof name);
628
629 dev->pkeys.device_parent[slave] =
630 kobject_create_and_add(name, kobject_get(dev->iov_parent));
631
632 if (!dev->pkeys.device_parent[slave]) {
633 err = -ENOMEM;
634 goto fail_dev;
635 }
636
637 INIT_LIST_HEAD(&dev->pkeys.pkey_port_list[slave]);
638
639 dev->dev_ports_parent[slave] =
640 kobject_create_and_add("ports",
641 kobject_get(dev->pkeys.device_parent[slave]));
642
643 if (!dev->dev_ports_parent[slave]) {
644 err = -ENOMEM;
645 goto err_ports;
646 }
647
648 for (port = 1; port <= dev->dev->caps.num_ports; ++port) {
649 err = add_port(dev, port, slave);
650 if (err)
651 goto err_add;
652 }
653 return 0;
654
655err_add:
656 list_for_each_entry_safe(p, t,
657 &dev->pkeys.pkey_port_list[slave],
658 entry) {
659 list_del(&p->entry);
660 mport = container_of(p, struct mlx4_port, kobj);
661 sysfs_remove_group(p, &mport->pkey_group);
662 sysfs_remove_group(p, &mport->gid_group);
663 kobject_put(p);
664 }
665 kobject_put(dev->dev_ports_parent[slave]);
666
667err_ports:
668 kobject_put(dev->pkeys.device_parent[slave]);
669 /* extra put for the device_parent create_and_add */
670 kobject_put(dev->pkeys.device_parent[slave]);
671
672fail_dev:
673 kobject_put(dev->iov_parent);
674 return err;
675}
676
677static int register_pkey_tree(struct mlx4_ib_dev *device)
678{
679 int i;
680
681 if (!mlx4_is_master(device->dev))
682 return 0;
683
684 for (i = 0; i <= device->dev->num_vfs; ++i)
685 register_one_pkey_tree(device, i);
686
687 return 0;
688}
689
690static void unregister_pkey_tree(struct mlx4_ib_dev *device)
691{
692 int slave;
693 struct kobject *p, *t;
694 struct mlx4_port *port;
695
696 if (!mlx4_is_master(device->dev))
697 return;
698
699 for (slave = device->dev->num_vfs; slave >= 0; --slave) {
700 list_for_each_entry_safe(p, t,
701 &device->pkeys.pkey_port_list[slave],
702 entry) {
703 list_del(&p->entry);
704 port = container_of(p, struct mlx4_port, kobj);
705 sysfs_remove_group(p, &port->pkey_group);
706 sysfs_remove_group(p, &port->gid_group);
707 kobject_put(p);
708 kobject_put(device->dev_ports_parent[slave]);
709 }
710 kobject_put(device->dev_ports_parent[slave]);
711 kobject_put(device->pkeys.device_parent[slave]);
712 kobject_put(device->pkeys.device_parent[slave]);
713 kobject_put(device->iov_parent);
714 }
715}
716
717int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *dev)
718{
719 int i;
720 int ret = 0;
721
722 if (!mlx4_is_master(dev->dev))
723 return 0;
724
725 dev->iov_parent =
726 kobject_create_and_add("iov",
727 kobject_get(dev->ib_dev.ports_parent->parent));
728 if (!dev->iov_parent) {
729 ret = -ENOMEM;
730 goto err;
731 }
732 dev->ports_parent =
733 kobject_create_and_add("ports",
734 kobject_get(dev->iov_parent));
735 if (!dev->iov_parent) {
736 ret = -ENOMEM;
737 goto err_ports;
738 }
739
740 for (i = 1; i <= dev->ib_dev.phys_port_cnt; ++i) {
741 ret = add_port_entries(dev, i);
742 if (ret)
743 goto err_add_entries;
744 }
745
746 ret = register_pkey_tree(dev);
747 if (ret)
748 goto err_add_entries;
749 return 0;
750
751err_add_entries:
752 kobject_put(dev->ports_parent);
753
754err_ports:
755 kobject_put(dev->iov_parent);
756err:
757 kobject_put(dev->ib_dev.ports_parent->parent);
758 pr_err("mlx4_ib_device_register_sysfs error (%d)\n", ret);
759 return ret;
760}
761
762static void unregister_alias_guid_tree(struct mlx4_ib_dev *device)
763{
764 struct mlx4_ib_iov_port *p;
765 int i;
766
767 if (!mlx4_is_master(device->dev))
768 return;
769
770 for (i = 0; i < device->dev->caps.num_ports; i++) {
771 p = &device->iov_ports[i];
772 kobject_put(p->admin_alias_parent);
773 kobject_put(p->gids_parent);
774 kobject_put(p->pkeys_parent);
775 kobject_put(p->mcgs_parent);
776 kobject_put(p->cur_port);
777 kobject_put(p->cur_port);
778 kobject_put(p->cur_port);
779 kobject_put(p->cur_port);
780 kobject_put(p->cur_port);
781 kobject_put(p->dev->ports_parent);
782 kfree(p->dentr_ar);
783 }
784}
785
786void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device)
787{
788 unregister_alias_guid_tree(device);
789 unregister_pkey_tree(device);
790 kobject_put(device->ports_parent);
791 kobject_put(device->iov_parent);
792 kobject_put(device->iov_parent);
793 kobject_put(device->ib_dev.ports_parent->parent);
794}
diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h
index 07e6769ef43..13beedeeef9 100644
--- a/drivers/infiniband/hw/mlx4/user.h
+++ b/drivers/infiniband/hw/mlx4/user.h
@@ -40,9 +40,7 @@
40 * Increment this value if any changes that break userspace ABI 40 * Increment this value if any changes that break userspace ABI
41 * compatibility are made. 41 * compatibility are made.
42 */ 42 */
43 43#define MLX4_IB_UVERBS_ABI_VERSION 3
44#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3
45#define MLX4_IB_UVERBS_ABI_VERSION 4
46 44
47/* 45/*
48 * Make sure that all structs defined in this file remain laid out so 46 * Make sure that all structs defined in this file remain laid out so
@@ -52,18 +50,10 @@
52 * instead. 50 * instead.
53 */ 51 */
54 52
55struct mlx4_ib_alloc_ucontext_resp_v3 {
56 __u32 qp_tab_size;
57 __u16 bf_reg_size;
58 __u16 bf_regs_per_page;
59};
60
61struct mlx4_ib_alloc_ucontext_resp { 53struct mlx4_ib_alloc_ucontext_resp {
62 __u32 dev_caps;
63 __u32 qp_tab_size; 54 __u32 qp_tab_size;
64 __u16 bf_reg_size; 55 __u16 bf_reg_size;
65 __u16 bf_regs_per_page; 56 __u16 bf_regs_per_page;
66 __u32 cqe_size;
67}; 57};
68 58
69struct mlx4_ib_alloc_pd_resp { 59struct mlx4_ib_alloc_pd_resp {
diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c
index 712d2a30fbe..e4a08c2819e 100644
--- a/drivers/infiniband/hw/mthca/mthca_catas.c
+++ b/drivers/infiniband/hw/mthca/mthca_catas.c
@@ -31,7 +31,6 @@
31 */ 31 */
32 32
33#include <linux/jiffies.h> 33#include <linux/jiffies.h>
34#include <linux/module.h>
35#include <linux/timer.h> 34#include <linux/timer.h>
36#include <linux/workqueue.h> 35#include <linux/workqueue.h>
37 36
diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 9d3e5c1ac60..3082b3b3d62 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -36,7 +36,6 @@
36#include <linux/pci.h> 36#include <linux/pci.h>
37#include <linux/errno.h> 37#include <linux/errno.h>
38#include <linux/sched.h> 38#include <linux/sched.h>
39#include <linux/module.h>
40#include <linux/slab.h> 39#include <linux/slab.h>
41#include <asm/io.h> 40#include <asm/io.h>
42#include <rdma/ib_mad.h> 41#include <rdma/ib_mad.h>
diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c
index 40ba8333815..53157b86a1b 100644
--- a/drivers/infiniband/hw/mthca/mthca_cq.c
+++ b/drivers/infiniband/hw/mthca/mthca_cq.c
@@ -643,8 +643,7 @@ static inline int mthca_poll_one(struct mthca_dev *dev,
643 entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0; 643 entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0;
644 checksum = (be32_to_cpu(cqe->rqpn) >> 24) | 644 checksum = (be32_to_cpu(cqe->rqpn) >> 24) |
645 ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00); 645 ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00);
646 entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ? 646 entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff);
647 IB_WC_IP_CSUM_OK : 0;
648 } 647 }
649 648
650 entry->status = IB_WC_SUCCESS; 649 entry->status = IB_WC_SUCCESS;
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c
index 87897b95666..aa12a533ae9 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -130,7 +130,7 @@ static int log_mtts_per_seg = ilog2(MTHCA_MTT_SEG_SIZE / 8);
130module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); 130module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
131MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)"); 131MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-5)");
132 132
133static char mthca_version[] = 133static char mthca_version[] __devinitdata =
134 DRV_NAME ": Mellanox InfiniBand HCA driver v" 134 DRV_NAME ": Mellanox InfiniBand HCA driver v"
135 DRV_VERSION " (" DRV_RELDATE ")\n"; 135 DRV_VERSION " (" DRV_RELDATE ")\n";
136 136
@@ -1139,7 +1139,8 @@ int __mthca_restart_one(struct pci_dev *pdev)
1139 return __mthca_init_one(pdev, hca_type); 1139 return __mthca_init_one(pdev, hca_type);
1140} 1140}
1141 1141
1142static int mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id) 1142static int __devinit mthca_init_one(struct pci_dev *pdev,
1143 const struct pci_device_id *id)
1143{ 1144{
1144 int ret; 1145 int ret;
1145 1146
@@ -1161,7 +1162,7 @@ static int mthca_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
1161 return ret; 1162 return ret;
1162} 1163}
1163 1164
1164static void mthca_remove_one(struct pci_dev *pdev) 1165static void __devexit mthca_remove_one(struct pci_dev *pdev)
1165{ 1166{
1166 mutex_lock(&mthca_device_mutex); 1167 mutex_lock(&mthca_device_mutex);
1167 __mthca_remove_one(pdev); 1168 __mthca_remove_one(pdev);
@@ -1198,7 +1199,7 @@ static struct pci_driver mthca_driver = {
1198 .name = DRV_NAME, 1199 .name = DRV_NAME,
1199 .id_table = mthca_pci_table, 1200 .id_table = mthca_pci_table,
1200 .probe = mthca_init_one, 1201 .probe = mthca_init_one,
1201 .remove = mthca_remove_one, 1202 .remove = __devexit_p(mthca_remove_one)
1202}; 1203};
1203 1204
1204static void __init __mthca_check_profile_val(const char *name, int *pval, 1205static void __init __mthca_check_profile_val(const char *name, int *pval,
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c
index ed9a989e501..ab876f928a1 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -146,7 +146,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
146 146
147 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), 147 buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *),
148 GFP_KERNEL); 148 GFP_KERNEL);
149 buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, 149 buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *),
150 GFP_KERNEL); 150 GFP_KERNEL);
151 if (!buddy->bits || !buddy->num_free) 151 if (!buddy->bits || !buddy->num_free)
152 goto err_out; 152 goto err_out;
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 5b71d43bd89..365fe0e1419 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -40,9 +40,7 @@
40 40
41#include <linux/sched.h> 41#include <linux/sched.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/stat.h>
44#include <linux/mm.h> 43#include <linux/mm.h>
45#include <linux/export.h>
46 44
47#include "mthca_dev.h" 45#include "mthca_dev.h"
48#include "mthca_cmd.h" 46#include "mthca_cmd.h"
@@ -440,9 +438,6 @@ static struct ib_srq *mthca_create_srq(struct ib_pd *pd,
440 struct mthca_srq *srq; 438 struct mthca_srq *srq;
441 int err; 439 int err;
442 440
443 if (init_attr->srq_type != IB_SRQT_BASIC)
444 return ERR_PTR(-ENOSYS);
445
446 srq = kmalloc(sizeof *srq, GFP_KERNEL); 441 srq = kmalloc(sizeof *srq, GFP_KERNEL);
447 if (!srq) 442 if (!srq)
448 return ERR_PTR(-ENOMEM); 443 return ERR_PTR(-ENOMEM);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 26a68453610..9601049e14d 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -247,8 +247,7 @@ void mthca_qp_event(struct mthca_dev *dev, u32 qpn,
247 spin_unlock(&dev->qp_table.lock); 247 spin_unlock(&dev->qp_table.lock);
248 248
249 if (!qp) { 249 if (!qp) {
250 mthca_warn(dev, "Async event %d for bogus QP %08x\n", 250 mthca_warn(dev, "Async event for bogus QP %08x\n", qpn);
251 event_type, qpn);
252 return; 251 return;
253 } 252 }
254 253
@@ -502,7 +501,6 @@ done:
502 qp_attr->cap.max_inline_data = qp->max_inline_data; 501 qp_attr->cap.max_inline_data = qp->max_inline_data;
503 502
504 qp_init_attr->cap = qp_attr->cap; 503 qp_init_attr->cap = qp_attr->cap;
505 qp_init_attr->sq_sig_type = qp->sq_policy;
506 504
507out_mailbox: 505out_mailbox:
508 mthca_free_mailbox(dev, mailbox); 506 mthca_free_mailbox(dev, mailbox);
diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c
index 74c6a942604..4fa3534ec23 100644
--- a/drivers/infiniband/hw/mthca/mthca_reset.c
+++ b/drivers/infiniband/hw/mthca/mthca_reset.c
@@ -241,16 +241,16 @@ good:
241 241
242 if (hca_pcie_cap) { 242 if (hca_pcie_cap) {
243 devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4]; 243 devctl = hca_header[(hca_pcie_cap + PCI_EXP_DEVCTL) / 4];
244 if (pcie_capability_write_word(mdev->pdev, PCI_EXP_DEVCTL, 244 if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_DEVCTL,
245 devctl)) { 245 devctl)) {
246 err = -ENODEV; 246 err = -ENODEV;
247 mthca_err(mdev, "Couldn't restore HCA PCI Express " 247 mthca_err(mdev, "Couldn't restore HCA PCI Express "
248 "Device Control register, aborting.\n"); 248 "Device Control register, aborting.\n");
249 goto out; 249 goto out;
250 } 250 }
251 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4]; 251 linkctl = hca_header[(hca_pcie_cap + PCI_EXP_LNKCTL) / 4];
252 if (pcie_capability_write_word(mdev->pdev, PCI_EXP_LNKCTL, 252 if (pci_write_config_word(mdev->pdev, hca_pcie_cap + PCI_EXP_LNKCTL,
253 linkctl)) { 253 linkctl)) {
254 err = -ENODEV; 254 err = -ENODEV;
255 mthca_err(mdev, "Couldn't restore HCA PCI Express " 255 mthca_err(mdev, "Couldn't restore HCA PCI Express "
256 "Link control register, aborting.\n"); 256 "Link control register, aborting.\n");
diff --git a/drivers/infiniband/hw/nes/Makefile b/drivers/infiniband/hw/nes/Makefile
index 97820c23ece..35148513c47 100644
--- a/drivers/infiniband/hw/nes/Makefile
+++ b/drivers/infiniband/hw/nes/Makefile
@@ -1,3 +1,3 @@
1obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o 1obj-$(CONFIG_INFINIBAND_NES) += iw_nes.o
2 2
3iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o nes_mgt.o 3iw_nes-objs := nes.o nes_hw.o nes_nic.o nes_utils.o nes_verbs.o nes_cm.o
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 5b152a366df..2d668c69f6d 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -79,7 +79,12 @@ int disable_mpa_crc = 0;
79module_param(disable_mpa_crc, int, 0644); 79module_param(disable_mpa_crc, int, 0644);
80MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC"); 80MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
81 81
82unsigned int nes_drv_opt = NES_DRV_OPT_DISABLE_INT_MOD | NES_DRV_OPT_ENABLE_PAU; 82unsigned int send_first = 0;
83module_param(send_first, int, 0644);
84MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
85
86
87unsigned int nes_drv_opt = 0;
83module_param(nes_drv_opt, int, 0644); 88module_param(nes_drv_opt, int, 0644);
84MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters"); 89MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
85 90
@@ -91,7 +96,7 @@ unsigned int wqm_quanta = 0x10000;
91module_param(wqm_quanta, int, 0644); 96module_param(wqm_quanta, int, 0644);
92MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); 97MODULE_PARM_DESC(wqm_quanta, "WQM quanta");
93 98
94static bool limit_maxrdreqsz; 99static unsigned int limit_maxrdreqsz;
95module_param(limit_maxrdreqsz, bool, 0644); 100module_param(limit_maxrdreqsz, bool, 0644);
96MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); 101MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes");
97 102
@@ -125,6 +130,9 @@ static struct notifier_block nes_net_notifier = {
125 .notifier_call = nes_net_event 130 .notifier_call = nes_net_event
126}; 131};
127 132
133
134
135
128/** 136/**
129 * nes_inetaddr_event 137 * nes_inetaddr_event
130 */ 138 */
@@ -313,9 +321,6 @@ void nes_rem_ref(struct ib_qp *ibqp)
313 } 321 }
314 322
315 if (atomic_dec_and_test(&nesqp->refcount)) { 323 if (atomic_dec_and_test(&nesqp->refcount)) {
316 if (nesqp->pau_mode)
317 nes_destroy_pau_qp(nesdev, nesqp);
318
319 /* Destroy the QP */ 324 /* Destroy the QP */
320 cqp_request = nes_get_cqp_request(nesdev); 325 cqp_request = nes_get_cqp_request(nesdev);
321 if (cqp_request == NULL) { 326 if (cqp_request == NULL) {
@@ -444,7 +449,7 @@ static irqreturn_t nes_interrupt(int irq, void *dev_id)
444/** 449/**
445 * nes_probe - Device initialization 450 * nes_probe - Device initialization
446 */ 451 */
447static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent) 452static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
448{ 453{
449 struct net_device *netdev = NULL; 454 struct net_device *netdev = NULL;
450 struct nes_device *nesdev = NULL; 455 struct nes_device *nesdev = NULL;
@@ -749,7 +754,7 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
749/** 754/**
750 * nes_remove - unload from kernel 755 * nes_remove - unload from kernel
751 */ 756 */
752static void nes_remove(struct pci_dev *pcidev) 757static void __devexit nes_remove(struct pci_dev *pcidev)
753{ 758{
754 struct nes_device *nesdev = pci_get_drvdata(pcidev); 759 struct nes_device *nesdev = pci_get_drvdata(pcidev);
755 struct net_device *netdev; 760 struct net_device *netdev;
@@ -810,7 +815,7 @@ static struct pci_driver nes_pci_driver = {
810 .name = DRV_NAME, 815 .name = DRV_NAME,
811 .id_table = nes_pci_table, 816 .id_table = nes_pci_table,
812 .probe = nes_probe, 817 .probe = nes_probe,
813 .remove = nes_remove, 818 .remove = __devexit_p(nes_remove),
814}; 819};
815 820
816static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf) 821static ssize_t nes_show_adapter(struct device_driver *ddp, char *buf)
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 33cc58941a3..6fe79876009 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -57,7 +57,7 @@
57#define QUEUE_DISCONNECTS 57#define QUEUE_DISCONNECTS
58 58
59#define DRV_NAME "iw_nes" 59#define DRV_NAME "iw_nes"
60#define DRV_VERSION "1.5.0.1" 60#define DRV_VERSION "1.5.0.0"
61#define PFX DRV_NAME ": " 61#define PFX DRV_NAME ": "
62 62
63/* 63/*
@@ -102,7 +102,6 @@
102#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080 102#define NES_DRV_OPT_NO_INLINE_DATA 0x00000080
103#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100 103#define NES_DRV_OPT_DISABLE_INT_MOD 0x00000100
104#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200 104#define NES_DRV_OPT_DISABLE_VIRT_WQ 0x00000200
105#define NES_DRV_OPT_ENABLE_PAU 0x00000400
106 105
107#define NES_AEQ_EVENT_TIMEOUT 2500 106#define NES_AEQ_EVENT_TIMEOUT 2500
108#define NES_DISCONNECT_EVENT_TIMEOUT 2000 107#define NES_DISCONNECT_EVENT_TIMEOUT 2000
@@ -129,7 +128,6 @@
129#define NES_DBG_IW_RX 0x00020000 128#define NES_DBG_IW_RX 0x00020000
130#define NES_DBG_IW_TX 0x00040000 129#define NES_DBG_IW_TX 0x00040000
131#define NES_DBG_SHUTDOWN 0x00080000 130#define NES_DBG_SHUTDOWN 0x00080000
132#define NES_DBG_PAU 0x00100000
133#define NES_DBG_RSVD1 0x10000000 131#define NES_DBG_RSVD1 0x10000000
134#define NES_DBG_RSVD2 0x20000000 132#define NES_DBG_RSVD2 0x20000000
135#define NES_DBG_RSVD3 0x40000000 133#define NES_DBG_RSVD3 0x40000000
@@ -164,7 +162,6 @@ do { \
164#include "nes_context.h" 162#include "nes_context.h"
165#include "nes_user.h" 163#include "nes_user.h"
166#include "nes_cm.h" 164#include "nes_cm.h"
167#include "nes_mgt.h"
168 165
169extern int max_mtu; 166extern int max_mtu;
170#define max_frame_len (max_mtu+ETH_HLEN) 167#define max_frame_len (max_mtu+ETH_HLEN)
@@ -172,6 +169,7 @@ extern int interrupt_mod_interval;
172extern int nes_if_count; 169extern int nes_if_count;
173extern int mpa_version; 170extern int mpa_version;
174extern int disable_mpa_crc; 171extern int disable_mpa_crc;
172extern unsigned int send_first;
175extern unsigned int nes_drv_opt; 173extern unsigned int nes_drv_opt;
176extern unsigned int nes_debug_level; 174extern unsigned int nes_debug_level;
177extern unsigned int wqm_quanta; 175extern unsigned int wqm_quanta;
@@ -204,8 +202,6 @@ extern atomic_t cm_nodes_created;
204extern atomic_t cm_nodes_destroyed; 202extern atomic_t cm_nodes_destroyed;
205extern atomic_t cm_accel_dropped_pkts; 203extern atomic_t cm_accel_dropped_pkts;
206extern atomic_t cm_resets_recvd; 204extern atomic_t cm_resets_recvd;
207extern atomic_t pau_qps_created;
208extern atomic_t pau_qps_destroyed;
209 205
210extern u32 int_mod_timer_init; 206extern u32 int_mod_timer_init;
211extern u32 int_mod_cq_depth_256; 207extern u32 int_mod_cq_depth_256;
@@ -277,14 +273,6 @@ struct nes_device {
277 u8 link_recheck; 273 u8 link_recheck;
278}; 274};
279 275
280/* Receive skb private area - must fit in skb->cb area */
281struct nes_rskb_cb {
282 u64 busaddr;
283 u32 maplen;
284 u32 seqnum;
285 u8 *data_start;
286 struct nes_qp *nesqp;
287};
288 276
289static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad) 277static inline __le32 get_crc_value(struct nes_v4_quad *nes_quad)
290{ 278{
@@ -317,8 +305,8 @@ set_wqe_32bit_value(__le32 *wqe_words, u32 index, u32 value)
317static inline void 305static inline void
318nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev) 306nes_fill_init_cqp_wqe(struct nes_hw_cqp_wqe *cqp_wqe, struct nes_device *nesdev)
319{ 307{
320 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX] = 0; 308 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_CTX_LOW_IDX,
321 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0; 309 (u64)((unsigned long) &nesdev->cqp));
322 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0; 310 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
323 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0; 311 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
324 cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0; 312 cqp_wqe->wqe_words[NES_CQP_STAG_WQE_PBL_BLK_COUNT_IDX] = 0;
@@ -398,20 +386,11 @@ static inline void nes_write8(void __iomem *addr, u8 val)
398 writeb(val, addr); 386 writeb(val, addr);
399} 387}
400 388
401enum nes_resource { 389
402 NES_RESOURCE_MW = 1,
403 NES_RESOURCE_FAST_MR,
404 NES_RESOURCE_PHYS_MR,
405 NES_RESOURCE_USER_MR,
406 NES_RESOURCE_PD,
407 NES_RESOURCE_QP,
408 NES_RESOURCE_CQ,
409 NES_RESOURCE_ARP
410};
411 390
412static inline int nes_alloc_resource(struct nes_adapter *nesadapter, 391static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
413 unsigned long *resource_array, u32 max_resources, 392 unsigned long *resource_array, u32 max_resources,
414 u32 *req_resource_num, u32 *next, enum nes_resource resource_type) 393 u32 *req_resource_num, u32 *next)
415{ 394{
416 unsigned long flags; 395 unsigned long flags;
417 u32 resource_num; 396 u32 resource_num;
@@ -422,7 +401,7 @@ static inline int nes_alloc_resource(struct nes_adapter *nesadapter,
422 if (resource_num >= max_resources) { 401 if (resource_num >= max_resources) {
423 resource_num = find_first_zero_bit(resource_array, max_resources); 402 resource_num = find_first_zero_bit(resource_array, max_resources);
424 if (resource_num >= max_resources) { 403 if (resource_num >= max_resources) {
425 printk(KERN_ERR PFX "%s: No available resources [type=%u].\n", __func__, resource_type); 404 printk(KERN_ERR PFX "%s: No available resourcess.\n", __func__);
426 spin_unlock_irqrestore(&nesadapter->resource_lock, flags); 405 spin_unlock_irqrestore(&nesadapter->resource_lock, flags);
427 return -EMFILE; 406 return -EMFILE;
428 } 407 }
@@ -532,7 +511,6 @@ void nes_iwarp_ce_handler(struct nes_device *, struct nes_hw_cq *);
532int nes_destroy_cqp(struct nes_device *); 511int nes_destroy_cqp(struct nes_device *);
533int nes_nic_cm_xmit(struct sk_buff *, struct net_device *); 512int nes_nic_cm_xmit(struct sk_buff *, struct net_device *);
534void nes_recheck_link_status(struct work_struct *work); 513void nes_recheck_link_status(struct work_struct *work);
535void nes_terminate_timeout(unsigned long context);
536 514
537/* nes_nic.c */ 515/* nes_nic.c */
538struct net_device *nes_netdev_init(struct nes_device *, void __iomem *); 516struct net_device *nes_netdev_init(struct nes_device *, void __iomem *);
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 22ea67eea5d..a237547330b 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -77,19 +77,26 @@ atomic_t cm_nodes_destroyed;
77atomic_t cm_accel_dropped_pkts; 77atomic_t cm_accel_dropped_pkts;
78atomic_t cm_resets_recvd; 78atomic_t cm_resets_recvd;
79 79
80static inline int mini_cm_accelerated(struct nes_cm_core *, struct nes_cm_node *); 80static inline int mini_cm_accelerated(struct nes_cm_core *,
81static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *, struct nes_vnic *, struct nes_cm_info *); 81 struct nes_cm_node *);
82static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *,
83 struct nes_vnic *, struct nes_cm_info *);
82static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *); 84static int mini_cm_del_listen(struct nes_cm_core *, struct nes_cm_listener *);
83static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *, struct nes_vnic *, u16, void *, struct nes_cm_info *); 85static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *,
86 struct nes_vnic *, u16, void *, struct nes_cm_info *);
84static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *); 87static int mini_cm_close(struct nes_cm_core *, struct nes_cm_node *);
85static int mini_cm_accept(struct nes_cm_core *, struct nes_cm_node *); 88static int mini_cm_accept(struct nes_cm_core *, struct ietf_mpa_frame *,
86static int mini_cm_reject(struct nes_cm_core *, struct nes_cm_node *); 89 struct nes_cm_node *);
87static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *, struct sk_buff *); 90static int mini_cm_reject(struct nes_cm_core *, struct ietf_mpa_frame *,
91 struct nes_cm_node *);
92static int mini_cm_recv_pkt(struct nes_cm_core *, struct nes_vnic *,
93 struct sk_buff *);
88static int mini_cm_dealloc_core(struct nes_cm_core *); 94static int mini_cm_dealloc_core(struct nes_cm_core *);
89static int mini_cm_get(struct nes_cm_core *); 95static int mini_cm_get(struct nes_cm_core *);
90static int mini_cm_set(struct nes_cm_core *, u32, u32); 96static int mini_cm_set(struct nes_cm_core *, u32, u32);
91 97
92static void form_cm_frame(struct sk_buff *, struct nes_cm_node *, void *, u32, void *, u32, u8); 98static void form_cm_frame(struct sk_buff *, struct nes_cm_node *,
99 void *, u32, void *, u32, u8);
93static int add_ref_cm_node(struct nes_cm_node *); 100static int add_ref_cm_node(struct nes_cm_node *);
94static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *); 101static int rem_ref_cm_node(struct nes_cm_core *, struct nes_cm_node *);
95 102
@@ -104,14 +111,16 @@ static int send_syn(struct nes_cm_node *, u32, struct sk_buff *);
104static int send_reset(struct nes_cm_node *, struct sk_buff *); 111static int send_reset(struct nes_cm_node *, struct sk_buff *);
105static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb); 112static int send_ack(struct nes_cm_node *cm_node, struct sk_buff *skb);
106static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb); 113static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb);
107static void process_packet(struct nes_cm_node *, struct sk_buff *, struct nes_cm_core *); 114static void process_packet(struct nes_cm_node *, struct sk_buff *,
115 struct nes_cm_core *);
108 116
109static void active_open_err(struct nes_cm_node *, struct sk_buff *, int); 117static void active_open_err(struct nes_cm_node *, struct sk_buff *, int);
110static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int); 118static void passive_open_err(struct nes_cm_node *, struct sk_buff *, int);
111static void cleanup_retrans_entry(struct nes_cm_node *); 119static void cleanup_retrans_entry(struct nes_cm_node *);
112static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *); 120static void handle_rcv_mpa(struct nes_cm_node *, struct sk_buff *);
113static void free_retrans_entry(struct nes_cm_node *cm_node); 121static void free_retrans_entry(struct nes_cm_node *cm_node);
114static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, struct sk_buff *skb, int optionsize, int passive); 122static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
123 struct sk_buff *skb, int optionsize, int passive);
115 124
116/* CM event handler functions */ 125/* CM event handler functions */
117static void cm_event_connected(struct nes_cm_event *); 126static void cm_event_connected(struct nes_cm_event *);
@@ -121,12 +130,6 @@ static void cm_event_mpa_req(struct nes_cm_event *);
121static void cm_event_mpa_reject(struct nes_cm_event *); 130static void cm_event_mpa_reject(struct nes_cm_event *);
122static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node); 131static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node);
123 132
124/* MPA build functions */
125static int cm_build_mpa_frame(struct nes_cm_node *, u8 **, u16 *, u8 *, u8);
126static void build_mpa_v2(struct nes_cm_node *, void *, u8);
127static void build_mpa_v1(struct nes_cm_node *, void *, u8);
128static void build_rdma0_msg(struct nes_cm_node *, struct nes_qp **);
129
130static void print_core(struct nes_cm_core *core); 133static void print_core(struct nes_cm_core *core);
131 134
132/* External CM API Interface */ 135/* External CM API Interface */
@@ -156,21 +159,12 @@ atomic_t cm_connecteds;
156atomic_t cm_connect_reqs; 159atomic_t cm_connect_reqs;
157atomic_t cm_rejects; 160atomic_t cm_rejects;
158 161
159int nes_add_ref_cm_node(struct nes_cm_node *cm_node)
160{
161 return add_ref_cm_node(cm_node);
162}
163
164int nes_rem_ref_cm_node(struct nes_cm_node *cm_node)
165{
166 return rem_ref_cm_node(cm_node->cm_core, cm_node);
167}
168 162
169/** 163/**
170 * create_event 164 * create_event
171 */ 165 */
172static struct nes_cm_event *create_event(struct nes_cm_node * cm_node, 166static struct nes_cm_event *create_event(struct nes_cm_node *cm_node,
173 enum nes_cm_event_type type) 167 enum nes_cm_event_type type)
174{ 168{
175 struct nes_cm_event *event; 169 struct nes_cm_event *event;
176 170
@@ -192,10 +186,10 @@ static struct nes_cm_event *create_event(struct nes_cm_node * cm_node,
192 event->cm_info.cm_id = cm_node->cm_id; 186 event->cm_info.cm_id = cm_node->cm_id;
193 187
194 nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, " 188 nes_debug(NES_DBG_CM, "cm_node=%p Created event=%p, type=%u, "
195 "dst_addr=%08x[%x], src_addr=%08x[%x]\n", 189 "dst_addr=%08x[%x], src_addr=%08x[%x]\n",
196 cm_node, event, type, event->cm_info.loc_addr, 190 cm_node, event, type, event->cm_info.loc_addr,
197 event->cm_info.loc_port, event->cm_info.rem_addr, 191 event->cm_info.loc_port, event->cm_info.rem_addr,
198 event->cm_info.rem_port); 192 event->cm_info.rem_port);
199 193
200 nes_cm_post_event(event); 194 nes_cm_post_event(event);
201 return event; 195 return event;
@@ -207,19 +201,14 @@ static struct nes_cm_event *create_event(struct nes_cm_node * cm_node,
207 */ 201 */
208static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb) 202static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
209{ 203{
210 u8 start_addr = 0;
211 u8 *start_ptr = &start_addr;
212 u8 **start_buff = &start_ptr;
213 u16 buff_len = 0;
214
215 if (!skb) { 204 if (!skb) {
216 nes_debug(NES_DBG_CM, "skb set to NULL\n"); 205 nes_debug(NES_DBG_CM, "skb set to NULL\n");
217 return -1; 206 return -1;
218 } 207 }
219 208
220 /* send an MPA Request frame */ 209 /* send an MPA Request frame */
221 cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REQUEST); 210 form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
222 form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK); 211 cm_node->mpa_frame_size, SET_ACK);
223 212
224 return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); 213 return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
225} 214}
@@ -228,12 +217,7 @@ static int send_mpa_request(struct nes_cm_node *cm_node, struct sk_buff *skb)
228 217
229static int send_mpa_reject(struct nes_cm_node *cm_node) 218static int send_mpa_reject(struct nes_cm_node *cm_node)
230{ 219{
231 struct sk_buff *skb = NULL; 220 struct sk_buff *skb = NULL;
232 u8 start_addr = 0;
233 u8 *start_ptr = &start_addr;
234 u8 **start_buff = &start_ptr;
235 u16 buff_len = 0;
236 struct ietf_mpa_v1 *mpa_frame;
237 221
238 skb = dev_alloc_skb(MAX_CM_BUFFER); 222 skb = dev_alloc_skb(MAX_CM_BUFFER);
239 if (!skb) { 223 if (!skb) {
@@ -242,10 +226,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node)
242 } 226 }
243 227
244 /* send an MPA reject frame */ 228 /* send an MPA reject frame */
245 cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); 229 form_cm_frame(skb, cm_node, NULL, 0, &cm_node->mpa_frame,
246 mpa_frame = (struct ietf_mpa_v1 *)*start_buff; 230 cm_node->mpa_frame_size, SET_ACK | SET_FIN);
247 mpa_frame->flags |= IETF_MPA_FLAGS_REJECT;
248 form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN);
249 231
250 cm_node->state = NES_CM_STATE_FIN_WAIT1; 232 cm_node->state = NES_CM_STATE_FIN_WAIT1;
251 return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0); 233 return schedule_nes_timer(cm_node, skb, NES_TIMER_TYPE_SEND, 1, 0);
@@ -257,31 +239,24 @@ static int send_mpa_reject(struct nes_cm_node *cm_node)
257 * IETF MPA frame 239 * IETF MPA frame
258 */ 240 */
259static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, 241static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
260 u32 len) 242 u32 len)
261{ 243{
262 struct ietf_mpa_v1 *mpa_frame; 244 struct ietf_mpa_frame *mpa_frame;
263 struct ietf_mpa_v2 *mpa_v2_frame;
264 struct ietf_rtr_msg *rtr_msg;
265 int mpa_hdr_len;
266 int priv_data_len;
267 245
268 *type = NES_MPA_REQUEST_ACCEPT; 246 *type = NES_MPA_REQUEST_ACCEPT;
269 247
270 /* assume req frame is in tcp data payload */ 248 /* assume req frame is in tcp data payload */
271 if (len < sizeof(struct ietf_mpa_v1)) { 249 if (len < sizeof(struct ietf_mpa_frame)) {
272 nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len); 250 nes_debug(NES_DBG_CM, "The received ietf buffer was too small (%x)\n", len);
273 return -EINVAL; 251 return -EINVAL;
274 } 252 }
275 253
276 /* points to the beginning of the frame, which could be MPA V1 or V2 */ 254 mpa_frame = (struct ietf_mpa_frame *)buffer;
277 mpa_frame = (struct ietf_mpa_v1 *)buffer; 255 cm_node->mpa_frame_size = ntohs(mpa_frame->priv_data_len);
278 mpa_hdr_len = sizeof(struct ietf_mpa_v1);
279 priv_data_len = ntohs(mpa_frame->priv_data_len);
280
281 /* make sure mpa private data len is less than 512 bytes */ 256 /* make sure mpa private data len is less than 512 bytes */
282 if (priv_data_len > IETF_MAX_PRIV_DATA_LEN) { 257 if (cm_node->mpa_frame_size > IETF_MAX_PRIV_DATA_LEN) {
283 nes_debug(NES_DBG_CM, "The received Length of Private" 258 nes_debug(NES_DBG_CM, "The received Length of Private"
284 " Data field exceeds 512 octets\n"); 259 " Data field exceeds 512 octets\n");
285 return -EINVAL; 260 return -EINVAL;
286 } 261 }
287 /* 262 /*
@@ -289,22 +264,11 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
289 * received MPA version and MPA key information 264 * received MPA version and MPA key information
290 * 265 *
291 */ 266 */
292 if (mpa_frame->rev != IETF_MPA_V1 && mpa_frame->rev != IETF_MPA_V2) { 267 if (mpa_frame->rev != mpa_version) {
293 nes_debug(NES_DBG_CM, "The received mpa version" 268 nes_debug(NES_DBG_CM, "The received mpa version"
294 " is not supported\n"); 269 " can not be interoperated\n");
295 return -EINVAL; 270 return -EINVAL;
296 } 271 }
297 /*
298 * backwards compatibility only
299 */
300 if (mpa_frame->rev > cm_node->mpa_frame_rev) {
301 nes_debug(NES_DBG_CM, "The received mpa version"
302 " can not be interoperated\n");
303 return -EINVAL;
304 } else {
305 cm_node->mpa_frame_rev = mpa_frame->rev;
306 }
307
308 if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) { 272 if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
309 if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) { 273 if (memcmp(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE)) {
310 nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n"); 274 nes_debug(NES_DBG_CM, "Unexpected MPA Key received \n");
@@ -317,78 +281,25 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
317 } 281 }
318 } 282 }
319 283
320 284 if (cm_node->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
321 if (priv_data_len + mpa_hdr_len != len) {
322 nes_debug(NES_DBG_CM, "The received ietf buffer was not right" 285 nes_debug(NES_DBG_CM, "The received ietf buffer was not right"
323 " complete (%x + %x != %x)\n", 286 " complete (%x + %x != %x)\n",
324 priv_data_len, mpa_hdr_len, len); 287 cm_node->mpa_frame_size,
288 (u32)sizeof(struct ietf_mpa_frame), len);
325 return -EINVAL; 289 return -EINVAL;
326 } 290 }
327 /* make sure it does not exceed the max size */ 291 /* make sure it does not exceed the max size */
328 if (len > MAX_CM_BUFFER) { 292 if (len > MAX_CM_BUFFER) {
329 nes_debug(NES_DBG_CM, "The received ietf buffer was too large" 293 nes_debug(NES_DBG_CM, "The received ietf buffer was too large"
330 " (%x + %x != %x)\n", 294 " (%x + %x != %x)\n",
331 priv_data_len, mpa_hdr_len, len); 295 cm_node->mpa_frame_size,
296 (u32)sizeof(struct ietf_mpa_frame), len);
332 return -EINVAL; 297 return -EINVAL;
333 } 298 }
334 299
335 cm_node->mpa_frame_size = priv_data_len;
336
337 switch (mpa_frame->rev) {
338 case IETF_MPA_V2: {
339 u16 ird_size;
340 u16 ord_size;
341 u16 rtr_ctrl_ird;
342 u16 rtr_ctrl_ord;
343
344 mpa_v2_frame = (struct ietf_mpa_v2 *)buffer;
345 mpa_hdr_len += IETF_RTR_MSG_SIZE;
346 cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE;
347 rtr_msg = &mpa_v2_frame->rtr_msg;
348
349 /* parse rtr message */
350 rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird);
351 rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord);
352 ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD;
353 ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD;
354
355 if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) {
356 /* send reset */
357 return -EINVAL;
358 }
359
360 if (cm_node->state != NES_CM_STATE_MPAREQ_SENT) {
361 /* responder */
362 if (cm_node->ord_size > ird_size)
363 cm_node->ord_size = ird_size;
364 } else {
365 /* initiator */
366 if (cm_node->ord_size > ird_size)
367 cm_node->ord_size = ird_size;
368
369 if (cm_node->ird_size < ord_size) {
370 /* no resources available */
371 /* send terminate message */
372 return -EINVAL;
373 }
374 }
375
376 if (rtr_ctrl_ord & IETF_RDMA0_READ) {
377 cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
378 } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) {
379 cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO;
380 } else { /* Not supported RDMA0 operation */
381 return -EINVAL;
382 }
383 break;
384 }
385 case IETF_MPA_V1:
386 default:
387 break;
388 }
389
390 /* copy entire MPA frame to our cm_node's frame */ 300 /* copy entire MPA frame to our cm_node's frame */
391 memcpy(cm_node->mpa_frame_buf, buffer + mpa_hdr_len, cm_node->mpa_frame_size); 301 memcpy(cm_node->mpa_frame_buf, buffer + sizeof(struct ietf_mpa_frame),
302 cm_node->mpa_frame_size);
392 303
393 if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT) 304 if (mpa_frame->flags & IETF_MPA_FLAGS_REJECT)
394 *type = NES_MPA_REQUEST_REJECT; 305 *type = NES_MPA_REQUEST_REJECT;
@@ -401,8 +312,8 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type,
401 * node info to build. 312 * node info to build.
402 */ 313 */
403static void form_cm_frame(struct sk_buff *skb, 314static void form_cm_frame(struct sk_buff *skb,
404 struct nes_cm_node *cm_node, void *options, u32 optionsize, 315 struct nes_cm_node *cm_node, void *options, u32 optionsize,
405 void *data, u32 datasize, u8 flags) 316 void *data, u32 datasize, u8 flags)
406{ 317{
407 struct tcphdr *tcph; 318 struct tcphdr *tcph;
408 struct iphdr *iph; 319 struct iphdr *iph;
@@ -411,14 +322,14 @@ static void form_cm_frame(struct sk_buff *skb,
411 u16 packetsize = sizeof(*iph); 322 u16 packetsize = sizeof(*iph);
412 323
413 packetsize += sizeof(*tcph); 324 packetsize += sizeof(*tcph);
414 packetsize += optionsize + datasize; 325 packetsize += optionsize + datasize;
415 326
416 skb_trim(skb, 0);
417 memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph)); 327 memset(skb->data, 0x00, ETH_HLEN + sizeof(*iph) + sizeof(*tcph));
418 328
329 skb->len = 0;
419 buf = skb_put(skb, packetsize + ETH_HLEN); 330 buf = skb_put(skb, packetsize + ETH_HLEN);
420 331
421 ethh = (struct ethhdr *)buf; 332 ethh = (struct ethhdr *) buf;
422 buf += ETH_HLEN; 333 buf += ETH_HLEN;
423 334
424 iph = (struct iphdr *)buf; 335 iph = (struct iphdr *)buf;
@@ -426,12 +337,10 @@ static void form_cm_frame(struct sk_buff *skb,
426 tcph = (struct tcphdr *)buf; 337 tcph = (struct tcphdr *)buf;
427 skb_reset_mac_header(skb); 338 skb_reset_mac_header(skb);
428 skb_set_network_header(skb, ETH_HLEN); 339 skb_set_network_header(skb, ETH_HLEN);
429 skb_set_transport_header(skb, ETH_HLEN + sizeof(*iph)); 340 skb_set_transport_header(skb, ETH_HLEN+sizeof(*iph));
430 buf += sizeof(*tcph); 341 buf += sizeof(*tcph);
431 342
432 skb->ip_summed = CHECKSUM_PARTIAL; 343 skb->ip_summed = CHECKSUM_PARTIAL;
433 if (!(cm_node->netdev->features & NETIF_F_IP_CSUM))
434 skb->ip_summed = CHECKSUM_NONE;
435 skb->protocol = htons(0x800); 344 skb->protocol = htons(0x800);
436 skb->data_len = 0; 345 skb->data_len = 0;
437 skb->mac_len = ETH_HLEN; 346 skb->mac_len = ETH_HLEN;
@@ -441,14 +350,14 @@ static void form_cm_frame(struct sk_buff *skb,
441 ethh->h_proto = htons(0x0800); 350 ethh->h_proto = htons(0x0800);
442 351
443 iph->version = IPVERSION; 352 iph->version = IPVERSION;
444 iph->ihl = 5; /* 5 * 4Byte words, IP headr len */ 353 iph->ihl = 5; /* 5 * 4Byte words, IP headr len */
445 iph->tos = 0; 354 iph->tos = 0;
446 iph->tot_len = htons(packetsize); 355 iph->tot_len = htons(packetsize);
447 iph->id = htons(++cm_node->tcp_cntxt.loc_id); 356 iph->id = htons(++cm_node->tcp_cntxt.loc_id);
448 357
449 iph->frag_off = htons(0x4000); 358 iph->frag_off = htons(0x4000);
450 iph->ttl = 0x40; 359 iph->ttl = 0x40;
451 iph->protocol = 0x06; /* IPPROTO_TCP */ 360 iph->protocol = 0x06; /* IPPROTO_TCP */
452 361
453 iph->saddr = htonl(cm_node->loc_addr); 362 iph->saddr = htonl(cm_node->loc_addr);
454 iph->daddr = htonl(cm_node->rem_addr); 363 iph->daddr = htonl(cm_node->rem_addr);
@@ -461,16 +370,14 @@ static void form_cm_frame(struct sk_buff *skb,
461 cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt; 370 cm_node->tcp_cntxt.loc_ack_num = cm_node->tcp_cntxt.rcv_nxt;
462 tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num); 371 tcph->ack_seq = htonl(cm_node->tcp_cntxt.loc_ack_num);
463 tcph->ack = 1; 372 tcph->ack = 1;
464 } else { 373 } else
465 tcph->ack_seq = 0; 374 tcph->ack_seq = 0;
466 }
467 375
468 if (flags & SET_SYN) { 376 if (flags & SET_SYN) {
469 cm_node->tcp_cntxt.loc_seq_num++; 377 cm_node->tcp_cntxt.loc_seq_num++;
470 tcph->syn = 1; 378 tcph->syn = 1;
471 } else { 379 } else
472 cm_node->tcp_cntxt.loc_seq_num += datasize; 380 cm_node->tcp_cntxt.loc_seq_num += datasize;
473 }
474 381
475 if (flags & SET_FIN) { 382 if (flags & SET_FIN) {
476 cm_node->tcp_cntxt.loc_seq_num++; 383 cm_node->tcp_cntxt.loc_seq_num++;
@@ -491,8 +398,10 @@ static void form_cm_frame(struct sk_buff *skb,
491 398
492 skb_shinfo(skb)->nr_frags = 0; 399 skb_shinfo(skb)->nr_frags = 0;
493 cm_packets_created++; 400 cm_packets_created++;
401
494} 402}
495 403
404
496/** 405/**
497 * print_core - dump a cm core 406 * print_core - dump a cm core
498 */ 407 */
@@ -504,7 +413,7 @@ static void print_core(struct nes_cm_core *core)
504 return; 413 return;
505 nes_debug(NES_DBG_CM, "---------------------------------------------\n"); 414 nes_debug(NES_DBG_CM, "---------------------------------------------\n");
506 415
507 nes_debug(NES_DBG_CM, "State : %u \n", core->state); 416 nes_debug(NES_DBG_CM, "State : %u \n", core->state);
508 417
509 nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt)); 418 nes_debug(NES_DBG_CM, "Listen Nodes : %u \n", atomic_read(&core->listen_node_cnt));
510 nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt)); 419 nes_debug(NES_DBG_CM, "Active Nodes : %u \n", atomic_read(&core->node_cnt));
@@ -514,147 +423,6 @@ static void print_core(struct nes_cm_core *core)
514 nes_debug(NES_DBG_CM, "-------------- end core ---------------\n"); 423 nes_debug(NES_DBG_CM, "-------------- end core ---------------\n");
515} 424}
516 425
517/**
518 * cm_build_mpa_frame - build a MPA V1 frame or MPA V2 frame
519 */
520static int cm_build_mpa_frame(struct nes_cm_node *cm_node, u8 **start_buff,
521 u16 *buff_len, u8 *pci_mem, u8 mpa_key)
522{
523 int ret = 0;
524
525 *start_buff = (pci_mem) ? pci_mem : &cm_node->mpa_frame_buf[0];
526
527 switch (cm_node->mpa_frame_rev) {
528 case IETF_MPA_V1:
529 *start_buff = (u8 *)*start_buff + sizeof(struct ietf_rtr_msg);
530 *buff_len = sizeof(struct ietf_mpa_v1) + cm_node->mpa_frame_size;
531 build_mpa_v1(cm_node, *start_buff, mpa_key);
532 break;
533 case IETF_MPA_V2:
534 *buff_len = sizeof(struct ietf_mpa_v2) + cm_node->mpa_frame_size;
535 build_mpa_v2(cm_node, *start_buff, mpa_key);
536 break;
537 default:
538 ret = -EINVAL;
539 }
540 return ret;
541}
542
543/**
544 * build_mpa_v2 - build a MPA V2 frame
545 */
546static void build_mpa_v2(struct nes_cm_node *cm_node,
547 void *start_addr, u8 mpa_key)
548{
549 struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr;
550 struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg;
551 u16 ctrl_ird;
552 u16 ctrl_ord;
553
554 /* initialize the upper 5 bytes of the frame */
555 build_mpa_v1(cm_node, start_addr, mpa_key);
556 mpa_frame->flags |= IETF_MPA_V2_FLAG; /* set a bit to indicate MPA V2 */
557 mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE);
558
559 /* initialize RTR msg */
560 ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ?
561 IETF_NO_IRD_ORD : cm_node->ird_size;
562 ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ?
563 IETF_NO_IRD_ORD : cm_node->ord_size;
564
565 ctrl_ird |= IETF_PEER_TO_PEER;
566 ctrl_ird |= IETF_FLPDU_ZERO_LEN;
567
568 switch (mpa_key) {
569 case MPA_KEY_REQUEST:
570 ctrl_ord |= IETF_RDMA0_WRITE;
571 ctrl_ord |= IETF_RDMA0_READ;
572 break;
573 case MPA_KEY_REPLY:
574 switch (cm_node->send_rdma0_op) {
575 case SEND_RDMA_WRITE_ZERO:
576 ctrl_ord |= IETF_RDMA0_WRITE;
577 break;
578 case SEND_RDMA_READ_ZERO:
579 ctrl_ord |= IETF_RDMA0_READ;
580 break;
581 }
582 }
583 rtr_msg->ctrl_ird = htons(ctrl_ird);
584 rtr_msg->ctrl_ord = htons(ctrl_ord);
585}
586
587/**
588 * build_mpa_v1 - build a MPA V1 frame
589 */
590static void build_mpa_v1(struct nes_cm_node *cm_node, void *start_addr, u8 mpa_key)
591{
592 struct ietf_mpa_v1 *mpa_frame = (struct ietf_mpa_v1 *)start_addr;
593
594 switch (mpa_key) {
595 case MPA_KEY_REQUEST:
596 memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
597 break;
598 case MPA_KEY_REPLY:
599 memcpy(mpa_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
600 break;
601 }
602 mpa_frame->flags = IETF_MPA_FLAGS_CRC;
603 mpa_frame->rev = cm_node->mpa_frame_rev;
604 mpa_frame->priv_data_len = htons(cm_node->mpa_frame_size);
605}
606
607static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_addr)
608{
609 u64 u64temp;
610 struct nes_qp *nesqp = *nesqp_addr;
611 struct nes_hw_qp_wqe *wqe = &nesqp->hwqp.sq_vbase[0];
612
613 u64temp = (unsigned long)nesqp;
614 u64temp |= NES_SW_CONTEXT_ALIGN >> 1;
615 set_wqe_64bit_value(wqe->wqe_words, NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
616
617 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
618 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
619
620 switch (cm_node->send_rdma0_op) {
621 case SEND_RDMA_WRITE_ZERO:
622 nes_debug(NES_DBG_CM, "Sending first write.\n");
623 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
624 cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
625 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
626 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
627 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
628 break;
629
630 case SEND_RDMA_READ_ZERO:
631 default:
632 if (cm_node->send_rdma0_op != SEND_RDMA_READ_ZERO)
633 WARN(1, "Unsupported RDMA0 len operation=%u\n",
634 cm_node->send_rdma0_op);
635 nes_debug(NES_DBG_CM, "Sending first rdma operation.\n");
636 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
637 cpu_to_le32(NES_IWARP_SQ_OP_RDMAR);
638 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_LOW_IDX] = 1;
639 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_TO_HIGH_IDX] = 0;
640 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_LENGTH_IDX] = 0;
641 wqe->wqe_words[NES_IWARP_SQ_WQE_RDMA_STAG_IDX] = 1;
642 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 1;
643 break;
644 }
645
646 if (nesqp->sq_kmapped) {
647 nesqp->sq_kmapped = 0;
648 kunmap(nesqp->page);
649 }
650
651 /*use the reserved spot on the WQ for the extra first WQE*/
652 nesqp->nesqp_context->ird_ord_sizes &= cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
653 NES_QPCONTEXT_ORDIRD_WRPDU |
654 NES_QPCONTEXT_ORDIRD_ALSMM));
655 nesqp->skip_lsmm = 1;
656 nesqp->hwqp.sq_tail = 0;
657}
658 426
659/** 427/**
660 * schedule_nes_timer 428 * schedule_nes_timer
@@ -662,13 +430,14 @@ static void build_rdma0_msg(struct nes_cm_node *cm_node, struct nes_qp **nesqp_a
662 * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node); 430 * rem_ref_cm_node(cm_core, cm_node);add_ref_cm_node(cm_node);
663 */ 431 */
664int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb, 432int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
665 enum nes_timer_type type, int send_retrans, 433 enum nes_timer_type type, int send_retrans,
666 int close_when_complete) 434 int close_when_complete)
667{ 435{
668 unsigned long flags; 436 unsigned long flags;
669 struct nes_cm_core *cm_core = cm_node->cm_core; 437 struct nes_cm_core *cm_core = cm_node->cm_core;
670 struct nes_timer_entry *new_send; 438 struct nes_timer_entry *new_send;
671 int ret = 0; 439 int ret = 0;
440 u32 was_timer_set;
672 441
673 new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC); 442 new_send = kzalloc(sizeof(*new_send), GFP_ATOMIC);
674 if (!new_send) 443 if (!new_send)
@@ -685,7 +454,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
685 new_send->close_when_complete = close_when_complete; 454 new_send->close_when_complete = close_when_complete;
686 455
687 if (type == NES_TIMER_TYPE_CLOSE) { 456 if (type == NES_TIMER_TYPE_CLOSE) {
688 new_send->timetosend += (HZ / 10); 457 new_send->timetosend += (HZ/10);
689 if (cm_node->recv_entry) { 458 if (cm_node->recv_entry) {
690 kfree(new_send); 459 kfree(new_send);
691 WARN_ON(1); 460 WARN_ON(1);
@@ -706,7 +475,7 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
706 ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev); 475 ret = nes_nic_cm_xmit(new_send->skb, cm_node->netdev);
707 if (ret != NETDEV_TX_OK) { 476 if (ret != NETDEV_TX_OK) {
708 nes_debug(NES_DBG_CM, "Error sending packet %p " 477 nes_debug(NES_DBG_CM, "Error sending packet %p "
709 "(jiffies = %lu)\n", new_send, jiffies); 478 "(jiffies = %lu)\n", new_send, jiffies);
710 new_send->timetosend = jiffies; 479 new_send->timetosend = jiffies;
711 ret = NETDEV_TX_OK; 480 ret = NETDEV_TX_OK;
712 } else { 481 } else {
@@ -720,8 +489,12 @@ int schedule_nes_timer(struct nes_cm_node *cm_node, struct sk_buff *skb,
720 } 489 }
721 } 490 }
722 491
723 if (!timer_pending(&cm_core->tcp_timer)) 492 was_timer_set = timer_pending(&cm_core->tcp_timer);
724 mod_timer(&cm_core->tcp_timer, new_send->timetosend); 493
494 if (!was_timer_set) {
495 cm_core->tcp_timer.expires = new_send->timetosend;
496 add_timer(&cm_core->tcp_timer);
497 }
725 498
726 return ret; 499 return ret;
727} 500}
@@ -731,7 +504,6 @@ static void nes_retrans_expired(struct nes_cm_node *cm_node)
731 struct iw_cm_id *cm_id = cm_node->cm_id; 504 struct iw_cm_id *cm_id = cm_node->cm_id;
732 enum nes_cm_node_state state = cm_node->state; 505 enum nes_cm_node_state state = cm_node->state;
733 cm_node->state = NES_CM_STATE_CLOSED; 506 cm_node->state = NES_CM_STATE_CLOSED;
734
735 switch (state) { 507 switch (state) {
736 case NES_CM_STATE_SYN_RCVD: 508 case NES_CM_STATE_SYN_RCVD:
737 case NES_CM_STATE_CLOSING: 509 case NES_CM_STATE_CLOSING:
@@ -764,10 +536,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
764 spin_lock_irqsave(&nesqp->lock, qplockflags); 536 spin_lock_irqsave(&nesqp->lock, qplockflags);
765 if (nesqp->cm_id) { 537 if (nesqp->cm_id) {
766 nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " 538 nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
767 "refcount = %d: HIT A " 539 "refcount = %d: HIT A "
768 "NES_TIMER_TYPE_CLOSE with something " 540 "NES_TIMER_TYPE_CLOSE with something "
769 "to do!!!\n", nesqp->hwqp.qp_id, cm_id, 541 "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
770 atomic_read(&nesqp->refcount)); 542 atomic_read(&nesqp->refcount));
771 nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED; 543 nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
772 nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT; 544 nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
773 nesqp->ibqp_state = IB_QPS_ERR; 545 nesqp->ibqp_state = IB_QPS_ERR;
@@ -776,10 +548,10 @@ static void handle_recv_entry(struct nes_cm_node *cm_node, u32 rem_node)
776 } else { 548 } else {
777 spin_unlock_irqrestore(&nesqp->lock, qplockflags); 549 spin_unlock_irqrestore(&nesqp->lock, qplockflags);
778 nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, " 550 nes_debug(NES_DBG_CM, "QP%u: cm_id = %p, "
779 "refcount = %d: HIT A " 551 "refcount = %d: HIT A "
780 "NES_TIMER_TYPE_CLOSE with nothing " 552 "NES_TIMER_TYPE_CLOSE with nothing "
781 "to do!!!\n", nesqp->hwqp.qp_id, cm_id, 553 "to do!!!\n", nesqp->hwqp.qp_id, cm_id,
782 atomic_read(&nesqp->refcount)); 554 atomic_read(&nesqp->refcount));
783 } 555 }
784 } else if (rem_node) { 556 } else if (rem_node) {
785 /* TIME_WAIT state */ 557 /* TIME_WAIT state */
@@ -808,12 +580,11 @@ static void nes_cm_timer_tick(unsigned long pass)
808 int ret = NETDEV_TX_OK; 580 int ret = NETDEV_TX_OK;
809 581
810 struct list_head timer_list; 582 struct list_head timer_list;
811
812 INIT_LIST_HEAD(&timer_list); 583 INIT_LIST_HEAD(&timer_list);
813 spin_lock_irqsave(&cm_core->ht_lock, flags); 584 spin_lock_irqsave(&cm_core->ht_lock, flags);
814 585
815 list_for_each_safe(list_node, list_core_temp, 586 list_for_each_safe(list_node, list_core_temp,
816 &cm_core->connected_nodes) { 587 &cm_core->connected_nodes) {
817 cm_node = container_of(list_node, struct nes_cm_node, list); 588 cm_node = container_of(list_node, struct nes_cm_node, list);
818 if ((cm_node->recv_entry) || (cm_node->send_entry)) { 589 if ((cm_node->recv_entry) || (cm_node->send_entry)) {
819 add_ref_cm_node(cm_node); 590 add_ref_cm_node(cm_node);
@@ -824,19 +595,18 @@ static void nes_cm_timer_tick(unsigned long pass)
824 595
825 list_for_each_safe(list_node, list_core_temp, &timer_list) { 596 list_for_each_safe(list_node, list_core_temp, &timer_list) {
826 cm_node = container_of(list_node, struct nes_cm_node, 597 cm_node = container_of(list_node, struct nes_cm_node,
827 timer_entry); 598 timer_entry);
828 recv_entry = cm_node->recv_entry; 599 recv_entry = cm_node->recv_entry;
829 600
830 if (recv_entry) { 601 if (recv_entry) {
831 if (time_after(recv_entry->timetosend, jiffies)) { 602 if (time_after(recv_entry->timetosend, jiffies)) {
832 if (nexttimeout > recv_entry->timetosend || 603 if (nexttimeout > recv_entry->timetosend ||
833 !settimer) { 604 !settimer) {
834 nexttimeout = recv_entry->timetosend; 605 nexttimeout = recv_entry->timetosend;
835 settimer = 1; 606 settimer = 1;
836 } 607 }
837 } else { 608 } else
838 handle_recv_entry(cm_node, 1); 609 handle_recv_entry(cm_node, 1);
839 }
840 } 610 }
841 611
842 spin_lock_irqsave(&cm_node->retrans_list_lock, flags); 612 spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
@@ -847,8 +617,8 @@ static void nes_cm_timer_tick(unsigned long pass)
847 if (time_after(send_entry->timetosend, jiffies)) { 617 if (time_after(send_entry->timetosend, jiffies)) {
848 if (cm_node->state != NES_CM_STATE_TSA) { 618 if (cm_node->state != NES_CM_STATE_TSA) {
849 if ((nexttimeout > 619 if ((nexttimeout >
850 send_entry->timetosend) || 620 send_entry->timetosend) ||
851 !settimer) { 621 !settimer) {
852 nexttimeout = 622 nexttimeout =
853 send_entry->timetosend; 623 send_entry->timetosend;
854 settimer = 1; 624 settimer = 1;
@@ -860,13 +630,13 @@ static void nes_cm_timer_tick(unsigned long pass)
860 } 630 }
861 631
862 if ((cm_node->state == NES_CM_STATE_TSA) || 632 if ((cm_node->state == NES_CM_STATE_TSA) ||
863 (cm_node->state == NES_CM_STATE_CLOSED)) { 633 (cm_node->state == NES_CM_STATE_CLOSED)) {
864 free_retrans_entry(cm_node); 634 free_retrans_entry(cm_node);
865 break; 635 break;
866 } 636 }
867 637
868 if (!send_entry->retranscount || 638 if (!send_entry->retranscount ||
869 !send_entry->retrycount) { 639 !send_entry->retrycount) {
870 cm_packets_dropped++; 640 cm_packets_dropped++;
871 free_retrans_entry(cm_node); 641 free_retrans_entry(cm_node);
872 642
@@ -875,28 +645,28 @@ static void nes_cm_timer_tick(unsigned long pass)
875 nes_retrans_expired(cm_node); 645 nes_retrans_expired(cm_node);
876 cm_node->state = NES_CM_STATE_CLOSED; 646 cm_node->state = NES_CM_STATE_CLOSED;
877 spin_lock_irqsave(&cm_node->retrans_list_lock, 647 spin_lock_irqsave(&cm_node->retrans_list_lock,
878 flags); 648 flags);
879 break; 649 break;
880 } 650 }
881 atomic_inc(&send_entry->skb->users); 651 atomic_inc(&send_entry->skb->users);
882 cm_packets_retrans++; 652 cm_packets_retrans++;
883 nes_debug(NES_DBG_CM, "Retransmitting send_entry %p " 653 nes_debug(NES_DBG_CM, "Retransmitting send_entry %p "
884 "for node %p, jiffies = %lu, time to send = " 654 "for node %p, jiffies = %lu, time to send = "
885 "%lu, retranscount = %u, send_entry->seq_num = " 655 "%lu, retranscount = %u, send_entry->seq_num = "
886 "0x%08X, cm_node->tcp_cntxt.rem_ack_num = " 656 "0x%08X, cm_node->tcp_cntxt.rem_ack_num = "
887 "0x%08X\n", send_entry, cm_node, jiffies, 657 "0x%08X\n", send_entry, cm_node, jiffies,
888 send_entry->timetosend, 658 send_entry->timetosend,
889 send_entry->retranscount, 659 send_entry->retranscount,
890 send_entry->seq_num, 660 send_entry->seq_num,
891 cm_node->tcp_cntxt.rem_ack_num); 661 cm_node->tcp_cntxt.rem_ack_num);
892 662
893 spin_unlock_irqrestore(&cm_node->retrans_list_lock, 663 spin_unlock_irqrestore(&cm_node->retrans_list_lock,
894 flags); 664 flags);
895 ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev); 665 ret = nes_nic_cm_xmit(send_entry->skb, cm_node->netdev);
896 spin_lock_irqsave(&cm_node->retrans_list_lock, flags); 666 spin_lock_irqsave(&cm_node->retrans_list_lock, flags);
897 if (ret != NETDEV_TX_OK) { 667 if (ret != NETDEV_TX_OK) {
898 nes_debug(NES_DBG_CM, "rexmit failed for " 668 nes_debug(NES_DBG_CM, "rexmit failed for "
899 "node=%p\n", cm_node); 669 "node=%p\n", cm_node);
900 cm_packets_bounced++; 670 cm_packets_bounced++;
901 send_entry->retrycount--; 671 send_entry->retrycount--;
902 nexttimeout = jiffies + NES_SHORT_TIME; 672 nexttimeout = jiffies + NES_SHORT_TIME;
@@ -906,18 +676,18 @@ static void nes_cm_timer_tick(unsigned long pass)
906 cm_packets_sent++; 676 cm_packets_sent++;
907 } 677 }
908 nes_debug(NES_DBG_CM, "Packet Sent: retrans count = " 678 nes_debug(NES_DBG_CM, "Packet Sent: retrans count = "
909 "%u, retry count = %u.\n", 679 "%u, retry count = %u.\n",
910 send_entry->retranscount, 680 send_entry->retranscount,
911 send_entry->retrycount); 681 send_entry->retrycount);
912 if (send_entry->send_retrans) { 682 if (send_entry->send_retrans) {
913 send_entry->retranscount--; 683 send_entry->retranscount--;
914 timetosend = (NES_RETRY_TIMEOUT << 684 timetosend = (NES_RETRY_TIMEOUT <<
915 (NES_DEFAULT_RETRANS - send_entry->retranscount)); 685 (NES_DEFAULT_RETRANS - send_entry->retranscount));
916 686
917 send_entry->timetosend = jiffies + 687 send_entry->timetosend = jiffies +
918 min(timetosend, NES_MAX_TIMEOUT); 688 min(timetosend, NES_MAX_TIMEOUT);
919 if (nexttimeout > send_entry->timetosend || 689 if (nexttimeout > send_entry->timetosend ||
920 !settimer) { 690 !settimer) {
921 nexttimeout = send_entry->timetosend; 691 nexttimeout = send_entry->timetosend;
922 settimer = 1; 692 settimer = 1;
923 } 693 }
@@ -926,11 +696,11 @@ static void nes_cm_timer_tick(unsigned long pass)
926 close_when_complete = 696 close_when_complete =
927 send_entry->close_when_complete; 697 send_entry->close_when_complete;
928 nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n", 698 nes_debug(NES_DBG_CM, "cm_node=%p state=%d\n",
929 cm_node, cm_node->state); 699 cm_node, cm_node->state);
930 free_retrans_entry(cm_node); 700 free_retrans_entry(cm_node);
931 if (close_when_complete) 701 if (close_when_complete)
932 rem_ref_cm_node(cm_node->cm_core, 702 rem_ref_cm_node(cm_node->cm_core,
933 cm_node); 703 cm_node);
934 } 704 }
935 } while (0); 705 } while (0);
936 706
@@ -939,8 +709,10 @@ static void nes_cm_timer_tick(unsigned long pass)
939 } 709 }
940 710
941 if (settimer) { 711 if (settimer) {
942 if (!timer_pending(&cm_core->tcp_timer)) 712 if (!timer_pending(&cm_core->tcp_timer)) {
943 mod_timer(&cm_core->tcp_timer, nexttimeout); 713 cm_core->tcp_timer.expires = nexttimeout;
714 add_timer(&cm_core->tcp_timer);
715 }
944 } 716 }
945} 717}
946 718
@@ -949,13 +721,13 @@ static void nes_cm_timer_tick(unsigned long pass)
949 * send_syn 721 * send_syn
950 */ 722 */
951static int send_syn(struct nes_cm_node *cm_node, u32 sendack, 723static int send_syn(struct nes_cm_node *cm_node, u32 sendack,
952 struct sk_buff *skb) 724 struct sk_buff *skb)
953{ 725{
954 int ret; 726 int ret;
955 int flags = SET_SYN; 727 int flags = SET_SYN;
956 char optionsbuffer[sizeof(struct option_mss) + 728 char optionsbuffer[sizeof(struct option_mss) +
957 sizeof(struct option_windowscale) + sizeof(struct option_base) + 729 sizeof(struct option_windowscale) + sizeof(struct option_base) +
958 TCP_OPTIONS_PADDING]; 730 TCP_OPTIONS_PADDING];
959 731
960 int optionssize = 0; 732 int optionssize = 0;
961 /* Sending MSS option */ 733 /* Sending MSS option */
@@ -1082,7 +854,7 @@ static int send_fin(struct nes_cm_node *cm_node, struct sk_buff *skb)
1082 * find_node - find a cm node that matches the reference cm node 854 * find_node - find a cm node that matches the reference cm node
1083 */ 855 */
1084static struct nes_cm_node *find_node(struct nes_cm_core *cm_core, 856static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
1085 u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr) 857 u16 rem_port, nes_addr_t rem_addr, u16 loc_port, nes_addr_t loc_addr)
1086{ 858{
1087 unsigned long flags; 859 unsigned long flags;
1088 struct list_head *hte; 860 struct list_head *hte;
@@ -1096,12 +868,12 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
1096 list_for_each_entry(cm_node, hte, list) { 868 list_for_each_entry(cm_node, hte, list) {
1097 /* compare quad, return node handle if a match */ 869 /* compare quad, return node handle if a match */
1098 nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n", 870 nes_debug(NES_DBG_CM, "finding node %x:%x =? %x:%x ^ %x:%x =? %x:%x\n",
1099 cm_node->loc_addr, cm_node->loc_port, 871 cm_node->loc_addr, cm_node->loc_port,
1100 loc_addr, loc_port, 872 loc_addr, loc_port,
1101 cm_node->rem_addr, cm_node->rem_port, 873 cm_node->rem_addr, cm_node->rem_port,
1102 rem_addr, rem_port); 874 rem_addr, rem_port);
1103 if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) && 875 if ((cm_node->loc_addr == loc_addr) && (cm_node->loc_port == loc_port) &&
1104 (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) { 876 (cm_node->rem_addr == rem_addr) && (cm_node->rem_port == rem_port)) {
1105 add_ref_cm_node(cm_node); 877 add_ref_cm_node(cm_node);
1106 spin_unlock_irqrestore(&cm_core->ht_lock, flags); 878 spin_unlock_irqrestore(&cm_core->ht_lock, flags);
1107 return cm_node; 879 return cm_node;
@@ -1118,7 +890,7 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
1118 * find_listener - find a cm node listening on this addr-port pair 890 * find_listener - find a cm node listening on this addr-port pair
1119 */ 891 */
1120static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core, 892static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
1121 nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state) 893 nes_addr_t dst_addr, u16 dst_port, enum nes_cm_listener_state listener_state)
1122{ 894{
1123 unsigned long flags; 895 unsigned long flags;
1124 struct nes_cm_listener *listen_node; 896 struct nes_cm_listener *listen_node;
@@ -1128,9 +900,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
1128 list_for_each_entry(listen_node, &cm_core->listen_list.list, list) { 900 list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
1129 /* compare node pair, return node handle if a match */ 901 /* compare node pair, return node handle if a match */
1130 if (((listen_node->loc_addr == dst_addr) || 902 if (((listen_node->loc_addr == dst_addr) ||
1131 listen_node->loc_addr == 0x00000000) && 903 listen_node->loc_addr == 0x00000000) &&
1132 (listen_node->loc_port == dst_port) && 904 (listen_node->loc_port == dst_port) &&
1133 (listener_state & listen_node->listener_state)) { 905 (listener_state & listen_node->listener_state)) {
1134 atomic_inc(&listen_node->ref_count); 906 atomic_inc(&listen_node->ref_count);
1135 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 907 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1136 return listen_node; 908 return listen_node;
@@ -1155,7 +927,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
1155 return -EINVAL; 927 return -EINVAL;
1156 928
1157 nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n", 929 nes_debug(NES_DBG_CM, "Adding Node %p to Active Connection HT\n",
1158 cm_node); 930 cm_node);
1159 931
1160 spin_lock_irqsave(&cm_core->ht_lock, flags); 932 spin_lock_irqsave(&cm_core->ht_lock, flags);
1161 933
@@ -1174,7 +946,7 @@ static int add_hte_node(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node
1174 * mini_cm_dec_refcnt_listen 946 * mini_cm_dec_refcnt_listen
1175 */ 947 */
1176static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core, 948static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1177 struct nes_cm_listener *listener, int free_hanging_nodes) 949 struct nes_cm_listener *listener, int free_hanging_nodes)
1178{ 950{
1179 int ret = -EINVAL; 951 int ret = -EINVAL;
1180 int err = 0; 952 int err = 0;
@@ -1185,8 +957,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1185 struct list_head reset_list; 957 struct list_head reset_list;
1186 958
1187 nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, " 959 nes_debug(NES_DBG_CM, "attempting listener= %p free_nodes= %d, "
1188 "refcnt=%d\n", listener, free_hanging_nodes, 960 "refcnt=%d\n", listener, free_hanging_nodes,
1189 atomic_read(&listener->ref_count)); 961 atomic_read(&listener->ref_count));
1190 /* free non-accelerated child nodes for this listener */ 962 /* free non-accelerated child nodes for this listener */
1191 INIT_LIST_HEAD(&reset_list); 963 INIT_LIST_HEAD(&reset_list);
1192 if (free_hanging_nodes) { 964 if (free_hanging_nodes) {
@@ -1194,7 +966,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1194 list_for_each_safe(list_pos, list_temp, 966 list_for_each_safe(list_pos, list_temp,
1195 &g_cm_core->connected_nodes) { 967 &g_cm_core->connected_nodes) {
1196 cm_node = container_of(list_pos, struct nes_cm_node, 968 cm_node = container_of(list_pos, struct nes_cm_node,
1197 list); 969 list);
1198 if ((cm_node->listener == listener) && 970 if ((cm_node->listener == listener) &&
1199 (!cm_node->accelerated)) { 971 (!cm_node->accelerated)) {
1200 add_ref_cm_node(cm_node); 972 add_ref_cm_node(cm_node);
@@ -1206,7 +978,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1206 978
1207 list_for_each_safe(list_pos, list_temp, &reset_list) { 979 list_for_each_safe(list_pos, list_temp, &reset_list) {
1208 cm_node = container_of(list_pos, struct nes_cm_node, 980 cm_node = container_of(list_pos, struct nes_cm_node,
1209 reset_entry); 981 reset_entry);
1210 { 982 {
1211 struct nes_cm_node *loopback = cm_node->loopbackpartner; 983 struct nes_cm_node *loopback = cm_node->loopbackpartner;
1212 enum nes_cm_node_state old_state; 984 enum nes_cm_node_state old_state;
@@ -1218,7 +990,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1218 err = send_reset(cm_node, NULL); 990 err = send_reset(cm_node, NULL);
1219 if (err) { 991 if (err) {
1220 cm_node->state = 992 cm_node->state =
1221 NES_CM_STATE_CLOSED; 993 NES_CM_STATE_CLOSED;
1222 WARN_ON(1); 994 WARN_ON(1);
1223 } else { 995 } else {
1224 old_state = cm_node->state; 996 old_state = cm_node->state;
@@ -1263,9 +1035,10 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1263 1035
1264 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); 1036 spin_unlock_irqrestore(&cm_core->listen_list_lock, flags);
1265 1037
1266 if (listener->nesvnic) 1038 if (listener->nesvnic) {
1267 nes_manage_apbvt(listener->nesvnic, listener->loc_port, 1039 nes_manage_apbvt(listener->nesvnic, listener->loc_port,
1268 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL); 1040 PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn), NES_MANAGE_APBVT_DEL);
1041 }
1269 1042
1270 nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener); 1043 nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
1271 1044
@@ -1279,8 +1052,8 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1279 if (listener) { 1052 if (listener) {
1280 if (atomic_read(&listener->pend_accepts_cnt) > 0) 1053 if (atomic_read(&listener->pend_accepts_cnt) > 0)
1281 nes_debug(NES_DBG_CM, "destroying listener (%p)" 1054 nes_debug(NES_DBG_CM, "destroying listener (%p)"
1282 " with non-zero pending accepts=%u\n", 1055 " with non-zero pending accepts=%u\n",
1283 listener, atomic_read(&listener->pend_accepts_cnt)); 1056 listener, atomic_read(&listener->pend_accepts_cnt));
1284 } 1057 }
1285 1058
1286 return ret; 1059 return ret;
@@ -1291,7 +1064,7 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
1291 * mini_cm_del_listen 1064 * mini_cm_del_listen
1292 */ 1065 */
1293static int mini_cm_del_listen(struct nes_cm_core *cm_core, 1066static int mini_cm_del_listen(struct nes_cm_core *cm_core,
1294 struct nes_cm_listener *listener) 1067 struct nes_cm_listener *listener)
1295{ 1068{
1296 listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE; 1069 listener->listener_state = NES_CM_LISTENER_PASSIVE_STATE;
1297 listener->cm_id = NULL; /* going to be destroyed pretty soon */ 1070 listener->cm_id = NULL; /* going to be destroyed pretty soon */
@@ -1303,8 +1076,9 @@ static int mini_cm_del_listen(struct nes_cm_core *cm_core,
1303 * mini_cm_accelerated 1076 * mini_cm_accelerated
1304 */ 1077 */
1305static inline int mini_cm_accelerated(struct nes_cm_core *cm_core, 1078static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
1306 struct nes_cm_node *cm_node) 1079 struct nes_cm_node *cm_node)
1307{ 1080{
1081 u32 was_timer_set;
1308 cm_node->accelerated = 1; 1082 cm_node->accelerated = 1;
1309 1083
1310 if (cm_node->accept_pend) { 1084 if (cm_node->accept_pend) {
@@ -1314,8 +1088,11 @@ static inline int mini_cm_accelerated(struct nes_cm_core *cm_core,
1314 BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0); 1088 BUG_ON(atomic_read(&cm_node->listener->pend_accepts_cnt) < 0);
1315 } 1089 }
1316 1090
1317 if (!timer_pending(&cm_core->tcp_timer)) 1091 was_timer_set = timer_pending(&cm_core->tcp_timer);
1318 mod_timer(&cm_core->tcp_timer, (jiffies + NES_SHORT_TIME)); 1092 if (!was_timer_set) {
1093 cm_core->tcp_timer.expires = jiffies + NES_SHORT_TIME;
1094 add_timer(&cm_core->tcp_timer);
1095 }
1319 1096
1320 return 0; 1097 return 0;
1321} 1098}
@@ -1335,7 +1112,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
1335 rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0); 1112 rt = ip_route_output(&init_net, htonl(dst_ip), 0, 0, 0);
1336 if (IS_ERR(rt)) { 1113 if (IS_ERR(rt)) {
1337 printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n", 1114 printk(KERN_ERR "%s: ip_route_output_key failed for 0x%08X\n",
1338 __func__, dst_ip); 1115 __func__, dst_ip);
1339 return rc; 1116 return rc;
1340 } 1117 }
1341 1118
@@ -1345,8 +1122,6 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
1345 netdev = nesvnic->netdev; 1122 netdev = nesvnic->netdev;
1346 1123
1347 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); 1124 neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev);
1348
1349 rcu_read_lock();
1350 if (neigh) { 1125 if (neigh) {
1351 if (neigh->nud_state & NUD_VALID) { 1126 if (neigh->nud_state & NUD_VALID) {
1352 nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" 1127 nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X"
@@ -1355,30 +1130,31 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
1355 1130
1356 if (arpindex >= 0) { 1131 if (arpindex >= 0) {
1357 if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, 1132 if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
1358 neigh->ha, ETH_ALEN)) { 1133 neigh->ha, ETH_ALEN)){
1359 /* Mac address same as in nes_arp_table */ 1134 /* Mac address same as in nes_arp_table */
1360 goto out; 1135 neigh_release(neigh);
1136 ip_rt_put(rt);
1137 return rc;
1361 } 1138 }
1362 1139
1363 nes_manage_arp_cache(nesvnic->netdev, 1140 nes_manage_arp_cache(nesvnic->netdev,
1364 nesadapter->arp_table[arpindex].mac_addr, 1141 nesadapter->arp_table[arpindex].mac_addr,
1365 dst_ip, NES_ARP_DELETE); 1142 dst_ip, NES_ARP_DELETE);
1366 } 1143 }
1367 1144
1368 nes_manage_arp_cache(nesvnic->netdev, neigh->ha, 1145 nes_manage_arp_cache(nesvnic->netdev, neigh->ha,
1369 dst_ip, NES_ARP_ADD); 1146 dst_ip, NES_ARP_ADD);
1370 rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, 1147 rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL,
1371 NES_ARP_RESOLVE); 1148 NES_ARP_RESOLVE);
1372 } else {
1373 neigh_event_send(neigh, NULL);
1374 } 1149 }
1375 }
1376out:
1377 rcu_read_unlock();
1378
1379 if (neigh)
1380 neigh_release(neigh); 1150 neigh_release(neigh);
1151 }
1381 1152
1153 if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) {
1154 rcu_read_lock();
1155 neigh_event_send(dst_get_neighbour(&rt->dst), NULL);
1156 rcu_read_unlock();
1157 }
1382 ip_rt_put(rt); 1158 ip_rt_put(rt);
1383 return rc; 1159 return rc;
1384} 1160}
@@ -1387,8 +1163,8 @@ out:
1387 * make_cm_node - create a new instance of a cm node 1163 * make_cm_node - create a new instance of a cm node
1388 */ 1164 */
1389static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core, 1165static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1390 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info, 1166 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info,
1391 struct nes_cm_listener *listener) 1167 struct nes_cm_listener *listener)
1392{ 1168{
1393 struct nes_cm_node *cm_node; 1169 struct nes_cm_node *cm_node;
1394 struct timespec ts; 1170 struct timespec ts;
@@ -1407,12 +1183,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1407 cm_node->rem_addr = cm_info->rem_addr; 1183 cm_node->rem_addr = cm_info->rem_addr;
1408 cm_node->loc_port = cm_info->loc_port; 1184 cm_node->loc_port = cm_info->loc_port;
1409 cm_node->rem_port = cm_info->rem_port; 1185 cm_node->rem_port = cm_info->rem_port;
1410 1186 cm_node->send_write0 = send_first;
1411 cm_node->mpa_frame_rev = mpa_version;
1412 cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
1413 cm_node->ird_size = IETF_NO_IRD_ORD;
1414 cm_node->ord_size = IETF_NO_IRD_ORD;
1415
1416 nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n", 1187 nes_debug(NES_DBG_CM, "Make node addresses : loc = %pI4:%x, rem = %pI4:%x\n",
1417 &cm_node->loc_addr, cm_node->loc_port, 1188 &cm_node->loc_addr, cm_node->loc_port,
1418 &cm_node->rem_addr, cm_node->rem_port); 1189 &cm_node->rem_addr, cm_node->rem_port);
@@ -1422,7 +1193,7 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1422 memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN); 1193 memcpy(cm_node->loc_mac, nesvnic->netdev->dev_addr, ETH_ALEN);
1423 1194
1424 nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener, 1195 nes_debug(NES_DBG_CM, "listener=%p, cm_id=%p\n", cm_node->listener,
1425 cm_node->cm_id); 1196 cm_node->cm_id);
1426 1197
1427 spin_lock_init(&cm_node->retrans_list_lock); 1198 spin_lock_init(&cm_node->retrans_list_lock);
1428 1199
@@ -1433,11 +1204,11 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1433 cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID; 1204 cm_node->tcp_cntxt.loc_id = NES_CM_DEF_LOCAL_ID;
1434 cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE; 1205 cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
1435 cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >> 1206 cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
1436 NES_CM_DEFAULT_RCV_WND_SCALE; 1207 NES_CM_DEFAULT_RCV_WND_SCALE;
1437 ts = current_kernel_time(); 1208 ts = current_kernel_time();
1438 cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec); 1209 cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
1439 cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) - 1210 cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
1440 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN; 1211 sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
1441 cm_node->tcp_cntxt.rcv_nxt = 0; 1212 cm_node->tcp_cntxt.rcv_nxt = 0;
1442 /* get a unique session ID , add thread_id to an upcounter to handle race */ 1213 /* get a unique session ID , add thread_id to an upcounter to handle race */
1443 atomic_inc(&cm_core->node_cnt); 1214 atomic_inc(&cm_core->node_cnt);
@@ -1453,8 +1224,13 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
1453 cm_node->loopbackpartner = NULL; 1224 cm_node->loopbackpartner = NULL;
1454 1225
1455 /* get the mac addr for the remote node */ 1226 /* get the mac addr for the remote node */
1456 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE); 1227 if (ipv4_is_loopback(htonl(cm_node->rem_addr)))
1457 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex); 1228 arpindex = nes_arp_table(nesdev, ntohl(nesvnic->local_ipaddr), NULL, NES_ARP_RESOLVE);
1229 else {
1230 oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr, NULL, NES_ARP_RESOLVE);
1231 arpindex = nes_addr_resolve_neigh(nesvnic, cm_info->rem_addr, oldarpindex);
1232
1233 }
1458 if (arpindex < 0) { 1234 if (arpindex < 0) {
1459 kfree(cm_node); 1235 kfree(cm_node);
1460 return NULL; 1236 return NULL;
@@ -1486,7 +1262,7 @@ static int add_ref_cm_node(struct nes_cm_node *cm_node)
1486 * rem_ref_cm_node - destroy an instance of a cm node 1262 * rem_ref_cm_node - destroy an instance of a cm node
1487 */ 1263 */
1488static int rem_ref_cm_node(struct nes_cm_core *cm_core, 1264static int rem_ref_cm_node(struct nes_cm_core *cm_core,
1489 struct nes_cm_node *cm_node) 1265 struct nes_cm_node *cm_node)
1490{ 1266{
1491 unsigned long flags; 1267 unsigned long flags;
1492 struct nes_qp *nesqp; 1268 struct nes_qp *nesqp;
@@ -1517,9 +1293,9 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
1517 } else { 1293 } else {
1518 if (cm_node->apbvt_set && cm_node->nesvnic) { 1294 if (cm_node->apbvt_set && cm_node->nesvnic) {
1519 nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port, 1295 nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
1520 PCI_FUNC( 1296 PCI_FUNC(
1521 cm_node->nesvnic->nesdev->pcidev->devfn), 1297 cm_node->nesvnic->nesdev->pcidev->devfn),
1522 NES_MANAGE_APBVT_DEL); 1298 NES_MANAGE_APBVT_DEL);
1523 } 1299 }
1524 } 1300 }
1525 1301
@@ -1540,7 +1316,7 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
1540 * process_options 1316 * process_options
1541 */ 1317 */
1542static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc, 1318static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
1543 u32 optionsize, u32 syn_packet) 1319 u32 optionsize, u32 syn_packet)
1544{ 1320{
1545 u32 tmp; 1321 u32 tmp;
1546 u32 offset = 0; 1322 u32 offset = 0;
@@ -1558,15 +1334,15 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
1558 continue; 1334 continue;
1559 case OPTION_NUMBER_MSS: 1335 case OPTION_NUMBER_MSS:
1560 nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d " 1336 nes_debug(NES_DBG_CM, "%s: MSS Length: %d Offset: %d "
1561 "Size: %d\n", __func__, 1337 "Size: %d\n", __func__,
1562 all_options->as_mss.length, offset, optionsize); 1338 all_options->as_mss.length, offset, optionsize);
1563 got_mss_option = 1; 1339 got_mss_option = 1;
1564 if (all_options->as_mss.length != 4) { 1340 if (all_options->as_mss.length != 4) {
1565 return 1; 1341 return 1;
1566 } else { 1342 } else {
1567 tmp = ntohs(all_options->as_mss.mss); 1343 tmp = ntohs(all_options->as_mss.mss);
1568 if (tmp > 0 && tmp < 1344 if (tmp > 0 && tmp <
1569 cm_node->tcp_cntxt.mss) 1345 cm_node->tcp_cntxt.mss)
1570 cm_node->tcp_cntxt.mss = tmp; 1346 cm_node->tcp_cntxt.mss = tmp;
1571 } 1347 }
1572 break; 1348 break;
@@ -1574,9 +1350,12 @@ static int process_options(struct nes_cm_node *cm_node, u8 *optionsloc,
1574 cm_node->tcp_cntxt.snd_wscale = 1350 cm_node->tcp_cntxt.snd_wscale =
1575 all_options->as_windowscale.shiftcount; 1351 all_options->as_windowscale.shiftcount;
1576 break; 1352 break;
1353 case OPTION_NUMBER_WRITE0:
1354 cm_node->send_write0 = 1;
1355 break;
1577 default: 1356 default:
1578 nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n", 1357 nes_debug(NES_DBG_CM, "TCP Option not understood: %x\n",
1579 all_options->as_base.optionnum); 1358 all_options->as_base.optionnum);
1580 break; 1359 break;
1581 } 1360 }
1582 offset += all_options->as_base.length; 1361 offset += all_options->as_base.length;
@@ -1595,8 +1374,8 @@ static void drop_packet(struct sk_buff *skb)
1595static void handle_fin_pkt(struct nes_cm_node *cm_node) 1374static void handle_fin_pkt(struct nes_cm_node *cm_node)
1596{ 1375{
1597 nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. " 1376 nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
1598 "refcnt=%d\n", cm_node, cm_node->state, 1377 "refcnt=%d\n", cm_node, cm_node->state,
1599 atomic_read(&cm_node->ref_count)); 1378 atomic_read(&cm_node->ref_count));
1600 switch (cm_node->state) { 1379 switch (cm_node->state) {
1601 case NES_CM_STATE_SYN_RCVD: 1380 case NES_CM_STATE_SYN_RCVD:
1602 case NES_CM_STATE_SYN_SENT: 1381 case NES_CM_STATE_SYN_SENT:
@@ -1662,20 +1441,7 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1662 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " 1441 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
1663 "listener=%p state=%d\n", __func__, __LINE__, cm_node, 1442 "listener=%p state=%d\n", __func__, __LINE__, cm_node,
1664 cm_node->listener, cm_node->state); 1443 cm_node->listener, cm_node->state);
1665 switch (cm_node->mpa_frame_rev) { 1444 active_open_err(cm_node, skb, reset);
1666 case IETF_MPA_V2:
1667 cm_node->mpa_frame_rev = IETF_MPA_V1;
1668 /* send a syn and goto syn sent state */
1669 cm_node->state = NES_CM_STATE_SYN_SENT;
1670 if (send_syn(cm_node, 0, NULL)) {
1671 active_open_err(cm_node, skb, reset);
1672 }
1673 break;
1674 case IETF_MPA_V1:
1675 default:
1676 active_open_err(cm_node, skb, reset);
1677 break;
1678 }
1679 break; 1445 break;
1680 case NES_CM_STATE_MPAREQ_RCVD: 1446 case NES_CM_STATE_MPAREQ_RCVD:
1681 atomic_inc(&cm_node->passive_state); 1447 atomic_inc(&cm_node->passive_state);
@@ -1711,21 +1477,21 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1711 1477
1712static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb) 1478static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
1713{ 1479{
1714 int ret = 0; 1480
1481 int ret = 0;
1715 int datasize = skb->len; 1482 int datasize = skb->len;
1716 u8 *dataloc = skb->data; 1483 u8 *dataloc = skb->data;
1717 1484
1718 enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN; 1485 enum nes_cm_event_type type = NES_CM_EVENT_UNKNOWN;
1719 u32 res_type; 1486 u32 res_type;
1720
1721 ret = parse_mpa(cm_node, dataloc, &res_type, datasize); 1487 ret = parse_mpa(cm_node, dataloc, &res_type, datasize);
1722 if (ret) { 1488 if (ret) {
1723 nes_debug(NES_DBG_CM, "didn't like MPA Request\n"); 1489 nes_debug(NES_DBG_CM, "didn't like MPA Request\n");
1724 if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) { 1490 if (cm_node->state == NES_CM_STATE_MPAREQ_SENT) {
1725 nes_debug(NES_DBG_CM, "%s[%u] create abort for " 1491 nes_debug(NES_DBG_CM, "%s[%u] create abort for "
1726 "cm_node=%p listener=%p state=%d\n", __func__, 1492 "cm_node=%p listener=%p state=%d\n", __func__,
1727 __LINE__, cm_node, cm_node->listener, 1493 __LINE__, cm_node, cm_node->listener,
1728 cm_node->state); 1494 cm_node->state);
1729 active_open_err(cm_node, skb, 1); 1495 active_open_err(cm_node, skb, 1);
1730 } else { 1496 } else {
1731 passive_open_err(cm_node, skb, 1); 1497 passive_open_err(cm_node, skb, 1);
@@ -1735,15 +1501,16 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
1735 1501
1736 switch (cm_node->state) { 1502 switch (cm_node->state) {
1737 case NES_CM_STATE_ESTABLISHED: 1503 case NES_CM_STATE_ESTABLISHED:
1738 if (res_type == NES_MPA_REQUEST_REJECT) 1504 if (res_type == NES_MPA_REQUEST_REJECT) {
1739 /*BIG problem as we are receiving the MPA.. So should 1505 /*BIG problem as we are receiving the MPA.. So should
1740 * not be REJECT.. This is Passive Open.. We can 1506 * not be REJECT.. This is Passive Open.. We can
1741 * only receive it Reject for Active Open...*/ 1507 * only receive it Reject for Active Open...*/
1742 WARN_ON(1); 1508 WARN_ON(1);
1509 }
1743 cm_node->state = NES_CM_STATE_MPAREQ_RCVD; 1510 cm_node->state = NES_CM_STATE_MPAREQ_RCVD;
1744 type = NES_CM_EVENT_MPA_REQ; 1511 type = NES_CM_EVENT_MPA_REQ;
1745 atomic_set(&cm_node->passive_state, 1512 atomic_set(&cm_node->passive_state,
1746 NES_PASSIVE_STATE_INDICATED); 1513 NES_PASSIVE_STATE_INDICATED);
1747 break; 1514 break;
1748 case NES_CM_STATE_MPAREQ_SENT: 1515 case NES_CM_STATE_MPAREQ_SENT:
1749 cleanup_retrans_entry(cm_node); 1516 cleanup_retrans_entry(cm_node);
@@ -1770,8 +1537,8 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
1770 case NES_CM_STATE_SYN_SENT: 1537 case NES_CM_STATE_SYN_SENT:
1771 case NES_CM_STATE_MPAREQ_SENT: 1538 case NES_CM_STATE_MPAREQ_SENT:
1772 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " 1539 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
1773 "listener=%p state=%d\n", __func__, __LINE__, cm_node, 1540 "listener=%p state=%d\n", __func__, __LINE__, cm_node,
1774 cm_node->listener, cm_node->state); 1541 cm_node->listener, cm_node->state);
1775 active_open_err(cm_node, skb, 1); 1542 active_open_err(cm_node, skb, 1);
1776 break; 1543 break;
1777 case NES_CM_STATE_ESTABLISHED: 1544 case NES_CM_STATE_ESTABLISHED:
@@ -1785,11 +1552,11 @@ static void indicate_pkt_err(struct nes_cm_node *cm_node, struct sk_buff *skb)
1785} 1552}
1786 1553
1787static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph, 1554static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
1788 struct sk_buff *skb) 1555 struct sk_buff *skb)
1789{ 1556{
1790 int err; 1557 int err;
1791 1558
1792 err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num)) ? 0 : 1; 1559 err = ((ntohl(tcph->ack_seq) == cm_node->tcp_cntxt.loc_seq_num))? 0 : 1;
1793 if (err) 1560 if (err)
1794 active_open_err(cm_node, skb, 1); 1561 active_open_err(cm_node, skb, 1);
1795 1562
@@ -1797,7 +1564,7 @@ static int check_syn(struct nes_cm_node *cm_node, struct tcphdr *tcph,
1797} 1564}
1798 1565
1799static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph, 1566static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
1800 struct sk_buff *skb) 1567 struct sk_buff *skb)
1801{ 1568{
1802 int err = 0; 1569 int err = 0;
1803 u32 seq; 1570 u32 seq;
@@ -1805,22 +1572,21 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
1805 u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num; 1572 u32 loc_seq_num = cm_node->tcp_cntxt.loc_seq_num;
1806 u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt; 1573 u32 rcv_nxt = cm_node->tcp_cntxt.rcv_nxt;
1807 u32 rcv_wnd; 1574 u32 rcv_wnd;
1808
1809 seq = ntohl(tcph->seq); 1575 seq = ntohl(tcph->seq);
1810 ack_seq = ntohl(tcph->ack_seq); 1576 ack_seq = ntohl(tcph->ack_seq);
1811 rcv_wnd = cm_node->tcp_cntxt.rcv_wnd; 1577 rcv_wnd = cm_node->tcp_cntxt.rcv_wnd;
1812 if (ack_seq != loc_seq_num) 1578 if (ack_seq != loc_seq_num)
1813 err = 1; 1579 err = 1;
1814 else if (!between(seq, rcv_nxt, (rcv_nxt + rcv_wnd))) 1580 else if (!between(seq, rcv_nxt, (rcv_nxt+rcv_wnd)))
1815 err = 1; 1581 err = 1;
1816 if (err) { 1582 if (err) {
1817 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p " 1583 nes_debug(NES_DBG_CM, "%s[%u] create abort for cm_node=%p "
1818 "listener=%p state=%d\n", __func__, __LINE__, cm_node, 1584 "listener=%p state=%d\n", __func__, __LINE__, cm_node,
1819 cm_node->listener, cm_node->state); 1585 cm_node->listener, cm_node->state);
1820 indicate_pkt_err(cm_node, skb); 1586 indicate_pkt_err(cm_node, skb);
1821 nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X " 1587 nes_debug(NES_DBG_CM, "seq ERROR cm_node =%p seq=0x%08X "
1822 "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt, 1588 "rcv_nxt=0x%08X rcv_wnd=0x%x\n", cm_node, seq, rcv_nxt,
1823 rcv_wnd); 1589 rcv_wnd);
1824 } 1590 }
1825 return err; 1591 return err;
1826} 1592}
@@ -1830,8 +1596,9 @@ static int check_seq(struct nes_cm_node *cm_node, struct tcphdr *tcph,
1830 * is created with a listener or it may comein as rexmitted packet which in 1596 * is created with a listener or it may comein as rexmitted packet which in
1831 * that case will be just dropped. 1597 * that case will be just dropped.
1832 */ 1598 */
1599
1833static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, 1600static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1834 struct tcphdr *tcph) 1601 struct tcphdr *tcph)
1835{ 1602{
1836 int ret; 1603 int ret;
1837 u32 inc_sequence; 1604 u32 inc_sequence;
@@ -1850,15 +1617,15 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1850 case NES_CM_STATE_LISTENING: 1617 case NES_CM_STATE_LISTENING:
1851 /* Passive OPEN */ 1618 /* Passive OPEN */
1852 if (atomic_read(&cm_node->listener->pend_accepts_cnt) > 1619 if (atomic_read(&cm_node->listener->pend_accepts_cnt) >
1853 cm_node->listener->backlog) { 1620 cm_node->listener->backlog) {
1854 nes_debug(NES_DBG_CM, "drop syn due to backlog " 1621 nes_debug(NES_DBG_CM, "drop syn due to backlog "
1855 "pressure \n"); 1622 "pressure \n");
1856 cm_backlog_drops++; 1623 cm_backlog_drops++;
1857 passive_open_err(cm_node, skb, 0); 1624 passive_open_err(cm_node, skb, 0);
1858 break; 1625 break;
1859 } 1626 }
1860 ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1627 ret = handle_tcp_options(cm_node, tcph, skb, optionsize,
1861 1); 1628 1);
1862 if (ret) { 1629 if (ret) {
1863 passive_open_err(cm_node, skb, 0); 1630 passive_open_err(cm_node, skb, 0);
1864 /* drop pkt */ 1631 /* drop pkt */
@@ -1892,8 +1659,9 @@ static void handle_syn_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1892} 1659}
1893 1660
1894static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, 1661static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1895 struct tcphdr *tcph) 1662 struct tcphdr *tcph)
1896{ 1663{
1664
1897 int ret; 1665 int ret;
1898 u32 inc_sequence; 1666 u32 inc_sequence;
1899 int optionsize; 1667 int optionsize;
@@ -1912,7 +1680,7 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1912 ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0); 1680 ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 0);
1913 if (ret) { 1681 if (ret) {
1914 nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n", 1682 nes_debug(NES_DBG_CM, "cm_node=%p tcp_options failed\n",
1915 cm_node); 1683 cm_node);
1916 break; 1684 break;
1917 } 1685 }
1918 cleanup_retrans_entry(cm_node); 1686 cleanup_retrans_entry(cm_node);
@@ -1951,13 +1719,12 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1951} 1719}
1952 1720
1953static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb, 1721static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1954 struct tcphdr *tcph) 1722 struct tcphdr *tcph)
1955{ 1723{
1956 int datasize = 0; 1724 int datasize = 0;
1957 u32 inc_sequence; 1725 u32 inc_sequence;
1958 int ret = 0; 1726 int ret = 0;
1959 int optionsize; 1727 int optionsize;
1960
1961 optionsize = (tcph->doff << 2) - sizeof(struct tcphdr); 1728 optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);
1962 1729
1963 if (check_seq(cm_node, tcph, skb)) 1730 if (check_seq(cm_node, tcph, skb))
@@ -1978,9 +1745,8 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1978 if (datasize) { 1745 if (datasize) {
1979 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; 1746 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
1980 handle_rcv_mpa(cm_node, skb); 1747 handle_rcv_mpa(cm_node, skb);
1981 } else { /* rcvd ACK only */ 1748 } else /* rcvd ACK only */
1982 dev_kfree_skb_any(skb); 1749 dev_kfree_skb_any(skb);
1983 }
1984 break; 1750 break;
1985 case NES_CM_STATE_ESTABLISHED: 1751 case NES_CM_STATE_ESTABLISHED:
1986 /* Passive OPEN */ 1752 /* Passive OPEN */
@@ -1988,18 +1754,16 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
1988 if (datasize) { 1754 if (datasize) {
1989 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; 1755 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
1990 handle_rcv_mpa(cm_node, skb); 1756 handle_rcv_mpa(cm_node, skb);
1991 } else { 1757 } else
1992 drop_packet(skb); 1758 drop_packet(skb);
1993 }
1994 break; 1759 break;
1995 case NES_CM_STATE_MPAREQ_SENT: 1760 case NES_CM_STATE_MPAREQ_SENT:
1996 cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq); 1761 cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
1997 if (datasize) { 1762 if (datasize) {
1998 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize; 1763 cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
1999 handle_rcv_mpa(cm_node, skb); 1764 handle_rcv_mpa(cm_node, skb);
2000 } else { /* Could be just an ack pkt.. */ 1765 } else /* Could be just an ack pkt.. */
2001 dev_kfree_skb_any(skb); 1766 dev_kfree_skb_any(skb);
2002 }
2003 break; 1767 break;
2004 case NES_CM_STATE_LISTENING: 1768 case NES_CM_STATE_LISTENING:
2005 cleanup_retrans_entry(cm_node); 1769 cleanup_retrans_entry(cm_node);
@@ -2040,15 +1804,14 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
2040 1804
2041 1805
2042static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph, 1806static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
2043 struct sk_buff *skb, int optionsize, int passive) 1807 struct sk_buff *skb, int optionsize, int passive)
2044{ 1808{
2045 u8 *optionsloc = (u8 *)&tcph[1]; 1809 u8 *optionsloc = (u8 *)&tcph[1];
2046
2047 if (optionsize) { 1810 if (optionsize) {
2048 if (process_options(cm_node, optionsloc, optionsize, 1811 if (process_options(cm_node, optionsloc, optionsize,
2049 (u32)tcph->syn)) { 1812 (u32)tcph->syn)) {
2050 nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n", 1813 nes_debug(NES_DBG_CM, "%s: Node %p, Sending RESET\n",
2051 __func__, cm_node); 1814 __func__, cm_node);
2052 if (passive) 1815 if (passive)
2053 passive_open_err(cm_node, skb, 1); 1816 passive_open_err(cm_node, skb, 1);
2054 else 1817 else
@@ -2058,7 +1821,7 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
2058 } 1821 }
2059 1822
2060 cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) << 1823 cm_node->tcp_cntxt.snd_wnd = ntohs(tcph->window) <<
2061 cm_node->tcp_cntxt.snd_wscale; 1824 cm_node->tcp_cntxt.snd_wscale;
2062 1825
2063 if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd) 1826 if (cm_node->tcp_cntxt.snd_wnd > cm_node->tcp_cntxt.max_snd_wnd)
2064 cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd; 1827 cm_node->tcp_cntxt.max_snd_wnd = cm_node->tcp_cntxt.snd_wnd;
@@ -2069,18 +1832,18 @@ static int handle_tcp_options(struct nes_cm_node *cm_node, struct tcphdr *tcph,
2069 * active_open_err() will send reset() if flag set.. 1832 * active_open_err() will send reset() if flag set..
2070 * It will also send ABORT event. 1833 * It will also send ABORT event.
2071 */ 1834 */
1835
2072static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, 1836static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
2073 int reset) 1837 int reset)
2074{ 1838{
2075 cleanup_retrans_entry(cm_node); 1839 cleanup_retrans_entry(cm_node);
2076 if (reset) { 1840 if (reset) {
2077 nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, " 1841 nes_debug(NES_DBG_CM, "ERROR active err called for cm_node=%p, "
2078 "state=%d\n", cm_node, cm_node->state); 1842 "state=%d\n", cm_node, cm_node->state);
2079 add_ref_cm_node(cm_node); 1843 add_ref_cm_node(cm_node);
2080 send_reset(cm_node, skb); 1844 send_reset(cm_node, skb);
2081 } else { 1845 } else
2082 dev_kfree_skb_any(skb); 1846 dev_kfree_skb_any(skb);
2083 }
2084 1847
2085 cm_node->state = NES_CM_STATE_CLOSED; 1848 cm_node->state = NES_CM_STATE_CLOSED;
2086 create_event(cm_node, NES_CM_EVENT_ABORTED); 1849 create_event(cm_node, NES_CM_EVENT_ABORTED);
@@ -2090,14 +1853,15 @@ static void active_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
2090 * passive_open_err() will either do a reset() or will free up the skb and 1853 * passive_open_err() will either do a reset() or will free up the skb and
2091 * remove the cm_node. 1854 * remove the cm_node.
2092 */ 1855 */
1856
2093static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb, 1857static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
2094 int reset) 1858 int reset)
2095{ 1859{
2096 cleanup_retrans_entry(cm_node); 1860 cleanup_retrans_entry(cm_node);
2097 cm_node->state = NES_CM_STATE_CLOSED; 1861 cm_node->state = NES_CM_STATE_CLOSED;
2098 if (reset) { 1862 if (reset) {
2099 nes_debug(NES_DBG_CM, "passive_open_err sending RST for " 1863 nes_debug(NES_DBG_CM, "passive_open_err sending RST for "
2100 "cm_node=%p state =%d\n", cm_node, cm_node->state); 1864 "cm_node=%p state =%d\n", cm_node, cm_node->state);
2101 send_reset(cm_node, skb); 1865 send_reset(cm_node, skb);
2102 } else { 1866 } else {
2103 dev_kfree_skb_any(skb); 1867 dev_kfree_skb_any(skb);
@@ -2112,7 +1876,6 @@ static void passive_open_err(struct nes_cm_node *cm_node, struct sk_buff *skb,
2112static void free_retrans_entry(struct nes_cm_node *cm_node) 1876static void free_retrans_entry(struct nes_cm_node *cm_node)
2113{ 1877{
2114 struct nes_timer_entry *send_entry; 1878 struct nes_timer_entry *send_entry;
2115
2116 send_entry = cm_node->send_entry; 1879 send_entry = cm_node->send_entry;
2117 if (send_entry) { 1880 if (send_entry) {
2118 cm_node->send_entry = NULL; 1881 cm_node->send_entry = NULL;
@@ -2136,28 +1899,26 @@ static void cleanup_retrans_entry(struct nes_cm_node *cm_node)
2136 * Returns skb if to be freed, else it will return NULL if already used.. 1899 * Returns skb if to be freed, else it will return NULL if already used..
2137 */ 1900 */
2138static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb, 1901static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
2139 struct nes_cm_core *cm_core) 1902 struct nes_cm_core *cm_core)
2140{ 1903{
2141 enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN; 1904 enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
2142 struct tcphdr *tcph = tcp_hdr(skb); 1905 struct tcphdr *tcph = tcp_hdr(skb);
2143 u32 fin_set = 0; 1906 u32 fin_set = 0;
2144 int ret = 0; 1907 int ret = 0;
2145
2146 skb_pull(skb, ip_hdr(skb)->ihl << 2); 1908 skb_pull(skb, ip_hdr(skb)->ihl << 2);
2147 1909
2148 nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d " 1910 nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
2149 "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn, 1911 "ack=%d rst=%d fin=%d\n", cm_node, cm_node->state, tcph->syn,
2150 tcph->ack, tcph->rst, tcph->fin); 1912 tcph->ack, tcph->rst, tcph->fin);
2151 1913
2152 if (tcph->rst) { 1914 if (tcph->rst)
2153 pkt_type = NES_PKT_TYPE_RST; 1915 pkt_type = NES_PKT_TYPE_RST;
2154 } else if (tcph->syn) { 1916 else if (tcph->syn) {
2155 pkt_type = NES_PKT_TYPE_SYN; 1917 pkt_type = NES_PKT_TYPE_SYN;
2156 if (tcph->ack) 1918 if (tcph->ack)
2157 pkt_type = NES_PKT_TYPE_SYNACK; 1919 pkt_type = NES_PKT_TYPE_SYNACK;
2158 } else if (tcph->ack) { 1920 } else if (tcph->ack)
2159 pkt_type = NES_PKT_TYPE_ACK; 1921 pkt_type = NES_PKT_TYPE_ACK;
2160 }
2161 if (tcph->fin) 1922 if (tcph->fin)
2162 fin_set = 1; 1923 fin_set = 1;
2163 1924
@@ -2188,17 +1949,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
2188 * mini_cm_listen - create a listen node with params 1949 * mini_cm_listen - create a listen node with params
2189 */ 1950 */
2190static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core, 1951static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2191 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info) 1952 struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
2192{ 1953{
2193 struct nes_cm_listener *listener; 1954 struct nes_cm_listener *listener;
2194 unsigned long flags; 1955 unsigned long flags;
2195 1956
2196 nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n", 1957 nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
2197 cm_info->loc_addr, cm_info->loc_port); 1958 cm_info->loc_addr, cm_info->loc_port);
2198 1959
2199 /* cannot have multiple matching listeners */ 1960 /* cannot have multiple matching listeners */
2200 listener = find_listener(cm_core, htonl(cm_info->loc_addr), 1961 listener = find_listener(cm_core, htonl(cm_info->loc_addr),
2201 htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE); 1962 htons(cm_info->loc_port), NES_CM_LISTENER_EITHER_STATE);
2202 if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) { 1963 if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
2203 /* find automatically incs ref count ??? */ 1964 /* find automatically incs ref count ??? */
2204 atomic_dec(&listener->ref_count); 1965 atomic_dec(&listener->ref_count);
@@ -2244,9 +2005,9 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2244 } 2005 }
2245 2006
2246 nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x," 2007 nes_debug(NES_DBG_CM, "Api - listen(): addr=0x%08X, port=0x%04x,"
2247 " listener = %p, backlog = %d, cm_id = %p.\n", 2008 " listener = %p, backlog = %d, cm_id = %p.\n",
2248 cm_info->loc_addr, cm_info->loc_port, 2009 cm_info->loc_addr, cm_info->loc_port,
2249 listener, listener->backlog, listener->cm_id); 2010 listener, listener->backlog, listener->cm_id);
2250 2011
2251 return listener; 2012 return listener;
2252} 2013}
@@ -2256,20 +2017,26 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
2256 * mini_cm_connect - make a connection node with params 2017 * mini_cm_connect - make a connection node with params
2257 */ 2018 */
2258static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core, 2019static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2259 struct nes_vnic *nesvnic, u16 private_data_len, 2020 struct nes_vnic *nesvnic, u16 private_data_len,
2260 void *private_data, struct nes_cm_info *cm_info) 2021 void *private_data, struct nes_cm_info *cm_info)
2261{ 2022{
2262 int ret = 0; 2023 int ret = 0;
2263 struct nes_cm_node *cm_node; 2024 struct nes_cm_node *cm_node;
2264 struct nes_cm_listener *loopbackremotelistener; 2025 struct nes_cm_listener *loopbackremotelistener;
2265 struct nes_cm_node *loopbackremotenode; 2026 struct nes_cm_node *loopbackremotenode;
2266 struct nes_cm_info loopback_cm_info; 2027 struct nes_cm_info loopback_cm_info;
2267 u8 *start_buff; 2028 u16 mpa_frame_size = sizeof(struct ietf_mpa_frame) + private_data_len;
2029 struct ietf_mpa_frame *mpa_frame = NULL;
2268 2030
2269 /* create a CM connection node */ 2031 /* create a CM connection node */
2270 cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL); 2032 cm_node = make_cm_node(cm_core, nesvnic, cm_info, NULL);
2271 if (!cm_node) 2033 if (!cm_node)
2272 return NULL; 2034 return NULL;
2035 mpa_frame = &cm_node->mpa_frame;
2036 memcpy(mpa_frame->key, IEFT_MPA_KEY_REQ, IETF_MPA_KEY_SIZE);
2037 mpa_frame->flags = IETF_MPA_FLAGS_CRC;
2038 mpa_frame->rev = IETF_MPA_VERSION;
2039 mpa_frame->priv_data_len = htons(private_data_len);
2273 2040
2274 /* set our node side to client (active) side */ 2041 /* set our node side to client (active) side */
2275 cm_node->tcp_cntxt.client = 1; 2042 cm_node->tcp_cntxt.client = 1;
@@ -2277,8 +2044,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2277 2044
2278 if (cm_info->loc_addr == cm_info->rem_addr) { 2045 if (cm_info->loc_addr == cm_info->rem_addr) {
2279 loopbackremotelistener = find_listener(cm_core, 2046 loopbackremotelistener = find_listener(cm_core,
2280 ntohl(nesvnic->local_ipaddr), cm_node->rem_port, 2047 ntohl(nesvnic->local_ipaddr), cm_node->rem_port,
2281 NES_CM_LISTENER_ACTIVE_STATE); 2048 NES_CM_LISTENER_ACTIVE_STATE);
2282 if (loopbackremotelistener == NULL) { 2049 if (loopbackremotelistener == NULL) {
2283 create_event(cm_node, NES_CM_EVENT_ABORTED); 2050 create_event(cm_node, NES_CM_EVENT_ABORTED);
2284 } else { 2051 } else {
@@ -2287,7 +2054,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2287 loopback_cm_info.rem_port = cm_info->loc_port; 2054 loopback_cm_info.rem_port = cm_info->loc_port;
2288 loopback_cm_info.cm_id = loopbackremotelistener->cm_id; 2055 loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
2289 loopbackremotenode = make_cm_node(cm_core, nesvnic, 2056 loopbackremotenode = make_cm_node(cm_core, nesvnic,
2290 &loopback_cm_info, loopbackremotelistener); 2057 &loopback_cm_info, loopbackremotelistener);
2291 if (!loopbackremotenode) { 2058 if (!loopbackremotenode) {
2292 rem_ref_cm_node(cm_node->cm_core, cm_node); 2059 rem_ref_cm_node(cm_node->cm_core, cm_node);
2293 return NULL; 2060 return NULL;
@@ -2298,7 +2065,7 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2298 NES_CM_DEFAULT_RCV_WND_SCALE; 2065 NES_CM_DEFAULT_RCV_WND_SCALE;
2299 cm_node->loopbackpartner = loopbackremotenode; 2066 cm_node->loopbackpartner = loopbackremotenode;
2300 memcpy(loopbackremotenode->mpa_frame_buf, private_data, 2067 memcpy(loopbackremotenode->mpa_frame_buf, private_data,
2301 private_data_len); 2068 private_data_len);
2302 loopbackremotenode->mpa_frame_size = private_data_len; 2069 loopbackremotenode->mpa_frame_size = private_data_len;
2303 2070
2304 /* we are done handling this state. */ 2071 /* we are done handling this state. */
@@ -2326,10 +2093,12 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2326 return cm_node; 2093 return cm_node;
2327 } 2094 }
2328 2095
2329 start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); 2096 /* set our node side to client (active) side */
2330 cm_node->mpa_frame_size = private_data_len; 2097 cm_node->tcp_cntxt.client = 1;
2098 /* init our MPA frame ptr */
2099 memcpy(mpa_frame->priv_data, private_data, private_data_len);
2331 2100
2332 memcpy(start_buff, private_data, private_data_len); 2101 cm_node->mpa_frame_size = mpa_frame_size;
2333 2102
2334 /* send a syn and goto syn sent state */ 2103 /* send a syn and goto syn sent state */
2335 cm_node->state = NES_CM_STATE_SYN_SENT; 2104 cm_node->state = NES_CM_STATE_SYN_SENT;
@@ -2338,19 +2107,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2338 if (ret) { 2107 if (ret) {
2339 /* error in sending the syn free up the cm_node struct */ 2108 /* error in sending the syn free up the cm_node struct */
2340 nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest " 2109 nes_debug(NES_DBG_CM, "Api - connect() FAILED: dest "
2341 "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n", 2110 "addr=0x%08X, port=0x%04x, cm_node=%p, cm_id = %p.\n",
2342 cm_node->rem_addr, cm_node->rem_port, cm_node, 2111 cm_node->rem_addr, cm_node->rem_port, cm_node,
2343 cm_node->cm_id); 2112 cm_node->cm_id);
2344 rem_ref_cm_node(cm_node->cm_core, cm_node); 2113 rem_ref_cm_node(cm_node->cm_core, cm_node);
2345 cm_node = NULL; 2114 cm_node = NULL;
2346 } 2115 }
2347 2116
2348 if (cm_node) { 2117 if (cm_node)
2349 nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X," 2118 nes_debug(NES_DBG_CM, "Api - connect(): dest addr=0x%08X,"
2350 "port=0x%04x, cm_node=%p, cm_id = %p.\n", 2119 "port=0x%04x, cm_node=%p, cm_id = %p.\n",
2351 cm_node->rem_addr, cm_node->rem_port, cm_node, 2120 cm_node->rem_addr, cm_node->rem_port, cm_node,
2352 cm_node->cm_id); 2121 cm_node->cm_id);
2353 }
2354 2122
2355 return cm_node; 2123 return cm_node;
2356} 2124}
@@ -2360,7 +2128,8 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
2360 * mini_cm_accept - accept a connection 2128 * mini_cm_accept - accept a connection
2361 * This function is never called 2129 * This function is never called
2362 */ 2130 */
2363static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) 2131static int mini_cm_accept(struct nes_cm_core *cm_core,
2132 struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
2364{ 2133{
2365 return 0; 2134 return 0;
2366} 2135}
@@ -2369,7 +2138,8 @@ static int mini_cm_accept(struct nes_cm_core *cm_core, struct nes_cm_node *cm_no
2369/** 2138/**
2370 * mini_cm_reject - reject and teardown a connection 2139 * mini_cm_reject - reject and teardown a connection
2371 */ 2140 */
2372static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_node) 2141static int mini_cm_reject(struct nes_cm_core *cm_core,
2142 struct ietf_mpa_frame *mpa_frame, struct nes_cm_node *cm_node)
2373{ 2143{
2374 int ret = 0; 2144 int ret = 0;
2375 int err = 0; 2145 int err = 0;
@@ -2379,7 +2149,7 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_no
2379 struct nes_cm_node *loopback = cm_node->loopbackpartner; 2149 struct nes_cm_node *loopback = cm_node->loopbackpartner;
2380 2150
2381 nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n", 2151 nes_debug(NES_DBG_CM, "%s cm_node=%p type=%d state=%d\n",
2382 __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state); 2152 __func__, cm_node, cm_node->tcp_cntxt.client, cm_node->state);
2383 2153
2384 if (cm_node->tcp_cntxt.client) 2154 if (cm_node->tcp_cntxt.client)
2385 return ret; 2155 return ret;
@@ -2400,9 +2170,8 @@ static int mini_cm_reject(struct nes_cm_core *cm_core, struct nes_cm_node *cm_no
2400 err = send_reset(cm_node, NULL); 2170 err = send_reset(cm_node, NULL);
2401 if (err) 2171 if (err)
2402 WARN_ON(1); 2172 WARN_ON(1);
2403 } else { 2173 } else
2404 cm_id->add_ref(cm_id); 2174 cm_id->add_ref(cm_id);
2405 }
2406 } 2175 }
2407 } 2176 }
2408 } else { 2177 } else {
@@ -2477,7 +2246,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
2477 case NES_CM_STATE_TSA: 2246 case NES_CM_STATE_TSA:
2478 if (cm_node->send_entry) 2247 if (cm_node->send_entry)
2479 printk(KERN_ERR "ERROR Close got called from STATE_TSA " 2248 printk(KERN_ERR "ERROR Close got called from STATE_TSA "
2480 "send_entry=%p\n", cm_node->send_entry); 2249 "send_entry=%p\n", cm_node->send_entry);
2481 ret = rem_ref_cm_node(cm_core, cm_node); 2250 ret = rem_ref_cm_node(cm_core, cm_node);
2482 break; 2251 break;
2483 } 2252 }
@@ -2490,7 +2259,7 @@ static int mini_cm_close(struct nes_cm_core *cm_core, struct nes_cm_node *cm_nod
2490 * node state machine 2259 * node state machine
2491 */ 2260 */
2492static int mini_cm_recv_pkt(struct nes_cm_core *cm_core, 2261static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2493 struct nes_vnic *nesvnic, struct sk_buff *skb) 2262 struct nes_vnic *nesvnic, struct sk_buff *skb)
2494{ 2263{
2495 struct nes_cm_node *cm_node = NULL; 2264 struct nes_cm_node *cm_node = NULL;
2496 struct nes_cm_listener *listener = NULL; 2265 struct nes_cm_listener *listener = NULL;
@@ -2502,8 +2271,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2502 2271
2503 if (!skb) 2272 if (!skb)
2504 return 0; 2273 return 0;
2505 if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) 2274 if (skb->len < sizeof(struct iphdr) + sizeof(struct tcphdr)) {
2506 return 0; 2275 return 0;
2276 }
2507 2277
2508 iph = (struct iphdr *)skb->data; 2278 iph = (struct iphdr *)skb->data;
2509 tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr)); 2279 tcph = (struct tcphdr *)(skb->data + sizeof(struct iphdr));
@@ -2521,8 +2291,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2521 2291
2522 do { 2292 do {
2523 cm_node = find_node(cm_core, 2293 cm_node = find_node(cm_core,
2524 nfo.rem_port, nfo.rem_addr, 2294 nfo.rem_port, nfo.rem_addr,
2525 nfo.loc_port, nfo.loc_addr); 2295 nfo.loc_port, nfo.loc_addr);
2526 2296
2527 if (!cm_node) { 2297 if (!cm_node) {
2528 /* Only type of packet accepted are for */ 2298 /* Only type of packet accepted are for */
@@ -2532,8 +2302,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2532 break; 2302 break;
2533 } 2303 }
2534 listener = find_listener(cm_core, nfo.loc_addr, 2304 listener = find_listener(cm_core, nfo.loc_addr,
2535 nfo.loc_port, 2305 nfo.loc_port,
2536 NES_CM_LISTENER_ACTIVE_STATE); 2306 NES_CM_LISTENER_ACTIVE_STATE);
2537 if (!listener) { 2307 if (!listener) {
2538 nfo.cm_id = NULL; 2308 nfo.cm_id = NULL;
2539 nfo.conn_type = 0; 2309 nfo.conn_type = 0;
@@ -2544,10 +2314,10 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2544 nfo.cm_id = listener->cm_id; 2314 nfo.cm_id = listener->cm_id;
2545 nfo.conn_type = listener->conn_type; 2315 nfo.conn_type = listener->conn_type;
2546 cm_node = make_cm_node(cm_core, nesvnic, &nfo, 2316 cm_node = make_cm_node(cm_core, nesvnic, &nfo,
2547 listener); 2317 listener);
2548 if (!cm_node) { 2318 if (!cm_node) {
2549 nes_debug(NES_DBG_CM, "Unable to allocate " 2319 nes_debug(NES_DBG_CM, "Unable to allocate "
2550 "node\n"); 2320 "node\n");
2551 cm_packets_dropped++; 2321 cm_packets_dropped++;
2552 atomic_dec(&listener->ref_count); 2322 atomic_dec(&listener->ref_count);
2553 dev_kfree_skb_any(skb); 2323 dev_kfree_skb_any(skb);
@@ -2563,13 +2333,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
2563 } 2333 }
2564 add_ref_cm_node(cm_node); 2334 add_ref_cm_node(cm_node);
2565 } else if (cm_node->state == NES_CM_STATE_TSA) { 2335 } else if (cm_node->state == NES_CM_STATE_TSA) {
2566 if (cm_node->nesqp->pau_mode) 2336 rem_ref_cm_node(cm_core, cm_node);
2567 nes_queue_mgt_skbs(skb, nesvnic, cm_node->nesqp); 2337 atomic_inc(&cm_accel_dropped_pkts);
2568 else { 2338 dev_kfree_skb_any(skb);
2569 rem_ref_cm_node(cm_core, cm_node);
2570 atomic_inc(&cm_accel_dropped_pkts);
2571 dev_kfree_skb_any(skb);
2572 }
2573 break; 2339 break;
2574 } 2340 }
2575 skb_reset_network_header(skb); 2341 skb_reset_network_header(skb);
@@ -2599,7 +2365,7 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
2599 init_timer(&cm_core->tcp_timer); 2365 init_timer(&cm_core->tcp_timer);
2600 cm_core->tcp_timer.function = nes_cm_timer_tick; 2366 cm_core->tcp_timer.function = nes_cm_timer_tick;
2601 2367
2602 cm_core->mtu = NES_CM_DEFAULT_MTU; 2368 cm_core->mtu = NES_CM_DEFAULT_MTU;
2603 cm_core->state = NES_CM_STATE_INITED; 2369 cm_core->state = NES_CM_STATE_INITED;
2604 cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS; 2370 cm_core->free_tx_pkt_max = NES_CM_DEFAULT_FREE_PKTS;
2605 2371
@@ -2637,8 +2403,9 @@ static int mini_cm_dealloc_core(struct nes_cm_core *cm_core)
2637 2403
2638 barrier(); 2404 barrier();
2639 2405
2640 if (timer_pending(&cm_core->tcp_timer)) 2406 if (timer_pending(&cm_core->tcp_timer)) {
2641 del_timer(&cm_core->tcp_timer); 2407 del_timer(&cm_core->tcp_timer);
2408 }
2642 2409
2643 destroy_workqueue(cm_core->event_wq); 2410 destroy_workqueue(cm_core->event_wq);
2644 destroy_workqueue(cm_core->disconn_wq); 2411 destroy_workqueue(cm_core->disconn_wq);
@@ -2693,8 +2460,8 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
2693 return -EINVAL; 2460 return -EINVAL;
2694 2461
2695 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 | 2462 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_IPV4 |
2696 NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG | 2463 NES_QPCONTEXT_MISC_NO_NAGLE | NES_QPCONTEXT_MISC_DO_NOT_FRAG |
2697 NES_QPCONTEXT_MISC_DROS); 2464 NES_QPCONTEXT_MISC_DROS);
2698 2465
2699 if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale) 2466 if (cm_node->tcp_cntxt.snd_wscale || cm_node->tcp_cntxt.rcv_wscale)
2700 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE); 2467 nesqp->nesqp_context->misc |= cpu_to_le32(NES_QPCONTEXT_MISC_WSCALE);
@@ -2704,15 +2471,15 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
2704 nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16); 2471 nesqp->nesqp_context->mss |= cpu_to_le32(((u32)cm_node->tcp_cntxt.mss) << 16);
2705 2472
2706 nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32( 2473 nesqp->nesqp_context->tcp_state_flow_label |= cpu_to_le32(
2707 (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT); 2474 (u32)NES_QPCONTEXT_TCPSTATE_EST << NES_QPCONTEXT_TCPFLOW_TCP_STATE_SHIFT);
2708 2475
2709 nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( 2476 nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
2710 (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) & 2477 (cm_node->tcp_cntxt.snd_wscale << NES_QPCONTEXT_PDWSCALE_SND_WSCALE_SHIFT) &
2711 NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK); 2478 NES_QPCONTEXT_PDWSCALE_SND_WSCALE_MASK);
2712 2479
2713 nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32( 2480 nesqp->nesqp_context->pd_index_wscale |= cpu_to_le32(
2714 (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) & 2481 (cm_node->tcp_cntxt.rcv_wscale << NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_SHIFT) &
2715 NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK); 2482 NES_QPCONTEXT_PDWSCALE_RCV_WSCALE_MASK);
2716 2483
2717 nesqp->nesqp_context->keepalive = cpu_to_le32(0x80); 2484 nesqp->nesqp_context->keepalive = cpu_to_le32(0x80);
2718 nesqp->nesqp_context->ts_recent = 0; 2485 nesqp->nesqp_context->ts_recent = 0;
@@ -2721,24 +2488,24 @@ static int nes_cm_init_tsa_conn(struct nes_qp *nesqp, struct nes_cm_node *cm_nod
2721 nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd); 2488 nesqp->nesqp_context->snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.snd_wnd);
2722 nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); 2489 nesqp->nesqp_context->rcv_nxt = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
2723 nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd << 2490 nesqp->nesqp_context->rcv_wnd = cpu_to_le32(cm_node->tcp_cntxt.rcv_wnd <<
2724 cm_node->tcp_cntxt.rcv_wscale); 2491 cm_node->tcp_cntxt.rcv_wscale);
2725 nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); 2492 nesqp->nesqp_context->snd_max = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
2726 nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); 2493 nesqp->nesqp_context->snd_una = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
2727 nesqp->nesqp_context->srtt = 0; 2494 nesqp->nesqp_context->srtt = 0;
2728 nesqp->nesqp_context->rttvar = cpu_to_le32(0x6); 2495 nesqp->nesqp_context->rttvar = cpu_to_le32(0x6);
2729 nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000); 2496 nesqp->nesqp_context->ssthresh = cpu_to_le32(0x3FFFC000);
2730 nesqp->nesqp_context->cwnd = cpu_to_le32(2 * cm_node->tcp_cntxt.mss); 2497 nesqp->nesqp_context->cwnd = cpu_to_le32(2*cm_node->tcp_cntxt.mss);
2731 nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt); 2498 nesqp->nesqp_context->snd_wl1 = cpu_to_le32(cm_node->tcp_cntxt.rcv_nxt);
2732 nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num); 2499 nesqp->nesqp_context->snd_wl2 = cpu_to_le32(cm_node->tcp_cntxt.loc_seq_num);
2733 nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd); 2500 nesqp->nesqp_context->max_snd_wnd = cpu_to_le32(cm_node->tcp_cntxt.max_snd_wnd);
2734 2501
2735 nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X," 2502 nes_debug(NES_DBG_CM, "QP%u: rcv_nxt = 0x%08X, snd_nxt = 0x%08X,"
2736 " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n", 2503 " Setting MSS to %u, PDWscale = 0x%08X, rcv_wnd = %u, context misc = 0x%08X.\n",
2737 nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt), 2504 nesqp->hwqp.qp_id, le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
2738 le32_to_cpu(nesqp->nesqp_context->snd_nxt), 2505 le32_to_cpu(nesqp->nesqp_context->snd_nxt),
2739 cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale), 2506 cm_node->tcp_cntxt.mss, le32_to_cpu(nesqp->nesqp_context->pd_index_wscale),
2740 le32_to_cpu(nesqp->nesqp_context->rcv_wnd), 2507 le32_to_cpu(nesqp->nesqp_context->rcv_wnd),
2741 le32_to_cpu(nesqp->nesqp_context->misc)); 2508 le32_to_cpu(nesqp->nesqp_context->misc));
2742 nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd)); 2509 nes_debug(NES_DBG_CM, " snd_wnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->snd_wnd));
2743 nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd)); 2510 nes_debug(NES_DBG_CM, " snd_cwnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->cwnd));
2744 nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd)); 2511 nes_debug(NES_DBG_CM, " max_swnd = 0x%08X.\n", le32_to_cpu(nesqp->nesqp_context->max_snd_wnd));
@@ -2759,7 +2526,7 @@ int nes_cm_disconn(struct nes_qp *nesqp)
2759 2526
2760 work = kzalloc(sizeof *work, GFP_ATOMIC); 2527 work = kzalloc(sizeof *work, GFP_ATOMIC);
2761 if (!work) 2528 if (!work)
2762 return -ENOMEM; /* Timer will clean up */ 2529 return -ENOMEM; /* Timer will clean up */
2763 2530
2764 nes_add_ref(&nesqp->ibqp); 2531 nes_add_ref(&nesqp->ibqp);
2765 work->nesqp = nesqp; 2532 work->nesqp = nesqp;
@@ -2779,7 +2546,7 @@ static void nes_disconnect_worker(struct work_struct *work)
2779 2546
2780 kfree(dwork); 2547 kfree(dwork);
2781 nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n", 2548 nes_debug(NES_DBG_CM, "processing AEQE id 0x%04X for QP%u.\n",
2782 nesqp->last_aeq, nesqp->hwqp.qp_id); 2549 nesqp->last_aeq, nesqp->hwqp.qp_id);
2783 nes_cm_disconn_true(nesqp); 2550 nes_cm_disconn_true(nesqp);
2784 nes_rem_ref(&nesqp->ibqp); 2551 nes_rem_ref(&nesqp->ibqp);
2785} 2552}
@@ -2815,7 +2582,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2815 /* make sure we havent already closed this connection */ 2582 /* make sure we havent already closed this connection */
2816 if (!cm_id) { 2583 if (!cm_id) {
2817 nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n", 2584 nes_debug(NES_DBG_CM, "QP%u disconnect_worker cmid is NULL\n",
2818 nesqp->hwqp.qp_id); 2585 nesqp->hwqp.qp_id);
2819 spin_unlock_irqrestore(&nesqp->lock, flags); 2586 spin_unlock_irqrestore(&nesqp->lock, flags);
2820 return -1; 2587 return -1;
2821 } 2588 }
@@ -2824,14 +2591,13 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2824 nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id); 2591 nes_debug(NES_DBG_CM, "Disconnecting QP%u\n", nesqp->hwqp.qp_id);
2825 2592
2826 original_hw_tcp_state = nesqp->hw_tcp_state; 2593 original_hw_tcp_state = nesqp->hw_tcp_state;
2827 original_ibqp_state = nesqp->ibqp_state; 2594 original_ibqp_state = nesqp->ibqp_state;
2828 last_ae = nesqp->last_aeq; 2595 last_ae = nesqp->last_aeq;
2829 2596
2830 if (nesqp->term_flags) { 2597 if (nesqp->term_flags) {
2831 issue_disconn = 1; 2598 issue_disconn = 1;
2832 issue_close = 1; 2599 issue_close = 1;
2833 nesqp->cm_id = NULL; 2600 nesqp->cm_id = NULL;
2834 del_timer(&nesqp->terminate_timer);
2835 if (nesqp->flush_issued == 0) { 2601 if (nesqp->flush_issued == 0) {
2836 nesqp->flush_issued = 1; 2602 nesqp->flush_issued = 1;
2837 issue_flush = 1; 2603 issue_flush = 1;
@@ -2868,8 +2634,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2868 ibevent.device = nesqp->ibqp.device; 2634 ibevent.device = nesqp->ibqp.device;
2869 ibevent.event = nesqp->terminate_eventtype; 2635 ibevent.event = nesqp->terminate_eventtype;
2870 ibevent.element.qp = &nesqp->ibqp; 2636 ibevent.element.qp = &nesqp->ibqp;
2871 if (nesqp->ibqp.event_handler) 2637 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
2872 nesqp->ibqp.event_handler(&ibevent, nesqp->ibqp.qp_context);
2873 } 2638 }
2874 } 2639 }
2875 2640
@@ -2884,16 +2649,16 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2884 cm_event.private_data_len = 0; 2649 cm_event.private_data_len = 0;
2885 2650
2886 nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event" 2651 nes_debug(NES_DBG_CM, "Generating a CM Disconnect Event"
2887 " for QP%u, SQ Head = %u, SQ Tail = %u. " 2652 " for QP%u, SQ Head = %u, SQ Tail = %u. "
2888 "cm_id = %p, refcount = %u.\n", 2653 "cm_id = %p, refcount = %u.\n",
2889 nesqp->hwqp.qp_id, nesqp->hwqp.sq_head, 2654 nesqp->hwqp.qp_id, nesqp->hwqp.sq_head,
2890 nesqp->hwqp.sq_tail, cm_id, 2655 nesqp->hwqp.sq_tail, cm_id,
2891 atomic_read(&nesqp->refcount)); 2656 atomic_read(&nesqp->refcount));
2892 2657
2893 ret = cm_id->event_handler(cm_id, &cm_event); 2658 ret = cm_id->event_handler(cm_id, &cm_event);
2894 if (ret) 2659 if (ret)
2895 nes_debug(NES_DBG_CM, "OFA CM event_handler " 2660 nes_debug(NES_DBG_CM, "OFA CM event_handler "
2896 "returned, ret=%d\n", ret); 2661 "returned, ret=%d\n", ret);
2897 } 2662 }
2898 2663
2899 if (issue_close) { 2664 if (issue_close) {
@@ -2911,8 +2676,9 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
2911 cm_event.private_data_len = 0; 2676 cm_event.private_data_len = 0;
2912 2677
2913 ret = cm_id->event_handler(cm_id, &cm_event); 2678 ret = cm_id->event_handler(cm_id, &cm_event);
2914 if (ret) 2679 if (ret) {
2915 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); 2680 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
2681 }
2916 2682
2917 cm_id->rem_ref(cm_id); 2683 cm_id->rem_ref(cm_id);
2918 } 2684 }
@@ -2952,8 +2718,8 @@ static int nes_disconnect(struct nes_qp *nesqp, int abrupt)
2952 if (nesqp->lsmm_mr) 2718 if (nesqp->lsmm_mr)
2953 nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr); 2719 nesibdev->ibdev.dereg_mr(nesqp->lsmm_mr);
2954 pci_free_consistent(nesdev->pcidev, 2720 pci_free_consistent(nesdev->pcidev,
2955 nesqp->private_data_len + nesqp->ietf_frame_size, 2721 nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
2956 nesqp->ietf_frame, nesqp->ietf_frame_pbase); 2722 nesqp->ietf_frame, nesqp->ietf_frame_pbase);
2957 } 2723 }
2958 } 2724 }
2959 2725
@@ -2992,12 +2758,6 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
2992 struct ib_phys_buf ibphysbuf; 2758 struct ib_phys_buf ibphysbuf;
2993 struct nes_pd *nespd; 2759 struct nes_pd *nespd;
2994 u64 tagged_offset; 2760 u64 tagged_offset;
2995 u8 mpa_frame_offset = 0;
2996 struct ietf_mpa_v2 *mpa_v2_frame;
2997 u8 start_addr = 0;
2998 u8 *start_ptr = &start_addr;
2999 u8 **start_buff = &start_ptr;
3000 u16 buff_len = 0;
3001 2761
3002 ibqp = nes_get_qp(cm_id->device, conn_param->qpn); 2762 ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
3003 if (!ibqp) 2763 if (!ibqp)
@@ -3038,49 +2798,53 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3038 nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n", 2798 nes_debug(NES_DBG_CM, "netdev refcnt = %u.\n",
3039 netdev_refcnt_read(nesvnic->netdev)); 2799 netdev_refcnt_read(nesvnic->netdev));
3040 2800
3041 nesqp->ietf_frame_size = sizeof(struct ietf_mpa_v2);
3042 /* allocate the ietf frame and space for private data */ 2801 /* allocate the ietf frame and space for private data */
3043 nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev, 2802 nesqp->ietf_frame = pci_alloc_consistent(nesdev->pcidev,
3044 nesqp->ietf_frame_size + conn_param->private_data_len, 2803 sizeof(struct ietf_mpa_frame) + conn_param->private_data_len,
3045 &nesqp->ietf_frame_pbase); 2804 &nesqp->ietf_frame_pbase);
3046 2805
3047 if (!nesqp->ietf_frame) { 2806 if (!nesqp->ietf_frame) {
3048 nes_debug(NES_DBG_CM, "Unable to allocate memory for private data\n"); 2807 nes_debug(NES_DBG_CM, "Unable to allocate memory for private "
2808 "data\n");
3049 return -ENOMEM; 2809 return -ENOMEM;
3050 } 2810 }
3051 mpa_v2_frame = (struct ietf_mpa_v2 *)nesqp->ietf_frame;
3052
3053 if (cm_node->mpa_frame_rev == IETF_MPA_V1)
3054 mpa_frame_offset = 4;
3055 2811
3056 memcpy(mpa_v2_frame->priv_data, conn_param->private_data,
3057 conn_param->private_data_len);
3058 2812
3059 cm_build_mpa_frame(cm_node, start_buff, &buff_len, nesqp->ietf_frame, MPA_KEY_REPLY); 2813 /* setup the MPA frame */
3060 nesqp->private_data_len = conn_param->private_data_len; 2814 nesqp->private_data_len = conn_param->private_data_len;
2815 memcpy(nesqp->ietf_frame->key, IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
2816
2817 memcpy(nesqp->ietf_frame->priv_data, conn_param->private_data,
2818 conn_param->private_data_len);
2819
2820 nesqp->ietf_frame->priv_data_len =
2821 cpu_to_be16(conn_param->private_data_len);
2822 nesqp->ietf_frame->rev = mpa_version;
2823 nesqp->ietf_frame->flags = IETF_MPA_FLAGS_CRC;
3061 2824
3062 /* setup our first outgoing iWarp send WQE (the IETF frame response) */ 2825 /* setup our first outgoing iWarp send WQE (the IETF frame response) */
3063 wqe = &nesqp->hwqp.sq_vbase[0]; 2826 wqe = &nesqp->hwqp.sq_vbase[0];
3064 2827
3065 if (cm_id->remote_addr.sin_addr.s_addr != 2828 if (cm_id->remote_addr.sin_addr.s_addr !=
3066 cm_id->local_addr.sin_addr.s_addr) { 2829 cm_id->local_addr.sin_addr.s_addr) {
3067 u64temp = (unsigned long)nesqp; 2830 u64temp = (unsigned long)nesqp;
3068 nesibdev = nesvnic->nesibdev; 2831 nesibdev = nesvnic->nesibdev;
3069 nespd = nesqp->nespd; 2832 nespd = nesqp->nespd;
3070 ibphysbuf.addr = nesqp->ietf_frame_pbase + mpa_frame_offset; 2833 ibphysbuf.addr = nesqp->ietf_frame_pbase;
3071 ibphysbuf.size = buff_len; 2834 ibphysbuf.size = conn_param->private_data_len +
3072 tagged_offset = (u64)(unsigned long)*start_buff; 2835 sizeof(struct ietf_mpa_frame);
2836 tagged_offset = (u64)(unsigned long)nesqp->ietf_frame;
3073 ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd, 2837 ibmr = nesibdev->ibdev.reg_phys_mr((struct ib_pd *)nespd,
3074 &ibphysbuf, 1, 2838 &ibphysbuf, 1,
3075 IB_ACCESS_LOCAL_WRITE, 2839 IB_ACCESS_LOCAL_WRITE,
3076 &tagged_offset); 2840 &tagged_offset);
3077 if (!ibmr) { 2841 if (!ibmr) {
3078 nes_debug(NES_DBG_CM, "Unable to register memory region" 2842 nes_debug(NES_DBG_CM, "Unable to register memory region"
3079 "for lSMM for cm_node = %p \n", 2843 "for lSMM for cm_node = %p \n",
3080 cm_node); 2844 cm_node);
3081 pci_free_consistent(nesdev->pcidev, 2845 pci_free_consistent(nesdev->pcidev,
3082 nesqp->private_data_len + nesqp->ietf_frame_size, 2846 nesqp->private_data_len+sizeof(struct ietf_mpa_frame),
3083 nesqp->ietf_frame, nesqp->ietf_frame_pbase); 2847 nesqp->ietf_frame, nesqp->ietf_frame_pbase);
3084 return -ENOMEM; 2848 return -ENOMEM;
3085 } 2849 }
3086 2850
@@ -3088,20 +2852,22 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3088 ibmr->device = nespd->ibpd.device; 2852 ibmr->device = nespd->ibpd.device;
3089 nesqp->lsmm_mr = ibmr; 2853 nesqp->lsmm_mr = ibmr;
3090 2854
3091 u64temp |= NES_SW_CONTEXT_ALIGN >> 1; 2855 u64temp |= NES_SW_CONTEXT_ALIGN>>1;
3092 set_wqe_64bit_value(wqe->wqe_words, 2856 set_wqe_64bit_value(wqe->wqe_words,
3093 NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, 2857 NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX,
3094 u64temp); 2858 u64temp);
3095 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] = 2859 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
3096 cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING | 2860 cpu_to_le32(NES_IWARP_SQ_WQE_STREAMING |
3097 NES_IWARP_SQ_WQE_WRPDU); 2861 NES_IWARP_SQ_WQE_WRPDU);
3098 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 2862 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] =
3099 cpu_to_le32(buff_len); 2863 cpu_to_le32(conn_param->private_data_len +
2864 sizeof(struct ietf_mpa_frame));
3100 set_wqe_64bit_value(wqe->wqe_words, 2865 set_wqe_64bit_value(wqe->wqe_words,
3101 NES_IWARP_SQ_WQE_FRAG0_LOW_IDX, 2866 NES_IWARP_SQ_WQE_FRAG0_LOW_IDX,
3102 (u64)(unsigned long)(*start_buff)); 2867 (u64)(unsigned long)nesqp->ietf_frame);
3103 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 2868 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] =
3104 cpu_to_le32(buff_len); 2869 cpu_to_le32(conn_param->private_data_len +
2870 sizeof(struct ietf_mpa_frame));
3105 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey; 2871 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = ibmr->lkey;
3106 if (nesqp->sq_kmapped) { 2872 if (nesqp->sq_kmapped) {
3107 nesqp->sq_kmapped = 0; 2873 nesqp->sq_kmapped = 0;
@@ -3110,7 +2876,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3110 2876
3111 nesqp->nesqp_context->ird_ord_sizes |= 2877 nesqp->nesqp_context->ird_ord_sizes |=
3112 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT | 2878 cpu_to_le32(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
3113 NES_QPCONTEXT_ORDIRD_WRPDU); 2879 NES_QPCONTEXT_ORDIRD_WRPDU);
3114 } else { 2880 } else {
3115 nesqp->nesqp_context->ird_ord_sizes |= 2881 nesqp->nesqp_context->ird_ord_sizes |=
3116 cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU); 2882 cpu_to_le32(NES_QPCONTEXT_ORDIRD_WRPDU);
@@ -3124,11 +2890,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3124 2890
3125 /* nesqp->cm_node = (void *)cm_id->provider_data; */ 2891 /* nesqp->cm_node = (void *)cm_id->provider_data; */
3126 cm_id->provider_data = nesqp; 2892 cm_id->provider_data = nesqp;
3127 nesqp->active_conn = 0; 2893 nesqp->active_conn = 0;
3128 2894
3129 if (cm_node->state == NES_CM_STATE_TSA) 2895 if (cm_node->state == NES_CM_STATE_TSA)
3130 nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n", 2896 nes_debug(NES_DBG_CM, "Already state = TSA for cm_node=%p\n",
3131 cm_node); 2897 cm_node);
3132 2898
3133 nes_cm_init_tsa_conn(nesqp, cm_node); 2899 nes_cm_init_tsa_conn(nesqp, cm_node);
3134 2900
@@ -3137,17 +2903,21 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3137 nesqp->nesqp_context->tcpPorts[1] = 2903 nesqp->nesqp_context->tcpPorts[1] =
3138 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); 2904 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
3139 2905
3140 nesqp->nesqp_context->ip0 = 2906 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
2907 nesqp->nesqp_context->ip0 =
2908 cpu_to_le32(ntohl(nesvnic->local_ipaddr));
2909 else
2910 nesqp->nesqp_context->ip0 =
3141 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); 2911 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
3142 2912
3143 nesqp->nesqp_context->misc2 |= cpu_to_le32( 2913 nesqp->nesqp_context->misc2 |= cpu_to_le32(
3144 (u32)PCI_FUNC(nesdev->pcidev->devfn) << 2914 (u32)PCI_FUNC(nesdev->pcidev->devfn) <<
3145 NES_QPCONTEXT_MISC2_SRC_IP_SHIFT); 2915 NES_QPCONTEXT_MISC2_SRC_IP_SHIFT);
3146 2916
3147 nesqp->nesqp_context->arp_index_vlan |= 2917 nesqp->nesqp_context->arp_index_vlan |=
3148 cpu_to_le32(nes_arp_table(nesdev, 2918 cpu_to_le32(nes_arp_table(nesdev,
3149 le32_to_cpu(nesqp->nesqp_context->ip0), NULL, 2919 le32_to_cpu(nesqp->nesqp_context->ip0), NULL,
3150 NES_ARP_RESOLVE) << 16); 2920 NES_ARP_RESOLVE) << 16);
3151 2921
3152 nesqp->nesqp_context->ts_val_delta = cpu_to_le32( 2922 nesqp->nesqp_context->ts_val_delta = cpu_to_le32(
3153 jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW)); 2923 jiffies - nes_read_indexed(nesdev, NES_IDX_TCP_NOW));
@@ -3162,7 +2932,10 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3162 memset(&nes_quad, 0, sizeof(nes_quad)); 2932 memset(&nes_quad, 0, sizeof(nes_quad));
3163 nes_quad.DstIpAdrIndex = 2933 nes_quad.DstIpAdrIndex =
3164 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 2934 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3165 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; 2935 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
2936 nes_quad.SrcIpadr = nesvnic->local_ipaddr;
2937 else
2938 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3166 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; 2939 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
3167 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; 2940 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
3168 2941
@@ -3170,7 +2943,7 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3170 crc_value = get_crc_value(&nes_quad); 2943 crc_value = get_crc_value(&nes_quad);
3171 nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); 2944 nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
3172 nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n", 2945 nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, CRC = 0x%08X\n",
3173 nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask); 2946 nesqp->hte_index, nesqp->hte_index & adapter->hte_index_mask);
3174 2947
3175 nesqp->hte_index &= adapter->hte_index_mask; 2948 nesqp->hte_index &= adapter->hte_index_mask;
3176 nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); 2949 nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
@@ -3178,15 +2951,17 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3178 cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); 2951 cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
3179 2952
3180 nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = " 2953 nes_debug(NES_DBG_CM, "QP%u, Destination IP = 0x%08X:0x%04X, local = "
3181 "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + " 2954 "0x%08X:0x%04X, rcv_nxt=0x%08X, snd_nxt=0x%08X, mpa + "
3182 "private data length=%u.\n", nesqp->hwqp.qp_id, 2955 "private data length=%zu.\n", nesqp->hwqp.qp_id,
3183 ntohl(cm_id->remote_addr.sin_addr.s_addr), 2956 ntohl(cm_id->remote_addr.sin_addr.s_addr),
3184 ntohs(cm_id->remote_addr.sin_port), 2957 ntohs(cm_id->remote_addr.sin_port),
3185 ntohl(cm_id->local_addr.sin_addr.s_addr), 2958 ntohl(cm_id->local_addr.sin_addr.s_addr),
3186 ntohs(cm_id->local_addr.sin_port), 2959 ntohs(cm_id->local_addr.sin_port),
3187 le32_to_cpu(nesqp->nesqp_context->rcv_nxt), 2960 le32_to_cpu(nesqp->nesqp_context->rcv_nxt),
3188 le32_to_cpu(nesqp->nesqp_context->snd_nxt), 2961 le32_to_cpu(nesqp->nesqp_context->snd_nxt),
3189 buff_len); 2962 conn_param->private_data_len +
2963 sizeof(struct ietf_mpa_frame));
2964
3190 2965
3191 /* notify OF layer that accept event was successful */ 2966 /* notify OF layer that accept event was successful */
3192 cm_id->add_ref(cm_id); 2967 cm_id->add_ref(cm_id);
@@ -3207,12 +2982,12 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3207 nesqp->private_data_len; 2982 nesqp->private_data_len;
3208 /* copy entire MPA frame to our cm_node's frame */ 2983 /* copy entire MPA frame to our cm_node's frame */
3209 memcpy(cm_node->loopbackpartner->mpa_frame_buf, 2984 memcpy(cm_node->loopbackpartner->mpa_frame_buf,
3210 conn_param->private_data, conn_param->private_data_len); 2985 nesqp->ietf_frame->priv_data, nesqp->private_data_len);
3211 create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED); 2986 create_event(cm_node->loopbackpartner, NES_CM_EVENT_CONNECTED);
3212 } 2987 }
3213 if (ret) 2988 if (ret)
3214 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " 2989 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
3215 "ret=%d\n", __func__, __LINE__, ret); 2990 "ret=%d\n", __func__, __LINE__, ret);
3216 2991
3217 return 0; 2992 return 0;
3218} 2993}
@@ -3225,28 +3000,34 @@ int nes_reject(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3225{ 3000{
3226 struct nes_cm_node *cm_node; 3001 struct nes_cm_node *cm_node;
3227 struct nes_cm_node *loopback; 3002 struct nes_cm_node *loopback;
3003
3228 struct nes_cm_core *cm_core; 3004 struct nes_cm_core *cm_core;
3229 u8 *start_buff;
3230 3005
3231 atomic_inc(&cm_rejects); 3006 atomic_inc(&cm_rejects);
3232 cm_node = (struct nes_cm_node *)cm_id->provider_data; 3007 cm_node = (struct nes_cm_node *) cm_id->provider_data;
3233 loopback = cm_node->loopbackpartner; 3008 loopback = cm_node->loopbackpartner;
3234 cm_core = cm_node->cm_core; 3009 cm_core = cm_node->cm_core;
3235 cm_node->cm_id = cm_id; 3010 cm_node->cm_id = cm_id;
3011 cm_node->mpa_frame_size = sizeof(struct ietf_mpa_frame) + pdata_len;
3236 3012
3237 if (pdata_len + sizeof(struct ietf_mpa_v2) > MAX_CM_BUFFER) 3013 if (cm_node->mpa_frame_size > MAX_CM_BUFFER)
3238 return -EINVAL; 3014 return -EINVAL;
3239 3015
3016 memcpy(&cm_node->mpa_frame.key[0], IEFT_MPA_KEY_REP, IETF_MPA_KEY_SIZE);
3240 if (loopback) { 3017 if (loopback) {
3241 memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len); 3018 memcpy(&loopback->mpa_frame.priv_data, pdata, pdata_len);
3242 loopback->mpa_frame.priv_data_len = pdata_len; 3019 loopback->mpa_frame.priv_data_len = pdata_len;
3243 loopback->mpa_frame_size = pdata_len; 3020 loopback->mpa_frame_size = sizeof(struct ietf_mpa_frame) +
3021 pdata_len;
3244 } else { 3022 } else {
3245 start_buff = &cm_node->mpa_frame_buf[0] + sizeof(struct ietf_mpa_v2); 3023 memcpy(&cm_node->mpa_frame.priv_data, pdata, pdata_len);
3246 cm_node->mpa_frame_size = pdata_len; 3024 cm_node->mpa_frame.priv_data_len = cpu_to_be16(pdata_len);
3247 memcpy(start_buff, pdata, pdata_len);
3248 } 3025 }
3249 return cm_core->api->reject(cm_core, cm_node); 3026
3027 cm_node->mpa_frame.rev = mpa_version;
3028 cm_node->mpa_frame.flags = IETF_MPA_FLAGS_CRC | IETF_MPA_FLAGS_REJECT;
3029
3030 return cm_core->api->reject(cm_core, &cm_node->mpa_frame, cm_node);
3250} 3031}
3251 3032
3252 3033
@@ -3273,7 +3054,7 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3273 nesvnic = to_nesvnic(nesqp->ibqp.device); 3054 nesvnic = to_nesvnic(nesqp->ibqp.device);
3274 if (!nesvnic) 3055 if (!nesvnic)
3275 return -EINVAL; 3056 return -EINVAL;
3276 nesdev = nesvnic->nesdev; 3057 nesdev = nesvnic->nesdev;
3277 if (!nesdev) 3058 if (!nesdev)
3278 return -EINVAL; 3059 return -EINVAL;
3279 3060
@@ -3281,12 +3062,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3281 return -EINVAL; 3062 return -EINVAL;
3282 3063
3283 nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = " 3064 nes_debug(NES_DBG_CM, "QP%u, current IP = 0x%08X, Destination IP = "
3284 "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id, 3065 "0x%08X:0x%04X, local = 0x%08X:0x%04X.\n", nesqp->hwqp.qp_id,
3285 ntohl(nesvnic->local_ipaddr), 3066 ntohl(nesvnic->local_ipaddr),
3286 ntohl(cm_id->remote_addr.sin_addr.s_addr), 3067 ntohl(cm_id->remote_addr.sin_addr.s_addr),
3287 ntohs(cm_id->remote_addr.sin_port), 3068 ntohs(cm_id->remote_addr.sin_port),
3288 ntohl(cm_id->local_addr.sin_addr.s_addr), 3069 ntohl(cm_id->local_addr.sin_addr.s_addr),
3289 ntohs(cm_id->local_addr.sin_port)); 3070 ntohs(cm_id->local_addr.sin_port));
3290 3071
3291 atomic_inc(&cm_connects); 3072 atomic_inc(&cm_connects);
3292 nesqp->active_conn = 1; 3073 nesqp->active_conn = 1;
@@ -3298,18 +3079,14 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3298 3079
3299 nesqp->private_data_len = conn_param->private_data_len; 3080 nesqp->private_data_len = conn_param->private_data_len;
3300 nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord); 3081 nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32((u32)conn_param->ord);
3301 /* space for rdma0 read msg */
3302 if (conn_param->ord == 0)
3303 nesqp->nesqp_context->ird_ord_sizes |= cpu_to_le32(1);
3304
3305 nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord); 3082 nes_debug(NES_DBG_CM, "requested ord = 0x%08X.\n", (u32)conn_param->ord);
3306 nes_debug(NES_DBG_CM, "mpa private data len =%u\n", 3083 nes_debug(NES_DBG_CM, "mpa private data len =%u\n",
3307 conn_param->private_data_len); 3084 conn_param->private_data_len);
3308 3085
3309 if (cm_id->local_addr.sin_addr.s_addr != 3086 if (cm_id->local_addr.sin_addr.s_addr !=
3310 cm_id->remote_addr.sin_addr.s_addr) { 3087 cm_id->remote_addr.sin_addr.s_addr) {
3311 nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), 3088 nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
3312 PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD); 3089 PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
3313 apbvt_set = 1; 3090 apbvt_set = 1;
3314 } 3091 }
3315 3092
@@ -3325,13 +3102,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3325 3102
3326 /* create a connect CM node connection */ 3103 /* create a connect CM node connection */
3327 cm_node = g_cm_core->api->connect(g_cm_core, nesvnic, 3104 cm_node = g_cm_core->api->connect(g_cm_core, nesvnic,
3328 conn_param->private_data_len, (void *)conn_param->private_data, 3105 conn_param->private_data_len, (void *)conn_param->private_data,
3329 &cm_info); 3106 &cm_info);
3330 if (!cm_node) { 3107 if (!cm_node) {
3331 if (apbvt_set) 3108 if (apbvt_set)
3332 nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port), 3109 nes_manage_apbvt(nesvnic, ntohs(cm_id->local_addr.sin_port),
3333 PCI_FUNC(nesdev->pcidev->devfn), 3110 PCI_FUNC(nesdev->pcidev->devfn),
3334 NES_MANAGE_APBVT_DEL); 3111 NES_MANAGE_APBVT_DEL);
3335 3112
3336 cm_id->rem_ref(cm_id); 3113 cm_id->rem_ref(cm_id);
3337 return -ENOMEM; 3114 return -ENOMEM;
@@ -3381,7 +3158,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3381 cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info); 3158 cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
3382 if (!cm_node) { 3159 if (!cm_node) {
3383 printk(KERN_ERR "%s[%u] Error returned from listen API call\n", 3160 printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
3384 __func__, __LINE__); 3161 __func__, __LINE__);
3385 return -ENOMEM; 3162 return -ENOMEM;
3386 } 3163 }
3387 3164
@@ -3389,12 +3166,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
3389 3166
3390 if (!cm_node->reused_node) { 3167 if (!cm_node->reused_node) {
3391 err = nes_manage_apbvt(nesvnic, 3168 err = nes_manage_apbvt(nesvnic,
3392 ntohs(cm_id->local_addr.sin_port), 3169 ntohs(cm_id->local_addr.sin_port),
3393 PCI_FUNC(nesvnic->nesdev->pcidev->devfn), 3170 PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
3394 NES_MANAGE_APBVT_ADD); 3171 NES_MANAGE_APBVT_ADD);
3395 if (err) { 3172 if (err) {
3396 printk(KERN_ERR "nes_manage_apbvt call returned %d.\n", 3173 printk(KERN_ERR "nes_manage_apbvt call returned %d.\n",
3397 err); 3174 err);
3398 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node); 3175 g_cm_core->api->stop_listener(g_cm_core, (void *)cm_node);
3399 return err; 3176 return err;
3400 } 3177 }
@@ -3431,13 +3208,13 @@ int nes_destroy_listen(struct iw_cm_id *cm_id)
3431int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice) 3208int nes_cm_recv(struct sk_buff *skb, struct net_device *netdevice)
3432{ 3209{
3433 int rc = 0; 3210 int rc = 0;
3434
3435 cm_packets_received++; 3211 cm_packets_received++;
3436 if ((g_cm_core) && (g_cm_core->api)) 3212 if ((g_cm_core) && (g_cm_core->api)) {
3437 rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb); 3213 rc = g_cm_core->api->recv_pkt(g_cm_core, netdev_priv(netdevice), skb);
3438 else 3214 } else {
3439 nes_debug(NES_DBG_CM, "Unable to process packet for CM," 3215 nes_debug(NES_DBG_CM, "Unable to process packet for CM,"
3440 " cm is not setup properly.\n"); 3216 " cm is not setup properly.\n");
3217 }
3441 3218
3442 return rc; 3219 return rc;
3443} 3220}
@@ -3452,10 +3229,11 @@ int nes_cm_start(void)
3452 nes_debug(NES_DBG_CM, "\n"); 3229 nes_debug(NES_DBG_CM, "\n");
3453 /* create the primary CM core, pass this handle to subsequent core inits */ 3230 /* create the primary CM core, pass this handle to subsequent core inits */
3454 g_cm_core = nes_cm_alloc_core(); 3231 g_cm_core = nes_cm_alloc_core();
3455 if (g_cm_core) 3232 if (g_cm_core) {
3456 return 0; 3233 return 0;
3457 else 3234 } else {
3458 return -ENOMEM; 3235 return -ENOMEM;
3236 }
3459} 3237}
3460 3238
3461 3239
@@ -3476,6 +3254,7 @@ int nes_cm_stop(void)
3476 */ 3254 */
3477static void cm_event_connected(struct nes_cm_event *event) 3255static void cm_event_connected(struct nes_cm_event *event)
3478{ 3256{
3257 u64 u64temp;
3479 struct nes_qp *nesqp; 3258 struct nes_qp *nesqp;
3480 struct nes_vnic *nesvnic; 3259 struct nes_vnic *nesvnic;
3481 struct nes_device *nesdev; 3260 struct nes_device *nesdev;
@@ -3484,6 +3263,7 @@ static void cm_event_connected(struct nes_cm_event *event)
3484 struct ib_qp_attr attr; 3263 struct ib_qp_attr attr;
3485 struct iw_cm_id *cm_id; 3264 struct iw_cm_id *cm_id;
3486 struct iw_cm_event cm_event; 3265 struct iw_cm_event cm_event;
3266 struct nes_hw_qp_wqe *wqe;
3487 struct nes_v4_quad nes_quad; 3267 struct nes_v4_quad nes_quad;
3488 u32 crc_value; 3268 u32 crc_value;
3489 int ret; 3269 int ret;
@@ -3497,16 +3277,17 @@ static void cm_event_connected(struct nes_cm_event *event)
3497 nesdev = nesvnic->nesdev; 3277 nesdev = nesvnic->nesdev;
3498 nesadapter = nesdev->nesadapter; 3278 nesadapter = nesdev->nesadapter;
3499 3279
3500 if (nesqp->destroyed) 3280 if (nesqp->destroyed) {
3501 return; 3281 return;
3282 }
3502 atomic_inc(&cm_connecteds); 3283 atomic_inc(&cm_connecteds);
3503 nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on" 3284 nes_debug(NES_DBG_CM, "QP%u attempting to connect to 0x%08X:0x%04X on"
3504 " local port 0x%04X. jiffies = %lu.\n", 3285 " local port 0x%04X. jiffies = %lu.\n",
3505 nesqp->hwqp.qp_id, 3286 nesqp->hwqp.qp_id,
3506 ntohl(cm_id->remote_addr.sin_addr.s_addr), 3287 ntohl(cm_id->remote_addr.sin_addr.s_addr),
3507 ntohs(cm_id->remote_addr.sin_port), 3288 ntohs(cm_id->remote_addr.sin_port),
3508 ntohs(cm_id->local_addr.sin_port), 3289 ntohs(cm_id->local_addr.sin_port),
3509 jiffies); 3290 jiffies);
3510 3291
3511 nes_cm_init_tsa_conn(nesqp, cm_node); 3292 nes_cm_init_tsa_conn(nesqp, cm_node);
3512 3293
@@ -3515,7 +3296,11 @@ static void cm_event_connected(struct nes_cm_event *event)
3515 cpu_to_le16(ntohs(cm_id->local_addr.sin_port)); 3296 cpu_to_le16(ntohs(cm_id->local_addr.sin_port));
3516 nesqp->nesqp_context->tcpPorts[1] = 3297 nesqp->nesqp_context->tcpPorts[1] =
3517 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port)); 3298 cpu_to_le16(ntohs(cm_id->remote_addr.sin_port));
3518 nesqp->nesqp_context->ip0 = 3299 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
3300 nesqp->nesqp_context->ip0 =
3301 cpu_to_le32(ntohl(nesvnic->local_ipaddr));
3302 else
3303 nesqp->nesqp_context->ip0 =
3519 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr)); 3304 cpu_to_le32(ntohl(cm_id->remote_addr.sin_addr.s_addr));
3520 3305
3521 nesqp->nesqp_context->misc2 |= cpu_to_le32( 3306 nesqp->nesqp_context->misc2 |= cpu_to_le32(
@@ -3533,18 +3318,49 @@ static void cm_event_connected(struct nes_cm_event *event)
3533 NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT); 3318 NES_QPCONTEXT_ORDIRD_IWARP_MODE_SHIFT);
3534 3319
3535 /* Adjust tail for not having a LSMM */ 3320 /* Adjust tail for not having a LSMM */
3536 /*nesqp->hwqp.sq_tail = 1;*/ 3321 nesqp->hwqp.sq_tail = 1;
3322
3323#if defined(NES_SEND_FIRST_WRITE)
3324 if (cm_node->send_write0) {
3325 nes_debug(NES_DBG_CM, "Sending first write.\n");
3326 wqe = &nesqp->hwqp.sq_vbase[0];
3327 u64temp = (unsigned long)nesqp;
3328 u64temp |= NES_SW_CONTEXT_ALIGN>>1;
3329 set_wqe_64bit_value(wqe->wqe_words,
3330 NES_IWARP_SQ_WQE_COMP_CTX_LOW_IDX, u64temp);
3331 wqe->wqe_words[NES_IWARP_SQ_WQE_MISC_IDX] =
3332 cpu_to_le32(NES_IWARP_SQ_OP_RDMAW);
3333 wqe->wqe_words[NES_IWARP_SQ_WQE_TOTAL_PAYLOAD_IDX] = 0;
3334 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_LOW_IDX] = 0;
3335 wqe->wqe_words[NES_IWARP_SQ_WQE_FRAG0_HIGH_IDX] = 0;
3336 wqe->wqe_words[NES_IWARP_SQ_WQE_LENGTH0_IDX] = 0;
3337 wqe->wqe_words[NES_IWARP_SQ_WQE_STAG0_IDX] = 0;
3537 3338
3538 build_rdma0_msg(cm_node, &nesqp); 3339 if (nesqp->sq_kmapped) {
3340 nesqp->sq_kmapped = 0;
3341 kunmap(nesqp->page);
3342 }
3539 3343
3540 nes_write32(nesdev->regs + NES_WQE_ALLOC, 3344 /* use the reserved spot on the WQ for the extra first WQE */
3541 (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id); 3345 nesqp->nesqp_context->ird_ord_sizes &=
3346 cpu_to_le32(~(NES_QPCONTEXT_ORDIRD_LSMM_PRESENT |
3347 NES_QPCONTEXT_ORDIRD_WRPDU |
3348 NES_QPCONTEXT_ORDIRD_ALSMM));
3349 nesqp->skip_lsmm = 1;
3350 nesqp->hwqp.sq_tail = 0;
3351 nes_write32(nesdev->regs + NES_WQE_ALLOC,
3352 (1 << 24) | 0x00800000 | nesqp->hwqp.qp_id);
3353 }
3354#endif
3542 3355
3543 memset(&nes_quad, 0, sizeof(nes_quad)); 3356 memset(&nes_quad, 0, sizeof(nes_quad));
3544 3357
3545 nes_quad.DstIpAdrIndex = 3358 nes_quad.DstIpAdrIndex =
3546 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24); 3359 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
3547 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr; 3360 if (ipv4_is_loopback(cm_id->remote_addr.sin_addr.s_addr))
3361 nes_quad.SrcIpadr = nesvnic->local_ipaddr;
3362 else
3363 nes_quad.SrcIpadr = cm_id->remote_addr.sin_addr.s_addr;
3548 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port; 3364 nes_quad.TcpPorts[0] = cm_id->remote_addr.sin_port;
3549 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port; 3365 nes_quad.TcpPorts[1] = cm_id->local_addr.sin_port;
3550 3366
@@ -3552,13 +3368,13 @@ static void cm_event_connected(struct nes_cm_event *event)
3552 crc_value = get_crc_value(&nes_quad); 3368 crc_value = get_crc_value(&nes_quad);
3553 nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff); 3369 nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
3554 nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n", 3370 nes_debug(NES_DBG_CM, "HTE Index = 0x%08X, After CRC = 0x%08X\n",
3555 nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask); 3371 nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
3556 3372
3557 nesqp->hte_index &= nesadapter->hte_index_mask; 3373 nesqp->hte_index &= nesadapter->hte_index_mask;
3558 nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index); 3374 nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
3559 3375
3560 nesqp->ietf_frame = &cm_node->mpa_frame; 3376 nesqp->ietf_frame = &cm_node->mpa_frame;
3561 nesqp->private_data_len = (u8)cm_node->mpa_frame_size; 3377 nesqp->private_data_len = (u8) cm_node->mpa_frame_size;
3562 cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node); 3378 cm_node->cm_core->api->accelerated(cm_node->cm_core, cm_node);
3563 3379
3564 /* notify OF layer we successfully created the requested connection */ 3380 /* notify OF layer we successfully created the requested connection */
@@ -3570,9 +3386,7 @@ static void cm_event_connected(struct nes_cm_event *event)
3570 cm_event.remote_addr = cm_id->remote_addr; 3386 cm_event.remote_addr = cm_id->remote_addr;
3571 3387
3572 cm_event.private_data = (void *)event->cm_node->mpa_frame_buf; 3388 cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
3573 cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size; 3389 cm_event.private_data_len = (u8) event->cm_node->mpa_frame_size;
3574 cm_event.ird = cm_node->ird_size;
3575 cm_event.ord = cm_node->ord_size;
3576 3390
3577 cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr; 3391 cm_event.local_addr.sin_addr.s_addr = event->cm_info.rem_addr;
3578 ret = cm_id->event_handler(cm_id, &cm_event); 3392 ret = cm_id->event_handler(cm_id, &cm_event);
@@ -3580,12 +3394,12 @@ static void cm_event_connected(struct nes_cm_event *event)
3580 3394
3581 if (ret) 3395 if (ret)
3582 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " 3396 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
3583 "ret=%d\n", __func__, __LINE__, ret); 3397 "ret=%d\n", __func__, __LINE__, ret);
3584 attr.qp_state = IB_QPS_RTS; 3398 attr.qp_state = IB_QPS_RTS;
3585 nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL); 3399 nes_modify_qp(&nesqp->ibqp, &attr, IB_QP_STATE, NULL);
3586 3400
3587 nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = " 3401 nes_debug(NES_DBG_CM, "Exiting connect thread for QP%u. jiffies = "
3588 "%lu\n", nesqp->hwqp.qp_id, jiffies); 3402 "%lu\n", nesqp->hwqp.qp_id, jiffies);
3589 3403
3590 return; 3404 return;
3591} 3405}
@@ -3606,14 +3420,16 @@ static void cm_event_connect_error(struct nes_cm_event *event)
3606 return; 3420 return;
3607 3421
3608 cm_id = event->cm_node->cm_id; 3422 cm_id = event->cm_node->cm_id;
3609 if (!cm_id) 3423 if (!cm_id) {
3610 return; 3424 return;
3425 }
3611 3426
3612 nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id); 3427 nes_debug(NES_DBG_CM, "cm_node=%p, cm_id=%p\n", event->cm_node, cm_id);
3613 nesqp = cm_id->provider_data; 3428 nesqp = cm_id->provider_data;
3614 3429
3615 if (!nesqp) 3430 if (!nesqp) {
3616 return; 3431 return;
3432 }
3617 3433
3618 /* notify OF layer about this connection error event */ 3434 /* notify OF layer about this connection error event */
3619 /* cm_id->rem_ref(cm_id); */ 3435 /* cm_id->rem_ref(cm_id); */
@@ -3628,14 +3444,14 @@ static void cm_event_connect_error(struct nes_cm_event *event)
3628 cm_event.private_data_len = 0; 3444 cm_event.private_data_len = 0;
3629 3445
3630 nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, " 3446 nes_debug(NES_DBG_CM, "call CM_EVENT REJECTED, local_addr=%08x, "
3631 "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr, 3447 "remove_addr=%08x\n", cm_event.local_addr.sin_addr.s_addr,
3632 cm_event.remote_addr.sin_addr.s_addr); 3448 cm_event.remote_addr.sin_addr.s_addr);
3633 3449
3634 ret = cm_id->event_handler(cm_id, &cm_event); 3450 ret = cm_id->event_handler(cm_id, &cm_event);
3635 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret); 3451 nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
3636 if (ret) 3452 if (ret)
3637 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, " 3453 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, "
3638 "ret=%d\n", __func__, __LINE__, ret); 3454 "ret=%d\n", __func__, __LINE__, ret);
3639 cm_id->rem_ref(cm_id); 3455 cm_id->rem_ref(cm_id);
3640 3456
3641 rem_ref_cm_node(event->cm_node->cm_core, event->cm_node); 3457 rem_ref_cm_node(event->cm_node->cm_core, event->cm_node);
@@ -3705,7 +3521,7 @@ static void cm_event_reset(struct nes_cm_event *event)
3705 */ 3521 */
3706static void cm_event_mpa_req(struct nes_cm_event *event) 3522static void cm_event_mpa_req(struct nes_cm_event *event)
3707{ 3523{
3708 struct iw_cm_id *cm_id; 3524 struct iw_cm_id *cm_id;
3709 struct iw_cm_event cm_event; 3525 struct iw_cm_event cm_event;
3710 int ret; 3526 int ret;
3711 struct nes_cm_node *cm_node; 3527 struct nes_cm_node *cm_node;
@@ -3717,7 +3533,7 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
3717 3533
3718 atomic_inc(&cm_connect_reqs); 3534 atomic_inc(&cm_connect_reqs);
3719 nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", 3535 nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
3720 cm_node, cm_id, jiffies); 3536 cm_node, cm_id, jiffies);
3721 3537
3722 cm_event.event = IW_CM_EVENT_CONNECT_REQUEST; 3538 cm_event.event = IW_CM_EVENT_CONNECT_REQUEST;
3723 cm_event.status = 0; 3539 cm_event.status = 0;
@@ -3731,21 +3547,19 @@ static void cm_event_mpa_req(struct nes_cm_event *event)
3731 cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port); 3547 cm_event.remote_addr.sin_port = htons(event->cm_info.rem_port);
3732 cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); 3548 cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
3733 cm_event.private_data = cm_node->mpa_frame_buf; 3549 cm_event.private_data = cm_node->mpa_frame_buf;
3734 cm_event.private_data_len = (u8)cm_node->mpa_frame_size; 3550 cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
3735 cm_event.ird = cm_node->ird_size;
3736 cm_event.ord = cm_node->ord_size;
3737 3551
3738 ret = cm_id->event_handler(cm_id, &cm_event); 3552 ret = cm_id->event_handler(cm_id, &cm_event);
3739 if (ret) 3553 if (ret)
3740 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", 3554 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
3741 __func__, __LINE__, ret); 3555 __func__, __LINE__, ret);
3742 return; 3556 return;
3743} 3557}
3744 3558
3745 3559
3746static void cm_event_mpa_reject(struct nes_cm_event *event) 3560static void cm_event_mpa_reject(struct nes_cm_event *event)
3747{ 3561{
3748 struct iw_cm_id *cm_id; 3562 struct iw_cm_id *cm_id;
3749 struct iw_cm_event cm_event; 3563 struct iw_cm_event cm_event;
3750 struct nes_cm_node *cm_node; 3564 struct nes_cm_node *cm_node;
3751 int ret; 3565 int ret;
@@ -3757,7 +3571,7 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
3757 3571
3758 atomic_inc(&cm_connect_reqs); 3572 atomic_inc(&cm_connect_reqs);
3759 nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n", 3573 nes_debug(NES_DBG_CM, "cm_node = %p - cm_id = %p, jiffies = %lu\n",
3760 cm_node, cm_id, jiffies); 3574 cm_node, cm_id, jiffies);
3761 3575
3762 cm_event.event = IW_CM_EVENT_CONNECT_REPLY; 3576 cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
3763 cm_event.status = -ECONNREFUSED; 3577 cm_event.status = -ECONNREFUSED;
@@ -3772,17 +3586,17 @@ static void cm_event_mpa_reject(struct nes_cm_event *event)
3772 cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr); 3586 cm_event.remote_addr.sin_addr.s_addr = htonl(event->cm_info.rem_addr);
3773 3587
3774 cm_event.private_data = cm_node->mpa_frame_buf; 3588 cm_event.private_data = cm_node->mpa_frame_buf;
3775 cm_event.private_data_len = (u8)cm_node->mpa_frame_size; 3589 cm_event.private_data_len = (u8) cm_node->mpa_frame_size;
3776 3590
3777 nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, " 3591 nes_debug(NES_DBG_CM, "call CM_EVENT_MPA_REJECTED, local_addr=%08x, "
3778 "remove_addr=%08x\n", 3592 "remove_addr=%08x\n",
3779 cm_event.local_addr.sin_addr.s_addr, 3593 cm_event.local_addr.sin_addr.s_addr,
3780 cm_event.remote_addr.sin_addr.s_addr); 3594 cm_event.remote_addr.sin_addr.s_addr);
3781 3595
3782 ret = cm_id->event_handler(cm_id, &cm_event); 3596 ret = cm_id->event_handler(cm_id, &cm_event);
3783 if (ret) 3597 if (ret)
3784 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n", 3598 printk(KERN_ERR "%s[%u] OFA CM event_handler returned, ret=%d\n",
3785 __func__, __LINE__, ret); 3599 __func__, __LINE__, ret);
3786 3600
3787 return; 3601 return;
3788} 3602}
@@ -3801,7 +3615,7 @@ static int nes_cm_post_event(struct nes_cm_event *event)
3801 event->cm_info.cm_id->add_ref(event->cm_info.cm_id); 3615 event->cm_info.cm_id->add_ref(event->cm_info.cm_id);
3802 INIT_WORK(&event->event_work, nes_cm_event_handler); 3616 INIT_WORK(&event->event_work, nes_cm_event_handler);
3803 nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n", 3617 nes_debug(NES_DBG_CM, "cm_node=%p queue_work, event=%p\n",
3804 event->cm_node, event); 3618 event->cm_node, event);
3805 3619
3806 queue_work(event->cm_node->cm_core->event_wq, &event->event_work); 3620 queue_work(event->cm_node->cm_core->event_wq, &event->event_work);
3807 3621
@@ -3818,7 +3632,7 @@ static int nes_cm_post_event(struct nes_cm_event *event)
3818static void nes_cm_event_handler(struct work_struct *work) 3632static void nes_cm_event_handler(struct work_struct *work)
3819{ 3633{
3820 struct nes_cm_event *event = container_of(work, struct nes_cm_event, 3634 struct nes_cm_event *event = container_of(work, struct nes_cm_event,
3821 event_work); 3635 event_work);
3822 struct nes_cm_core *cm_core; 3636 struct nes_cm_core *cm_core;
3823 3637
3824 if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core)) 3638 if ((!event) || (!event->cm_node) || (!event->cm_node->cm_core))
@@ -3826,29 +3640,29 @@ static void nes_cm_event_handler(struct work_struct *work)
3826 3640
3827 cm_core = event->cm_node->cm_core; 3641 cm_core = event->cm_node->cm_core;
3828 nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n", 3642 nes_debug(NES_DBG_CM, "event=%p, event->type=%u, events posted=%u\n",
3829 event, event->type, atomic_read(&cm_core->events_posted)); 3643 event, event->type, atomic_read(&cm_core->events_posted));
3830 3644
3831 switch (event->type) { 3645 switch (event->type) {
3832 case NES_CM_EVENT_MPA_REQ: 3646 case NES_CM_EVENT_MPA_REQ:
3833 cm_event_mpa_req(event); 3647 cm_event_mpa_req(event);
3834 nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n", 3648 nes_debug(NES_DBG_CM, "cm_node=%p CM Event: MPA REQUEST\n",
3835 event->cm_node); 3649 event->cm_node);
3836 break; 3650 break;
3837 case NES_CM_EVENT_RESET: 3651 case NES_CM_EVENT_RESET:
3838 nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n", 3652 nes_debug(NES_DBG_CM, "cm_node = %p CM Event: RESET\n",
3839 event->cm_node); 3653 event->cm_node);
3840 cm_event_reset(event); 3654 cm_event_reset(event);
3841 break; 3655 break;
3842 case NES_CM_EVENT_CONNECTED: 3656 case NES_CM_EVENT_CONNECTED:
3843 if ((!event->cm_node->cm_id) || 3657 if ((!event->cm_node->cm_id) ||
3844 (event->cm_node->state != NES_CM_STATE_TSA)) 3658 (event->cm_node->state != NES_CM_STATE_TSA))
3845 break; 3659 break;
3846 cm_event_connected(event); 3660 cm_event_connected(event);
3847 nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n"); 3661 nes_debug(NES_DBG_CM, "CM Event: CONNECTED\n");
3848 break; 3662 break;
3849 case NES_CM_EVENT_MPA_REJECT: 3663 case NES_CM_EVENT_MPA_REJECT:
3850 if ((!event->cm_node->cm_id) || 3664 if ((!event->cm_node->cm_id) ||
3851 (event->cm_node->state == NES_CM_STATE_TSA)) 3665 (event->cm_node->state == NES_CM_STATE_TSA))
3852 break; 3666 break;
3853 cm_event_mpa_reject(event); 3667 cm_event_mpa_reject(event);
3854 nes_debug(NES_DBG_CM, "CM Event: REJECT\n"); 3668 nes_debug(NES_DBG_CM, "CM Event: REJECT\n");
@@ -3856,7 +3670,7 @@ static void nes_cm_event_handler(struct work_struct *work)
3856 3670
3857 case NES_CM_EVENT_ABORTED: 3671 case NES_CM_EVENT_ABORTED:
3858 if ((!event->cm_node->cm_id) || 3672 if ((!event->cm_node->cm_id) ||
3859 (event->cm_node->state == NES_CM_STATE_TSA)) 3673 (event->cm_node->state == NES_CM_STATE_TSA))
3860 break; 3674 break;
3861 cm_event_connect_error(event); 3675 cm_event_connect_error(event);
3862 nes_debug(NES_DBG_CM, "CM Event: ABORTED\n"); 3676 nes_debug(NES_DBG_CM, "CM Event: ABORTED\n");
diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h
index 4646e666608..d9825fda70a 100644
--- a/drivers/infiniband/hw/nes/nes_cm.h
+++ b/drivers/infiniband/hw/nes/nes_cm.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -48,16 +48,7 @@
48#define IETF_MPA_KEY_SIZE 16 48#define IETF_MPA_KEY_SIZE 16
49#define IETF_MPA_VERSION 1 49#define IETF_MPA_VERSION 1
50#define IETF_MAX_PRIV_DATA_LEN 512 50#define IETF_MAX_PRIV_DATA_LEN 512
51#define IETF_MPA_FRAME_SIZE 20 51#define IETF_MPA_FRAME_SIZE 20
52#define IETF_RTR_MSG_SIZE 4
53#define IETF_MPA_V2_FLAG 0x10
54
55/* IETF RTR MSG Fields */
56#define IETF_PEER_TO_PEER 0x8000
57#define IETF_FLPDU_ZERO_LEN 0x4000
58#define IETF_RDMA0_WRITE 0x8000
59#define IETF_RDMA0_READ 0x4000
60#define IETF_NO_IRD_ORD 0x3FFF
61 52
62enum ietf_mpa_flags { 53enum ietf_mpa_flags {
63 IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */ 54 IETF_MPA_FLAGS_MARKERS = 0x80, /* receive Markers */
@@ -65,7 +56,7 @@ enum ietf_mpa_flags {
65 IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */ 56 IETF_MPA_FLAGS_REJECT = 0x20, /* Reject */
66}; 57};
67 58
68struct ietf_mpa_v1 { 59struct ietf_mpa_frame {
69 u8 key[IETF_MPA_KEY_SIZE]; 60 u8 key[IETF_MPA_KEY_SIZE];
70 u8 flags; 61 u8 flags;
71 u8 rev; 62 u8 rev;
@@ -75,20 +66,6 @@ struct ietf_mpa_v1 {
75 66
76#define ietf_mpa_req_resp_frame ietf_mpa_frame 67#define ietf_mpa_req_resp_frame ietf_mpa_frame
77 68
78struct ietf_rtr_msg {
79 __be16 ctrl_ird;
80 __be16 ctrl_ord;
81};
82
83struct ietf_mpa_v2 {
84 u8 key[IETF_MPA_KEY_SIZE];
85 u8 flags;
86 u8 rev;
87 __be16 priv_data_len;
88 struct ietf_rtr_msg rtr_msg;
89 u8 priv_data[0];
90};
91
92struct nes_v4_quad { 69struct nes_v4_quad {
93 u32 rsvd0; 70 u32 rsvd0;
94 __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */ 71 __le32 DstIpAdrIndex; /* Only most significant 5 bits are valid */
@@ -194,7 +171,8 @@ struct nes_timer_entry {
194 171
195#define NES_CM_DEF_SEQ2 0x18ed5740 172#define NES_CM_DEF_SEQ2 0x18ed5740
196#define NES_CM_DEF_LOCAL_ID2 0xb807 173#define NES_CM_DEF_LOCAL_ID2 0xb807
197#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_RTR_MSG_SIZE + IETF_MAX_PRIV_DATA_LEN) 174#define MAX_CM_BUFFER (IETF_MPA_FRAME_SIZE + IETF_MAX_PRIV_DATA_LEN)
175
198 176
199typedef u32 nes_addr_t; 177typedef u32 nes_addr_t;
200 178
@@ -226,21 +204,6 @@ enum nes_cm_node_state {
226 NES_CM_STATE_CLOSED 204 NES_CM_STATE_CLOSED
227}; 205};
228 206
229enum mpa_frame_version {
230 IETF_MPA_V1 = 1,
231 IETF_MPA_V2 = 2
232};
233
234enum mpa_frame_key {
235 MPA_KEY_REQUEST,
236 MPA_KEY_REPLY
237};
238
239enum send_rdma0 {
240 SEND_RDMA_READ_ZERO = 1,
241 SEND_RDMA_WRITE_ZERO = 2
242};
243
244enum nes_tcpip_pkt_type { 207enum nes_tcpip_pkt_type {
245 NES_PKT_TYPE_UNKNOWN, 208 NES_PKT_TYPE_UNKNOWN,
246 NES_PKT_TYPE_SYN, 209 NES_PKT_TYPE_SYN,
@@ -282,9 +245,9 @@ struct nes_cm_tcp_context {
282 245
283 246
284enum nes_cm_listener_state { 247enum nes_cm_listener_state {
285 NES_CM_LISTENER_PASSIVE_STATE = 1, 248 NES_CM_LISTENER_PASSIVE_STATE=1,
286 NES_CM_LISTENER_ACTIVE_STATE = 2, 249 NES_CM_LISTENER_ACTIVE_STATE=2,
287 NES_CM_LISTENER_EITHER_STATE = 3 250 NES_CM_LISTENER_EITHER_STATE=3
288}; 251};
289 252
290struct nes_cm_listener { 253struct nes_cm_listener {
@@ -320,20 +283,16 @@ struct nes_cm_node {
320 283
321 struct nes_cm_node *loopbackpartner; 284 struct nes_cm_node *loopbackpartner;
322 285
323 struct nes_timer_entry *send_entry; 286 struct nes_timer_entry *send_entry;
324 struct nes_timer_entry *recv_entry; 287
325 spinlock_t retrans_list_lock; 288 spinlock_t retrans_list_lock;
326 enum send_rdma0 send_rdma0_op; 289 struct nes_timer_entry *recv_entry;
327 290
291 int send_write0;
328 union { 292 union {
329 struct ietf_mpa_v1 mpa_frame; 293 struct ietf_mpa_frame mpa_frame;
330 struct ietf_mpa_v2 mpa_v2_frame; 294 u8 mpa_frame_buf[MAX_CM_BUFFER];
331 u8 mpa_frame_buf[MAX_CM_BUFFER];
332 }; 295 };
333 enum mpa_frame_version mpa_frame_rev;
334 u16 ird_size;
335 u16 ord_size;
336
337 u16 mpa_frame_size; 296 u16 mpa_frame_size;
338 struct iw_cm_id *cm_id; 297 struct iw_cm_id *cm_id;
339 struct list_head list; 298 struct list_head list;
@@ -440,8 +399,10 @@ struct nes_cm_ops {
440 struct nes_vnic *, u16, void *, 399 struct nes_vnic *, u16, void *,
441 struct nes_cm_info *); 400 struct nes_cm_info *);
442 int (*close)(struct nes_cm_core *, struct nes_cm_node *); 401 int (*close)(struct nes_cm_core *, struct nes_cm_node *);
443 int (*accept)(struct nes_cm_core *, struct nes_cm_node *); 402 int (*accept)(struct nes_cm_core *, struct ietf_mpa_frame *,
444 int (*reject)(struct nes_cm_core *, struct nes_cm_node *); 403 struct nes_cm_node *);
404 int (*reject)(struct nes_cm_core *, struct ietf_mpa_frame *,
405 struct nes_cm_node *);
445 int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *, 406 int (*recv_pkt)(struct nes_cm_core *, struct nes_vnic *,
446 struct sk_buff *); 407 struct sk_buff *);
447 int (*destroy_cm_core)(struct nes_cm_core *); 408 int (*destroy_cm_core)(struct nes_cm_core *);
@@ -461,7 +422,5 @@ int nes_destroy_listen(struct iw_cm_id *);
461int nes_cm_recv(struct sk_buff *, struct net_device *); 422int nes_cm_recv(struct sk_buff *, struct net_device *);
462int nes_cm_start(void); 423int nes_cm_start(void);
463int nes_cm_stop(void); 424int nes_cm_stop(void);
464int nes_add_ref_cm_node(struct nes_cm_node *cm_node);
465int nes_rem_ref_cm_node(struct nes_cm_node *cm_node);
466 425
467#endif /* NES_CM_H */ 426#endif /* NES_CM_H */
diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h
index a69eef16d72..b4393a16099 100644
--- a/drivers/infiniband/hw/nes/nes_context.h
+++ b/drivers/infiniband/hw/nes/nes_context.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 67647e26461..be36cbeae63 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -75,6 +75,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
75static void process_critical_error(struct nes_device *nesdev); 75static void process_critical_error(struct nes_device *nesdev);
76static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number); 76static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
77static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode); 77static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
78static void nes_terminate_timeout(unsigned long context);
78static void nes_terminate_start_timer(struct nes_qp *nesqp); 79static void nes_terminate_start_timer(struct nes_qp *nesqp);
79 80
80#ifdef CONFIG_INFINIBAND_NES_DEBUG 81#ifdef CONFIG_INFINIBAND_NES_DEBUG
@@ -109,14 +110,6 @@ static unsigned char *nes_tcp_state_str[] = {
109}; 110};
110#endif 111#endif
111 112
112static inline void print_ip(struct nes_cm_node *cm_node)
113{
114 unsigned char *rem_addr;
115 if (cm_node) {
116 rem_addr = (unsigned char *)&cm_node->rem_addr;
117 printk(KERN_ERR PFX "Remote IP addr: %pI4\n", rem_addr);
118 }
119}
120 113
121/** 114/**
122 * nes_nic_init_timer_defaults 115 * nes_nic_init_timer_defaults
@@ -1528,7 +1521,7 @@ int nes_init_phy(struct nes_device *nesdev)
1528 } else { 1521 } else {
1529 /* setup 10G MDIO operation */ 1522 /* setup 10G MDIO operation */
1530 tx_config &= 0xFFFFFFE3; 1523 tx_config &= 0xFFFFFFE3;
1531 tx_config |= 0x1D; 1524 tx_config |= 0x15;
1532 } 1525 }
1533 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); 1526 nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config);
1534 1527
@@ -1562,7 +1555,6 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
1562 struct nes_hw_nic_rq_wqe *nic_rqe; 1555 struct nes_hw_nic_rq_wqe *nic_rqe;
1563 struct nes_hw_nic *nesnic; 1556 struct nes_hw_nic *nesnic;
1564 struct nes_device *nesdev; 1557 struct nes_device *nesdev;
1565 struct nes_rskb_cb *cb;
1566 u32 rx_wqes_posted = 0; 1558 u32 rx_wqes_posted = 0;
1567 1559
1568 nesnic = &nesvnic->nic; 1560 nesnic = &nesvnic->nic;
@@ -1588,9 +1580,6 @@ static void nes_replenish_nic_rq(struct nes_vnic *nesvnic)
1588 1580
1589 bus_address = pci_map_single(nesdev->pcidev, 1581 bus_address = pci_map_single(nesdev->pcidev,
1590 skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); 1582 skb->data, nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
1591 cb = (struct nes_rskb_cb *)&skb->cb[0];
1592 cb->busaddr = bus_address;
1593 cb->maplen = nesvnic->max_frame_size;
1594 1583
1595 nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head]; 1584 nic_rqe = &nesnic->rq_vbase[nesvnic->nic.rq_head];
1596 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = 1585 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
@@ -1680,7 +1669,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
1680 u32 cqp_head; 1669 u32 cqp_head;
1681 u32 counter; 1670 u32 counter;
1682 u32 wqe_count; 1671 u32 wqe_count;
1683 struct nes_rskb_cb *cb;
1684 u8 jumbomode=0; 1672 u8 jumbomode=0;
1685 1673
1686 /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */ 1674 /* Allocate fragment, SQ, RQ, and CQ; Reuse CEQ based on the PCI function */
@@ -1857,9 +1845,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
1857 1845
1858 pmem = pci_map_single(nesdev->pcidev, skb->data, 1846 pmem = pci_map_single(nesdev->pcidev, skb->data,
1859 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); 1847 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
1860 cb = (struct nes_rskb_cb *)&skb->cb[0];
1861 cb->busaddr = pmem;
1862 cb->maplen = nesvnic->max_frame_size;
1863 1848
1864 nic_rqe = &nesvnic->nic.rq_vbase[counter]; 1849 nic_rqe = &nesvnic->nic.rq_vbase[counter];
1865 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size); 1850 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32(nesvnic->max_frame_size);
@@ -1888,13 +1873,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
1888 jumbomode = 1; 1873 jumbomode = 1;
1889 nes_nic_init_timer_defaults(nesdev, jumbomode); 1874 nes_nic_init_timer_defaults(nesdev, jumbomode);
1890 } 1875 }
1891 if ((nesdev->nesadapter->allow_unaligned_fpdus) &&
1892 (nes_init_mgt_qp(nesdev, netdev, nesvnic))) {
1893 nes_debug(NES_DBG_INIT, "%s: Out of memory for pau nic\n", netdev->name);
1894 nes_destroy_nic_qp(nesvnic);
1895 return -ENOMEM;
1896 }
1897
1898 nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr; 1876 nesvnic->lro_mgr.max_aggr = nes_lro_max_aggr;
1899 nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS; 1877 nesvnic->lro_mgr.max_desc = NES_MAX_LRO_DESCRIPTORS;
1900 nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc; 1878 nesvnic->lro_mgr.lro_arr = nesvnic->lro_desc;
@@ -1917,29 +1895,28 @@ void nes_destroy_nic_qp(struct nes_vnic *nesvnic)
1917 struct nes_device *nesdev = nesvnic->nesdev; 1895 struct nes_device *nesdev = nesvnic->nesdev;
1918 struct nes_hw_cqp_wqe *cqp_wqe; 1896 struct nes_hw_cqp_wqe *cqp_wqe;
1919 struct nes_hw_nic_sq_wqe *nic_sqe; 1897 struct nes_hw_nic_sq_wqe *nic_sqe;
1898 struct nes_hw_nic_rq_wqe *nic_rqe;
1920 __le16 *wqe_fragment_length; 1899 __le16 *wqe_fragment_length;
1921 u16 wqe_fragment_index; 1900 u16 wqe_fragment_index;
1901 u64 wqe_frag;
1922 u32 cqp_head; 1902 u32 cqp_head;
1923 u32 wqm_cfg0; 1903 u32 wqm_cfg0;
1924 unsigned long flags; 1904 unsigned long flags;
1925 struct sk_buff *rx_skb;
1926 struct nes_rskb_cb *cb;
1927 int ret; 1905 int ret;
1928 1906
1929 if (nesdev->nesadapter->allow_unaligned_fpdus)
1930 nes_destroy_mgt(nesvnic);
1931
1932 /* clear wqe stall before destroying NIC QP */ 1907 /* clear wqe stall before destroying NIC QP */
1933 wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0); 1908 wqm_cfg0 = nes_read_indexed(nesdev, NES_IDX_WQM_CONFIG0);
1934 nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF); 1909 nes_write_indexed(nesdev, NES_IDX_WQM_CONFIG0, wqm_cfg0 & 0xFFFF7FFF);
1935 1910
1936 /* Free remaining NIC receive buffers */ 1911 /* Free remaining NIC receive buffers */
1937 while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) { 1912 while (nesvnic->nic.rq_head != nesvnic->nic.rq_tail) {
1938 rx_skb = nesvnic->nic.rx_skb[nesvnic->nic.rq_tail]; 1913 nic_rqe = &nesvnic->nic.rq_vbase[nesvnic->nic.rq_tail];
1939 cb = (struct nes_rskb_cb *)&rx_skb->cb[0]; 1914 wqe_frag = (u64)le32_to_cpu(
1940 pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, 1915 nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX]);
1941 PCI_DMA_FROMDEVICE); 1916 wqe_frag |= ((u64)le32_to_cpu(
1942 1917 nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX]))<<32;
1918 pci_unmap_single(nesdev->pcidev, (dma_addr_t)wqe_frag,
1919 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
1943 dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]); 1920 dev_kfree_skb(nesvnic->nic.rx_skb[nesvnic->nic.rq_tail++]);
1944 nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1); 1921 nesvnic->nic.rq_tail &= (nesvnic->nic.rq_size - 1);
1945 } 1922 }
@@ -2678,9 +2655,11 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number)
2678 } 2655 }
2679 } 2656 }
2680 if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) { 2657 if (nesadapter->phy_type[mac_index] == NES_PHY_TYPE_SFP_D) {
2658 if (nesdev->link_recheck)
2659 cancel_delayed_work(&nesdev->work);
2681 nesdev->link_recheck = 1; 2660 nesdev->link_recheck = 1;
2682 mod_delayed_work(system_wq, &nesdev->work, 2661 schedule_delayed_work(&nesdev->work,
2683 NES_LINK_RECHECK_DELAY); 2662 NES_LINK_RECHECK_DELAY);
2684 } 2663 }
2685 } 2664 }
2686 2665
@@ -2796,7 +2775,6 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
2796 struct nes_hw_nic_sq_wqe *nic_sqe; 2775 struct nes_hw_nic_sq_wqe *nic_sqe;
2797 struct sk_buff *skb; 2776 struct sk_buff *skb;
2798 struct sk_buff *rx_skb; 2777 struct sk_buff *rx_skb;
2799 struct nes_rskb_cb *cb;
2800 __le16 *wqe_fragment_length; 2778 __le16 *wqe_fragment_length;
2801 u32 head; 2779 u32 head;
2802 u32 cq_size; 2780 u32 cq_size;
@@ -2881,8 +2859,6 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
2881 bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32; 2859 bus_address += ((u64)le32_to_cpu(nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX])) << 32;
2882 pci_unmap_single(nesdev->pcidev, bus_address, 2860 pci_unmap_single(nesdev->pcidev, bus_address,
2883 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE); 2861 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
2884 cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
2885 cb->busaddr = 0;
2886 /* rx_skb->tail = rx_skb->data + rx_pkt_size; */ 2862 /* rx_skb->tail = rx_skb->data + rx_pkt_size; */
2887 /* rx_skb->len = rx_pkt_size; */ 2863 /* rx_skb->len = rx_pkt_size; */
2888 rx_skb->len = 0; /* TODO: see if this is necessary */ 2864 rx_skb->len = 0; /* TODO: see if this is necessary */
@@ -3007,7 +2983,6 @@ skip_rx_indicate0:
3007} 2983}
3008 2984
3009 2985
3010
3011/** 2986/**
3012 * nes_cqp_ce_handler 2987 * nes_cqp_ce_handler
3013 */ 2988 */
@@ -3022,8 +2997,6 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
3022 u32 cq_size; 2997 u32 cq_size;
3023 u32 cqe_count=0; 2998 u32 cqe_count=0;
3024 u32 error_code; 2999 u32 error_code;
3025 u32 opcode;
3026 u32 ctx_index;
3027 /* u32 counter; */ 3000 /* u32 counter; */
3028 3001
3029 head = cq->cq_head; 3002 head = cq->cq_head;
@@ -3034,9 +3007,12 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
3034 /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head, 3007 /* nes_debug(NES_DBG_CQP, "head=%u cqe_words=%08X\n", head,
3035 le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */ 3008 le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])); */
3036 3009
3037 opcode = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]); 3010 if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX]) & NES_CQE_VALID) {
3038 if (opcode & NES_CQE_VALID) { 3011 u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head].
3039 cqp = &nesdev->cqp; 3012 cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) |
3013 ((u64)(le32_to_cpu(cq->cq_vbase[head].
3014 cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX])));
3015 cqp = *((struct nes_hw_cqp **)&u64temp);
3040 3016
3041 error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]); 3017 error_code = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_ERROR_CODE_IDX]);
3042 if (error_code) { 3018 if (error_code) {
@@ -3045,14 +3021,15 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
3045 le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f, 3021 le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_CQE_OPCODE_IDX])&0x3f,
3046 (u16)(error_code >> 16), 3022 (u16)(error_code >> 16),
3047 (u16)error_code); 3023 (u16)error_code);
3024 nes_debug(NES_DBG_CQP, "cqp: qp_id=%u, sq_head=%u, sq_tail=%u\n",
3025 cqp->qp_id, cqp->sq_head, cqp->sq_tail);
3048 } 3026 }
3049 3027
3050 u64temp = (((u64)(le32_to_cpu(cq->cq_vbase[head]. 3028 u64temp = (((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
3051 cqe_words[NES_CQE_COMP_COMP_CTX_HIGH_IDX]))) << 32) | 3029 wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX]))) << 32) |
3052 ((u64)(le32_to_cpu(cq->cq_vbase[head]. 3030 ((u64)(le32_to_cpu(nesdev->cqp.sq_vbase[cqp->sq_tail].
3053 cqe_words[NES_CQE_COMP_COMP_CTX_LOW_IDX]))); 3031 wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX])));
3054 3032 cqp_request = *((struct nes_cqp_request **)&u64temp);
3055 cqp_request = (struct nes_cqp_request *)(unsigned long)u64temp;
3056 if (cqp_request) { 3033 if (cqp_request) {
3057 if (cqp_request->waiting) { 3034 if (cqp_request->waiting) {
3058 /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */ 3035 /* nes_debug(NES_DBG_CQP, "%s: Waking up requestor\n"); */
@@ -3098,15 +3075,9 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
3098 cqp_wqe = &nesdev->cqp.sq_vbase[head]; 3075 cqp_wqe = &nesdev->cqp.sq_vbase[head];
3099 memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); 3076 memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
3100 barrier(); 3077 barrier();
3101 3078 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] =
3102 opcode = cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX];
3103 if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
3104 ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
3105 else
3106 ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
3107 cqp_wqe->wqe_words[ctx_index] =
3108 cpu_to_le32((u32)((unsigned long)cqp_request)); 3079 cpu_to_le32((u32)((unsigned long)cqp_request));
3109 cqp_wqe->wqe_words[ctx_index + 1] = 3080 cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] =
3110 cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request))); 3081 cpu_to_le32((u32)(upper_32_bits((unsigned long)cqp_request)));
3111 nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n", 3082 nes_debug(NES_DBG_CQP, "CQP request %p (opcode 0x%02X) put on CQPs SQ wqe%u.\n",
3112 cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head); 3083 cqp_request, le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f, head);
@@ -3122,6 +3093,7 @@ static void nes_cqp_ce_handler(struct nes_device *nesdev, struct nes_hw_cq *cq)
3122 nes_read32(nesdev->regs+NES_CQE_ALLOC); 3093 nes_read32(nesdev->regs+NES_CQE_ALLOC);
3123} 3094}
3124 3095
3096
3125static u8 *locate_mpa(u8 *pkt, u32 aeq_info) 3097static u8 *locate_mpa(u8 *pkt, u32 aeq_info)
3126{ 3098{
3127 if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) { 3099 if (aeq_info & NES_AEQE_Q2_DATA_ETHERNET) {
@@ -3519,7 +3491,7 @@ static void nes_terminate_received(struct nes_device *nesdev,
3519} 3491}
3520 3492
3521/* Timeout routine in case terminate fails to complete */ 3493/* Timeout routine in case terminate fails to complete */
3522void nes_terminate_timeout(unsigned long context) 3494static void nes_terminate_timeout(unsigned long context)
3523{ 3495{
3524 struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context; 3496 struct nes_qp *nesqp = (struct nes_qp *)(unsigned long)context;
3525 3497
@@ -3529,7 +3501,11 @@ void nes_terminate_timeout(unsigned long context)
3529/* Set a timer in case hw cannot complete the terminate sequence */ 3501/* Set a timer in case hw cannot complete the terminate sequence */
3530static void nes_terminate_start_timer(struct nes_qp *nesqp) 3502static void nes_terminate_start_timer(struct nes_qp *nesqp)
3531{ 3503{
3532 mod_timer(&nesqp->terminate_timer, (jiffies + HZ)); 3504 init_timer(&nesqp->terminate_timer);
3505 nesqp->terminate_timer.function = nes_terminate_timeout;
3506 nesqp->terminate_timer.expires = jiffies + HZ;
3507 nesqp->terminate_timer.data = (unsigned long)nesqp;
3508 add_timer(&nesqp->terminate_timer);
3533} 3509}
3534 3510
3535/** 3511/**
@@ -3570,16 +3546,16 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3570 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT; 3546 tcp_state = (aeq_info & NES_AEQE_TCP_STATE_MASK) >> NES_AEQE_TCP_STATE_SHIFT;
3571 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT; 3547 iwarp_state = (aeq_info & NES_AEQE_IWARP_STATE_MASK) >> NES_AEQE_IWARP_STATE_SHIFT;
3572 nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p," 3548 nes_debug(NES_DBG_AEQ, "aeid = 0x%04X, qp-cq id = %d, aeqe = %p,"
3573 " Tcp state = %d, iWARP state = %d\n", 3549 " Tcp state = %s, iWARP state = %s\n",
3574 async_event_id, 3550 async_event_id,
3575 le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe, 3551 le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]), aeqe,
3576 tcp_state, iwarp_state); 3552 nes_tcp_state_str[tcp_state], nes_iwarp_state_str[iwarp_state]);
3577 3553
3578 aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]); 3554 aeqe_cq_id = le32_to_cpu(aeqe->aeqe_words[NES_AEQE_COMP_QP_CQ_ID_IDX]);
3579 if (aeq_info & NES_AEQE_QP) { 3555 if (aeq_info & NES_AEQE_QP) {
3580 if (!nes_is_resource_allocated(nesadapter, 3556 if ((!nes_is_resource_allocated(nesadapter, nesadapter->allocated_qps,
3581 nesadapter->allocated_qps, 3557 aeqe_cq_id)) ||
3582 aeqe_cq_id)) 3558 (atomic_read(&nesqp->close_timer_started)))
3583 return; 3559 return;
3584 } 3560 }
3585 3561
@@ -3590,7 +3566,8 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3590 3566
3591 if (atomic_inc_return(&nesqp->close_timer_started) == 1) { 3567 if (atomic_inc_return(&nesqp->close_timer_started) == 1) {
3592 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) && 3568 if ((tcp_state == NES_AEQE_TCP_STATE_CLOSE_WAIT) &&
3593 (nesqp->ibqp_state == IB_QPS_RTS)) { 3569 (nesqp->ibqp_state == IB_QPS_RTS) &&
3570 ((nesadapter->eeprom_version >> 16) != NES_A0)) {
3594 spin_lock_irqsave(&nesqp->lock, flags); 3571 spin_lock_irqsave(&nesqp->lock, flags);
3595 nesqp->hw_iwarp_state = iwarp_state; 3572 nesqp->hw_iwarp_state = iwarp_state;
3596 nesqp->hw_tcp_state = tcp_state; 3573 nesqp->hw_tcp_state = tcp_state;
@@ -3612,11 +3589,14 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3612 } 3589 }
3613 break; 3590 break;
3614 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: 3591 case NES_AEQE_AEID_LLP_CLOSE_COMPLETE:
3592 if (nesqp->term_flags) {
3593 nes_terminate_done(nesqp, 0);
3594 return;
3595 }
3615 spin_lock_irqsave(&nesqp->lock, flags); 3596 spin_lock_irqsave(&nesqp->lock, flags);
3616 nesqp->hw_iwarp_state = iwarp_state; 3597 nesqp->hw_iwarp_state = NES_AEQE_IWARP_STATE_CLOSING;
3617 nesqp->hw_tcp_state = tcp_state;
3618 nesqp->last_aeq = async_event_id;
3619 spin_unlock_irqrestore(&nesqp->lock, flags); 3598 spin_unlock_irqrestore(&nesqp->lock, flags);
3599 nes_hw_modify_qp(nesdev, nesqp, NES_CQP_QP_IWARP_STATE_CLOSING, 0, 0);
3620 nes_cm_disconn(nesqp); 3600 nes_cm_disconn(nesqp);
3621 break; 3601 break;
3622 3602
@@ -3714,9 +3694,7 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev,
3714 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP: 3694 case NES_AEQE_AEID_ROE_INVALID_RDMA_WRITE_OR_READ_RESP:
3715 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n", 3695 printk(KERN_ERR PFX "QP[%u] async_event_id=0x%04X IB_EVENT_QP_FATAL\n",
3716 nesqp->hwqp.qp_id, async_event_id); 3696 nesqp->hwqp.qp_id, async_event_id);
3717 print_ip(nesqp->cm_node); 3697 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3718 if (!atomic_read(&nesqp->close_timer_started))
3719 nes_terminate_connection(nesdev, nesqp, aeqe, IB_EVENT_QP_FATAL);
3720 break; 3698 break;
3721 3699
3722 case NES_AEQE_AEID_CQ_OPERATION_ERROR: 3700 case NES_AEQE_AEID_CQ_OPERATION_ERROR:
diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h
index d748e4b31b8..c3241479ec0 100644
--- a/drivers/infiniband/hw/nes/nes_hw.h
+++ b/drivers/infiniband/hw/nes/nes_hw.h
@@ -1,5 +1,5 @@
1/* 1/*
2* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3* 3*
4* This software is available to you under a choice of one of two 4* This software is available to you under a choice of one of two
5* licenses. You may choose to be licensed under the terms of the GNU 5* licenses. You may choose to be licensed under the terms of the GNU
@@ -47,11 +47,6 @@
47#define NES_MULTICAST_PF_MAX 8 47#define NES_MULTICAST_PF_MAX 8
48#define NES_A0 3 48#define NES_A0 3
49 49
50#define NES_ENABLE_PAU 0x07000001
51#define NES_DISABLE_PAU 0x07000000
52#define NES_PAU_COUNTER 10
53#define NES_CQP_OPCODE_MASK 0x3f
54
55enum pci_regs { 50enum pci_regs {
56 NES_INT_STAT = 0x0000, 51 NES_INT_STAT = 0x0000,
57 NES_INT_MASK = 0x0004, 52 NES_INT_MASK = 0x0004,
@@ -78,10 +73,8 @@ enum indexed_regs {
78 NES_IDX_QP_CONTROL = 0x0040, 73 NES_IDX_QP_CONTROL = 0x0040,
79 NES_IDX_FLM_CONTROL = 0x0080, 74 NES_IDX_FLM_CONTROL = 0x0080,
80 NES_IDX_INT_CPU_STATUS = 0x00a0, 75 NES_IDX_INT_CPU_STATUS = 0x00a0,
81 NES_IDX_GPR_TRIGGER = 0x00bc,
82 NES_IDX_GPIO_CONTROL = 0x00f0, 76 NES_IDX_GPIO_CONTROL = 0x00f0,
83 NES_IDX_GPIO_DATA = 0x00f4, 77 NES_IDX_GPIO_DATA = 0x00f4,
84 NES_IDX_GPR2 = 0x010c,
85 NES_IDX_TCP_CONFIG0 = 0x01e4, 78 NES_IDX_TCP_CONFIG0 = 0x01e4,
86 NES_IDX_TCP_TIMER_CONFIG = 0x01ec, 79 NES_IDX_TCP_TIMER_CONFIG = 0x01ec,
87 NES_IDX_TCP_NOW = 0x01f0, 80 NES_IDX_TCP_NOW = 0x01f0,
@@ -209,7 +202,6 @@ enum nes_cqp_opcodes {
209 NES_CQP_REGISTER_SHARED_STAG = 0x0c, 202 NES_CQP_REGISTER_SHARED_STAG = 0x0c,
210 NES_CQP_DEALLOCATE_STAG = 0x0d, 203 NES_CQP_DEALLOCATE_STAG = 0x0d,
211 NES_CQP_MANAGE_ARP_CACHE = 0x0f, 204 NES_CQP_MANAGE_ARP_CACHE = 0x0f,
212 NES_CQP_DOWNLOAD_SEGMENT = 0x10,
213 NES_CQP_SUSPEND_QPS = 0x11, 205 NES_CQP_SUSPEND_QPS = 0x11,
214 NES_CQP_UPLOAD_CONTEXT = 0x13, 206 NES_CQP_UPLOAD_CONTEXT = 0x13,
215 NES_CQP_CREATE_CEQ = 0x16, 207 NES_CQP_CREATE_CEQ = 0x16,
@@ -218,8 +210,7 @@ enum nes_cqp_opcodes {
218 NES_CQP_DESTROY_AEQ = 0x1b, 210 NES_CQP_DESTROY_AEQ = 0x1b,
219 NES_CQP_LMI_ACCESS = 0x20, 211 NES_CQP_LMI_ACCESS = 0x20,
220 NES_CQP_FLUSH_WQES = 0x22, 212 NES_CQP_FLUSH_WQES = 0x22,
221 NES_CQP_MANAGE_APBVT = 0x23, 213 NES_CQP_MANAGE_APBVT = 0x23
222 NES_CQP_MANAGE_QUAD_HASH = 0x25
223}; 214};
224 215
225enum nes_cqp_wqe_word_idx { 216enum nes_cqp_wqe_word_idx {
@@ -231,14 +222,6 @@ enum nes_cqp_wqe_word_idx {
231 NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5, 222 NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX = 5,
232}; 223};
233 224
234enum nes_cqp_wqe_word_download_idx { /* format differs from other cqp ops */
235 NES_CQP_WQE_DL_OPCODE_IDX = 0,
236 NES_CQP_WQE_DL_COMP_CTX_LOW_IDX = 1,
237 NES_CQP_WQE_DL_COMP_CTX_HIGH_IDX = 2,
238 NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX = 3
239 /* For index values 4-15 use NES_NIC_SQ_WQE_ values */
240};
241
242enum nes_cqp_cq_wqeword_idx { 225enum nes_cqp_cq_wqeword_idx {
243 NES_CQP_CQ_WQE_PBL_LOW_IDX = 6, 226 NES_CQP_CQ_WQE_PBL_LOW_IDX = 6,
244 NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7, 227 NES_CQP_CQ_WQE_PBL_HIGH_IDX = 7,
@@ -259,7 +242,6 @@ enum nes_cqp_stag_wqeword_idx {
259 NES_CQP_STAG_WQE_PBL_LEN_IDX = 14 242 NES_CQP_STAG_WQE_PBL_LEN_IDX = 14
260}; 243};
261 244
262#define NES_CQP_OP_LOGICAL_PORT_SHIFT 26
263#define NES_CQP_OP_IWARP_STATE_SHIFT 28 245#define NES_CQP_OP_IWARP_STATE_SHIFT 28
264#define NES_CQP_OP_TERMLEN_SHIFT 28 246#define NES_CQP_OP_TERMLEN_SHIFT 28
265 247
@@ -617,7 +599,6 @@ enum nes_nic_sq_wqe_bits {
617 599
618enum nes_nic_cqe_word_idx { 600enum nes_nic_cqe_word_idx {
619 NES_NIC_CQE_ACCQP_ID_IDX = 0, 601 NES_NIC_CQE_ACCQP_ID_IDX = 0,
620 NES_NIC_CQE_HASH_RCVNXT = 1,
621 NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2, 602 NES_NIC_CQE_TAG_PKT_TYPE_IDX = 2,
622 NES_NIC_CQE_MISC_IDX = 3, 603 NES_NIC_CQE_MISC_IDX = 3,
623}; 604};
@@ -1024,11 +1005,6 @@ struct nes_arp_entry {
1024#define NES_NIC_CQ_DOWNWARD_TREND 16 1005#define NES_NIC_CQ_DOWNWARD_TREND 16
1025#define NES_PFT_SIZE 48 1006#define NES_PFT_SIZE 48
1026 1007
1027#define NES_MGT_WQ_COUNT 32
1028#define NES_MGT_CTX_SIZE ((NES_NIC_CTX_RQ_SIZE_32) | (NES_NIC_CTX_SQ_SIZE_32))
1029#define NES_MGT_QP_OFFSET 36
1030#define NES_MGT_QP_COUNT 4
1031
1032struct nes_hw_tune_timer { 1008struct nes_hw_tune_timer {
1033 /* u16 cq_count; */ 1009 /* u16 cq_count; */
1034 u16 threshold_low; 1010 u16 threshold_low;
@@ -1142,7 +1118,6 @@ struct nes_adapter {
1142 u32 et_rate_sample_interval; 1118 u32 et_rate_sample_interval;
1143 u32 timer_int_limit; 1119 u32 timer_int_limit;
1144 u32 wqm_quanta; 1120 u32 wqm_quanta;
1145 u8 allow_unaligned_fpdus;
1146 1121
1147 /* Adapter base MAC address */ 1122 /* Adapter base MAC address */
1148 u32 mac_addr_low; 1123 u32 mac_addr_low;
@@ -1276,14 +1251,6 @@ struct nes_vnic {
1276 enum ib_event_type delayed_event; 1251 enum ib_event_type delayed_event;
1277 enum ib_event_type last_dispatched_event; 1252 enum ib_event_type last_dispatched_event;
1278 spinlock_t port_ibevent_lock; 1253 spinlock_t port_ibevent_lock;
1279 u32 mgt_mem_size;
1280 void *mgt_vbase;
1281 dma_addr_t mgt_pbase;
1282 struct nes_vnic_mgt *mgtvnic[NES_MGT_QP_COUNT];
1283 struct task_struct *mgt_thread;
1284 wait_queue_head_t mgt_wait_queue;
1285 struct sk_buff_head mgt_skb_list;
1286
1287}; 1254};
1288 1255
1289struct nes_ib_device { 1256struct nes_ib_device {
diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c
deleted file mode 100644
index 416645259b0..00000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.c
+++ /dev/null
@@ -1,1160 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34#include <linux/skbuff.h>
35#include <linux/etherdevice.h>
36#include <linux/kthread.h>
37#include <linux/ip.h>
38#include <linux/tcp.h>
39#include <net/tcp.h>
40#include "nes.h"
41#include "nes_mgt.h"
42
43atomic_t pau_qps_created;
44atomic_t pau_qps_destroyed;
45
46static void nes_replenish_mgt_rq(struct nes_vnic_mgt *mgtvnic)
47{
48 unsigned long flags;
49 dma_addr_t bus_address;
50 struct sk_buff *skb;
51 struct nes_hw_nic_rq_wqe *nic_rqe;
52 struct nes_hw_mgt *nesmgt;
53 struct nes_device *nesdev;
54 struct nes_rskb_cb *cb;
55 u32 rx_wqes_posted = 0;
56
57 nesmgt = &mgtvnic->mgt;
58 nesdev = mgtvnic->nesvnic->nesdev;
59 spin_lock_irqsave(&nesmgt->rq_lock, flags);
60 if (nesmgt->replenishing_rq != 0) {
61 if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
62 (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
63 atomic_set(&mgtvnic->rx_skb_timer_running, 1);
64 spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
65 mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
66 add_timer(&mgtvnic->rq_wqes_timer);
67 } else {
68 spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
69 }
70 return;
71 }
72 nesmgt->replenishing_rq = 1;
73 spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
74 do {
75 skb = dev_alloc_skb(mgtvnic->nesvnic->max_frame_size);
76 if (skb) {
77 skb->dev = mgtvnic->nesvnic->netdev;
78
79 bus_address = pci_map_single(nesdev->pcidev,
80 skb->data, mgtvnic->nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
81 cb = (struct nes_rskb_cb *)&skb->cb[0];
82 cb->busaddr = bus_address;
83 cb->maplen = mgtvnic->nesvnic->max_frame_size;
84
85 nic_rqe = &nesmgt->rq_vbase[mgtvnic->mgt.rq_head];
86 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] =
87 cpu_to_le32(mgtvnic->nesvnic->max_frame_size);
88 nic_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
89 nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] =
90 cpu_to_le32((u32)bus_address);
91 nic_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] =
92 cpu_to_le32((u32)((u64)bus_address >> 32));
93 nesmgt->rx_skb[nesmgt->rq_head] = skb;
94 nesmgt->rq_head++;
95 nesmgt->rq_head &= nesmgt->rq_size - 1;
96 atomic_dec(&mgtvnic->rx_skbs_needed);
97 barrier();
98 if (++rx_wqes_posted == 255) {
99 nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
100 rx_wqes_posted = 0;
101 }
102 } else {
103 spin_lock_irqsave(&nesmgt->rq_lock, flags);
104 if (((nesmgt->rq_size - 1) == atomic_read(&mgtvnic->rx_skbs_needed)) &&
105 (atomic_read(&mgtvnic->rx_skb_timer_running) == 0)) {
106 atomic_set(&mgtvnic->rx_skb_timer_running, 1);
107 spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
108 mgtvnic->rq_wqes_timer.expires = jiffies + (HZ / 2); /* 1/2 second */
109 add_timer(&mgtvnic->rq_wqes_timer);
110 } else {
111 spin_unlock_irqrestore(&nesmgt->rq_lock, flags);
112 }
113 break;
114 }
115 } while (atomic_read(&mgtvnic->rx_skbs_needed));
116 barrier();
117 if (rx_wqes_posted)
118 nes_write32(nesdev->regs + NES_WQE_ALLOC, (rx_wqes_posted << 24) | nesmgt->qp_id);
119 nesmgt->replenishing_rq = 0;
120}
121
122/**
123 * nes_mgt_rq_wqes_timeout
124 */
125static void nes_mgt_rq_wqes_timeout(unsigned long parm)
126{
127 struct nes_vnic_mgt *mgtvnic = (struct nes_vnic_mgt *)parm;
128
129 atomic_set(&mgtvnic->rx_skb_timer_running, 0);
130 if (atomic_read(&mgtvnic->rx_skbs_needed))
131 nes_replenish_mgt_rq(mgtvnic);
132}
133
134/**
135 * nes_mgt_free_skb - unmap and free skb
136 */
137static void nes_mgt_free_skb(struct nes_device *nesdev, struct sk_buff *skb, u32 dir)
138{
139 struct nes_rskb_cb *cb;
140
141 cb = (struct nes_rskb_cb *)&skb->cb[0];
142 pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, dir);
143 cb->busaddr = 0;
144 dev_kfree_skb_any(skb);
145}
146
147/**
148 * nes_download_callback - handle download completions
149 */
150static void nes_download_callback(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
151{
152 struct pau_fpdu_info *fpdu_info = cqp_request->cqp_callback_pointer;
153 struct nes_qp *nesqp = fpdu_info->nesqp;
154 struct sk_buff *skb;
155 int i;
156
157 for (i = 0; i < fpdu_info->frag_cnt; i++) {
158 skb = fpdu_info->frags[i].skb;
159 if (fpdu_info->frags[i].cmplt) {
160 nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
161 nes_rem_ref_cm_node(nesqp->cm_node);
162 }
163 }
164
165 if (fpdu_info->hdr_vbase)
166 pci_free_consistent(nesdev->pcidev, fpdu_info->hdr_len,
167 fpdu_info->hdr_vbase, fpdu_info->hdr_pbase);
168 kfree(fpdu_info);
169}
170
171/**
172 * nes_get_seq - Get the seq, ack_seq and window from the packet
173 */
174static u32 nes_get_seq(struct sk_buff *skb, u32 *ack, u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
175{
176 struct nes_rskb_cb *cb = (struct nes_rskb_cb *)&skb->cb[0];
177 struct iphdr *iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
178 struct tcphdr *tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
179
180 *ack = be32_to_cpu(tcph->ack_seq);
181 *wnd = be16_to_cpu(tcph->window);
182 *fin_rcvd = tcph->fin;
183 *rst_rcvd = tcph->rst;
184 return be32_to_cpu(tcph->seq);
185}
186
187/**
188 * nes_get_next_skb - Get the next skb based on where current skb is in the queue
189 */
190static struct sk_buff *nes_get_next_skb(struct nes_device *nesdev, struct nes_qp *nesqp,
191 struct sk_buff *skb, u32 nextseq, u32 *ack,
192 u16 *wnd, u32 *fin_rcvd, u32 *rst_rcvd)
193{
194 u32 seq;
195 bool processacks;
196 struct sk_buff *old_skb;
197
198 if (skb) {
199 /* Continue processing fpdu */
200 if (skb->next == (struct sk_buff *)&nesqp->pau_list)
201 goto out;
202 skb = skb->next;
203 processacks = false;
204 } else {
205 /* Starting a new one */
206 if (skb_queue_empty(&nesqp->pau_list))
207 goto out;
208 skb = skb_peek(&nesqp->pau_list);
209 processacks = true;
210 }
211
212 while (1) {
213 if (skb_queue_empty(&nesqp->pau_list))
214 goto out;
215
216 seq = nes_get_seq(skb, ack, wnd, fin_rcvd, rst_rcvd);
217 if (seq == nextseq) {
218 if (skb->len || processacks)
219 break;
220 } else if (after(seq, nextseq)) {
221 goto out;
222 }
223
224 old_skb = skb;
225 skb = skb->next;
226 skb_unlink(old_skb, &nesqp->pau_list);
227 nes_mgt_free_skb(nesdev, old_skb, PCI_DMA_TODEVICE);
228 nes_rem_ref_cm_node(nesqp->cm_node);
229 if (skb == (struct sk_buff *)&nesqp->pau_list)
230 goto out;
231 }
232 return skb;
233
234out:
235 return NULL;
236}
237
238/**
239 * get_fpdu_info - Find the next complete fpdu and return its fragments.
240 */
241static int get_fpdu_info(struct nes_device *nesdev, struct nes_qp *nesqp,
242 struct pau_fpdu_info **pau_fpdu_info)
243{
244 struct sk_buff *skb;
245 struct iphdr *iph;
246 struct tcphdr *tcph;
247 struct nes_rskb_cb *cb;
248 struct pau_fpdu_info *fpdu_info = NULL;
249 struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
250 u32 fpdu_len = 0;
251 u32 tmp_len;
252 int frag_cnt = 0;
253 u32 tot_len;
254 u32 frag_tot;
255 u32 ack;
256 u32 fin_rcvd;
257 u32 rst_rcvd;
258 u16 wnd;
259 int i;
260 int rc = 0;
261
262 *pau_fpdu_info = NULL;
263
264 skb = nes_get_next_skb(nesdev, nesqp, NULL, nesqp->pau_rcv_nxt, &ack, &wnd, &fin_rcvd, &rst_rcvd);
265 if (!skb)
266 goto out;
267
268 cb = (struct nes_rskb_cb *)&skb->cb[0];
269 if (skb->len) {
270 fpdu_len = be16_to_cpu(*(__be16 *) skb->data) + MPA_FRAMING;
271 fpdu_len = (fpdu_len + 3) & 0xfffffffc;
272 tmp_len = fpdu_len;
273
274 /* See if we have all of the fpdu */
275 frag_tot = 0;
276 memset(&frags, 0, sizeof frags);
277 for (i = 0; i < MAX_FPDU_FRAGS; i++) {
278 frags[i].physaddr = cb->busaddr;
279 frags[i].physaddr += skb->data - cb->data_start;
280 frags[i].frag_len = min(tmp_len, skb->len);
281 frags[i].skb = skb;
282 frags[i].cmplt = (skb->len == frags[i].frag_len);
283 frag_tot += frags[i].frag_len;
284 frag_cnt++;
285
286 tmp_len -= frags[i].frag_len;
287 if (tmp_len == 0)
288 break;
289
290 skb = nes_get_next_skb(nesdev, nesqp, skb,
291 nesqp->pau_rcv_nxt + frag_tot, &ack, &wnd, &fin_rcvd, &rst_rcvd);
292 if (!skb)
293 goto out;
294 if (rst_rcvd) {
295 /* rst received in the middle of fpdu */
296 for (; i >= 0; i--) {
297 skb_unlink(frags[i].skb, &nesqp->pau_list);
298 nes_mgt_free_skb(nesdev, frags[i].skb, PCI_DMA_TODEVICE);
299 }
300 cb = (struct nes_rskb_cb *)&skb->cb[0];
301 frags[0].physaddr = cb->busaddr;
302 frags[0].physaddr += skb->data - cb->data_start;
303 frags[0].frag_len = skb->len;
304 frags[0].skb = skb;
305 frags[0].cmplt = true;
306 frag_cnt = 1;
307 break;
308 }
309
310 cb = (struct nes_rskb_cb *)&skb->cb[0];
311 }
312 } else {
313 /* no data */
314 frags[0].physaddr = cb->busaddr;
315 frags[0].frag_len = 0;
316 frags[0].skb = skb;
317 frags[0].cmplt = true;
318 frag_cnt = 1;
319 }
320
321 /* Found one */
322 fpdu_info = kzalloc(sizeof(*fpdu_info), GFP_ATOMIC);
323 if (fpdu_info == NULL) {
324 nes_debug(NES_DBG_PAU, "Failed to alloc a fpdu_info.\n");
325 rc = -ENOMEM;
326 goto out;
327 }
328
329 fpdu_info->cqp_request = nes_get_cqp_request(nesdev);
330 if (fpdu_info->cqp_request == NULL) {
331 nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
332 rc = -ENOMEM;
333 goto out;
334 }
335
336 cb = (struct nes_rskb_cb *)&frags[0].skb->cb[0];
337 iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
338 tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
339 fpdu_info->hdr_len = (((unsigned char *)tcph) + 4 * (tcph->doff)) - cb->data_start;
340 fpdu_info->data_len = fpdu_len;
341 tot_len = fpdu_info->hdr_len + fpdu_len - ETH_HLEN;
342
343 if (frags[0].cmplt) {
344 fpdu_info->hdr_pbase = cb->busaddr;
345 fpdu_info->hdr_vbase = NULL;
346 } else {
347 fpdu_info->hdr_vbase = pci_alloc_consistent(nesdev->pcidev,
348 fpdu_info->hdr_len, &fpdu_info->hdr_pbase);
349 if (!fpdu_info->hdr_vbase) {
350 nes_debug(NES_DBG_PAU, "Unable to allocate memory for pau first frag\n");
351 rc = -ENOMEM;
352 goto out;
353 }
354
355 /* Copy hdrs, adjusting len and seqnum */
356 memcpy(fpdu_info->hdr_vbase, cb->data_start, fpdu_info->hdr_len);
357 iph = (struct iphdr *)(fpdu_info->hdr_vbase + ETH_HLEN);
358 tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
359 }
360
361 iph->tot_len = cpu_to_be16(tot_len);
362 iph->saddr = cpu_to_be32(0x7f000001);
363
364 tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
365 tcph->ack_seq = cpu_to_be32(ack);
366 tcph->window = cpu_to_be16(wnd);
367
368 nesqp->pau_rcv_nxt += fpdu_len + fin_rcvd;
369
370 memcpy(fpdu_info->frags, frags, sizeof(fpdu_info->frags));
371 fpdu_info->frag_cnt = frag_cnt;
372 fpdu_info->nesqp = nesqp;
373 *pau_fpdu_info = fpdu_info;
374
375 /* Update skb's for next pass */
376 for (i = 0; i < frag_cnt; i++) {
377 cb = (struct nes_rskb_cb *)&frags[i].skb->cb[0];
378 skb_pull(frags[i].skb, frags[i].frag_len);
379
380 if (frags[i].skb->len == 0) {
381 /* Pull skb off the list - it will be freed in the callback */
382 if (!skb_queue_empty(&nesqp->pau_list))
383 skb_unlink(frags[i].skb, &nesqp->pau_list);
384 } else {
385 /* Last skb still has data so update the seq */
386 iph = (struct iphdr *)(cb->data_start + ETH_HLEN);
387 tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
388 tcph->seq = cpu_to_be32(nesqp->pau_rcv_nxt);
389 }
390 }
391
392out:
393 if (rc) {
394 if (fpdu_info) {
395 if (fpdu_info->cqp_request)
396 nes_put_cqp_request(nesdev, fpdu_info->cqp_request);
397 kfree(fpdu_info);
398 }
399 }
400 return rc;
401}
402
403/**
404 * forward_fpdu - send complete fpdus, one at a time
405 */
406static int forward_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
407{
408 struct nes_device *nesdev = nesvnic->nesdev;
409 struct pau_fpdu_info *fpdu_info;
410 struct nes_hw_cqp_wqe *cqp_wqe;
411 struct nes_cqp_request *cqp_request;
412 unsigned long flags;
413 u64 u64tmp;
414 u32 u32tmp;
415 int rc;
416
417 while (1) {
418 spin_lock_irqsave(&nesqp->pau_lock, flags);
419 rc = get_fpdu_info(nesdev, nesqp, &fpdu_info);
420 if (rc || (fpdu_info == NULL)) {
421 spin_unlock_irqrestore(&nesqp->pau_lock, flags);
422 return rc;
423 }
424
425 cqp_request = fpdu_info->cqp_request;
426 cqp_wqe = &cqp_request->cqp_wqe;
427 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
428 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_OPCODE_IDX,
429 NES_CQP_DOWNLOAD_SEGMENT |
430 (((u32)nesvnic->logical_port) << NES_CQP_OP_LOGICAL_PORT_SHIFT));
431
432 u32tmp = fpdu_info->hdr_len << 16;
433 u32tmp |= fpdu_info->hdr_len + (u32)fpdu_info->data_len;
434 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_DL_LENGTH_0_TOTAL_IDX,
435 u32tmp);
436
437 u32tmp = (fpdu_info->frags[1].frag_len << 16) | fpdu_info->frags[0].frag_len;
438 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_2_1_IDX,
439 u32tmp);
440
441 u32tmp = (fpdu_info->frags[3].frag_len << 16) | fpdu_info->frags[2].frag_len;
442 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_LENGTH_4_3_IDX,
443 u32tmp);
444
445 u64tmp = (u64)fpdu_info->hdr_pbase;
446 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX,
447 lower_32_bits(u64tmp));
448 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_HIGH_IDX,
449 upper_32_bits(u64tmp));
450
451 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_LOW_IDX,
452 lower_32_bits(fpdu_info->frags[0].physaddr));
453 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG1_HIGH_IDX,
454 upper_32_bits(fpdu_info->frags[0].physaddr));
455
456 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_LOW_IDX,
457 lower_32_bits(fpdu_info->frags[1].physaddr));
458 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG2_HIGH_IDX,
459 upper_32_bits(fpdu_info->frags[1].physaddr));
460
461 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_LOW_IDX,
462 lower_32_bits(fpdu_info->frags[2].physaddr));
463 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG3_HIGH_IDX,
464 upper_32_bits(fpdu_info->frags[2].physaddr));
465
466 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_LOW_IDX,
467 lower_32_bits(fpdu_info->frags[3].physaddr));
468 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_NIC_SQ_WQE_FRAG4_HIGH_IDX,
469 upper_32_bits(fpdu_info->frags[3].physaddr));
470
471 cqp_request->cqp_callback_pointer = fpdu_info;
472 cqp_request->callback = 1;
473 cqp_request->cqp_callback = nes_download_callback;
474
475 atomic_set(&cqp_request->refcount, 1);
476 nes_post_cqp_request(nesdev, cqp_request);
477 spin_unlock_irqrestore(&nesqp->pau_lock, flags);
478 }
479
480 return 0;
481}
482
483static void process_fpdus(struct nes_vnic *nesvnic, struct nes_qp *nesqp)
484{
485 int again = 1;
486 unsigned long flags;
487
488 do {
489 /* Ignore rc - if it failed, tcp retries will cause it to try again */
490 forward_fpdus(nesvnic, nesqp);
491
492 spin_lock_irqsave(&nesqp->pau_lock, flags);
493 if (nesqp->pau_pending) {
494 nesqp->pau_pending = 0;
495 } else {
496 nesqp->pau_busy = 0;
497 again = 0;
498 }
499
500 spin_unlock_irqrestore(&nesqp->pau_lock, flags);
501 } while (again);
502}
503
504/**
505 * queue_fpdus - Handle fpdu's that hw passed up to sw
506 */
507static void queue_fpdus(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
508{
509 struct sk_buff *tmpskb;
510 struct nes_rskb_cb *cb;
511 struct iphdr *iph;
512 struct tcphdr *tcph;
513 unsigned char *tcph_end;
514 u32 rcv_nxt;
515 u32 rcv_wnd;
516 u32 seqnum;
517 u32 len;
518 bool process_it = false;
519 unsigned long flags;
520
521 /* Move data ptr to after tcp header */
522 iph = (struct iphdr *)skb->data;
523 tcph = (struct tcphdr *)(((char *)iph) + (4 * iph->ihl));
524 seqnum = be32_to_cpu(tcph->seq);
525 tcph_end = (((char *)tcph) + (4 * tcph->doff));
526
527 len = be16_to_cpu(iph->tot_len);
528 if (skb->len > len)
529 skb_trim(skb, len);
530 skb_pull(skb, tcph_end - skb->data);
531
532 /* Initialize tracking values */
533 cb = (struct nes_rskb_cb *)&skb->cb[0];
534 cb->seqnum = seqnum;
535
536 /* Make sure data is in the receive window */
537 rcv_nxt = nesqp->pau_rcv_nxt;
538 rcv_wnd = le32_to_cpu(nesqp->nesqp_context->rcv_wnd);
539 if (!between(seqnum, rcv_nxt, (rcv_nxt + rcv_wnd))) {
540 nes_mgt_free_skb(nesvnic->nesdev, skb, PCI_DMA_TODEVICE);
541 nes_rem_ref_cm_node(nesqp->cm_node);
542 return;
543 }
544
545 spin_lock_irqsave(&nesqp->pau_lock, flags);
546
547 if (nesqp->pau_busy)
548 nesqp->pau_pending = 1;
549 else
550 nesqp->pau_busy = 1;
551
552 /* Queue skb by sequence number */
553 if (skb_queue_len(&nesqp->pau_list) == 0) {
554 skb_queue_head(&nesqp->pau_list, skb);
555 } else {
556 tmpskb = nesqp->pau_list.next;
557 while (tmpskb != (struct sk_buff *)&nesqp->pau_list) {
558 cb = (struct nes_rskb_cb *)&tmpskb->cb[0];
559 if (before(seqnum, cb->seqnum))
560 break;
561 tmpskb = tmpskb->next;
562 }
563 skb_insert(tmpskb, skb, &nesqp->pau_list);
564 }
565 if (nesqp->pau_state == PAU_READY)
566 process_it = true;
567 spin_unlock_irqrestore(&nesqp->pau_lock, flags);
568
569 if (process_it)
570 process_fpdus(nesvnic, nesqp);
571
572 return;
573}
574
575/**
576 * mgt_thread - Handle mgt skbs in a safe context
577 */
578static int mgt_thread(void *context)
579{
580 struct nes_vnic *nesvnic = context;
581 struct sk_buff *skb;
582 struct nes_rskb_cb *cb;
583
584 while (!kthread_should_stop()) {
585 wait_event_interruptible(nesvnic->mgt_wait_queue,
586 skb_queue_len(&nesvnic->mgt_skb_list) || kthread_should_stop());
587 while ((skb_queue_len(&nesvnic->mgt_skb_list)) && !kthread_should_stop()) {
588 skb = skb_dequeue(&nesvnic->mgt_skb_list);
589 cb = (struct nes_rskb_cb *)&skb->cb[0];
590 cb->data_start = skb->data - ETH_HLEN;
591 cb->busaddr = pci_map_single(nesvnic->nesdev->pcidev, cb->data_start,
592 nesvnic->max_frame_size, PCI_DMA_TODEVICE);
593 queue_fpdus(skb, nesvnic, cb->nesqp);
594 }
595 }
596
597 /* Closing down so delete any entries on the queue */
598 while (skb_queue_len(&nesvnic->mgt_skb_list)) {
599 skb = skb_dequeue(&nesvnic->mgt_skb_list);
600 cb = (struct nes_rskb_cb *)&skb->cb[0];
601 nes_rem_ref_cm_node(cb->nesqp->cm_node);
602 dev_kfree_skb_any(skb);
603 }
604 return 0;
605}
606
607/**
608 * nes_queue_skbs - Queue skb so it can be handled in a thread context
609 */
610void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp)
611{
612 struct nes_rskb_cb *cb;
613
614 cb = (struct nes_rskb_cb *)&skb->cb[0];
615 cb->nesqp = nesqp;
616 skb_queue_tail(&nesvnic->mgt_skb_list, skb);
617 wake_up_interruptible(&nesvnic->mgt_wait_queue);
618}
619
620void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp)
621{
622 struct sk_buff *skb;
623 unsigned long flags;
624 atomic_inc(&pau_qps_destroyed);
625
626 /* Free packets that have not yet been forwarded */
627 /* Lock is acquired by skb_dequeue when removing the skb */
628 spin_lock_irqsave(&nesqp->pau_lock, flags);
629 while (skb_queue_len(&nesqp->pau_list)) {
630 skb = skb_dequeue(&nesqp->pau_list);
631 nes_mgt_free_skb(nesdev, skb, PCI_DMA_TODEVICE);
632 nes_rem_ref_cm_node(nesqp->cm_node);
633 }
634 spin_unlock_irqrestore(&nesqp->pau_lock, flags);
635}
636
637static void nes_chg_qh_handler(struct nes_device *nesdev, struct nes_cqp_request *cqp_request)
638{
639 struct pau_qh_chg *qh_chg = cqp_request->cqp_callback_pointer;
640 struct nes_cqp_request *new_request;
641 struct nes_hw_cqp_wqe *cqp_wqe;
642 struct nes_adapter *nesadapter;
643 struct nes_qp *nesqp;
644 struct nes_v4_quad nes_quad;
645 u32 crc_value;
646 u64 u64temp;
647
648 nesadapter = nesdev->nesadapter;
649 nesqp = qh_chg->nesqp;
650
651 /* Should we handle the bad completion */
652 if (cqp_request->major_code)
653 WARN(1, PFX "Invalid cqp_request major_code=0x%x\n",
654 cqp_request->major_code);
655
656 switch (nesqp->pau_state) {
657 case PAU_DEL_QH:
658 /* Old hash code deleted, now set the new one */
659 nesqp->pau_state = PAU_ADD_LB_QH;
660 new_request = nes_get_cqp_request(nesdev);
661 if (new_request == NULL) {
662 nes_debug(NES_DBG_PAU, "Failed to get a new_request.\n");
663 WARN_ON(1);
664 return;
665 }
666
667 memset(&nes_quad, 0, sizeof(nes_quad));
668 nes_quad.DstIpAdrIndex =
669 cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
670 nes_quad.SrcIpadr = cpu_to_be32(0x7f000001);
671 nes_quad.TcpPorts[0] = swab16(nesqp->nesqp_context->tcpPorts[1]);
672 nes_quad.TcpPorts[1] = swab16(nesqp->nesqp_context->tcpPorts[0]);
673
674 /* Produce hash key */
675 crc_value = get_crc_value(&nes_quad);
676 nesqp->hte_index = cpu_to_be32(crc_value ^ 0xffffffff);
677 nes_debug(NES_DBG_PAU, "new HTE Index = 0x%08X, CRC = 0x%08X\n",
678 nesqp->hte_index, nesqp->hte_index & nesadapter->hte_index_mask);
679
680 nesqp->hte_index &= nesadapter->hte_index_mask;
681 nesqp->nesqp_context->hte_index = cpu_to_le32(nesqp->hte_index);
682 nesqp->nesqp_context->ip0 = cpu_to_le32(0x7f000001);
683 nesqp->nesqp_context->rcv_nxt = cpu_to_le32(nesqp->pau_rcv_nxt);
684
685 cqp_wqe = &new_request->cqp_wqe;
686 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
687 set_wqe_32bit_value(cqp_wqe->wqe_words,
688 NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH |
689 NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
690 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
691 u64temp = (u64)nesqp->nesqp_context_pbase;
692 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
693
694 nes_debug(NES_DBG_PAU, "Waiting for CQP completion for adding the quad hash.\n");
695
696 new_request->cqp_callback_pointer = qh_chg;
697 new_request->callback = 1;
698 new_request->cqp_callback = nes_chg_qh_handler;
699 atomic_set(&new_request->refcount, 1);
700 nes_post_cqp_request(nesdev, new_request);
701 break;
702
703 case PAU_ADD_LB_QH:
704 /* Start processing the queued fpdu's */
705 nesqp->pau_state = PAU_READY;
706 process_fpdus(qh_chg->nesvnic, qh_chg->nesqp);
707 kfree(qh_chg);
708 break;
709 }
710}
711
712/**
713 * nes_change_quad_hash
714 */
715static int nes_change_quad_hash(struct nes_device *nesdev,
716 struct nes_vnic *nesvnic, struct nes_qp *nesqp)
717{
718 struct nes_cqp_request *cqp_request = NULL;
719 struct pau_qh_chg *qh_chg = NULL;
720 u64 u64temp;
721 struct nes_hw_cqp_wqe *cqp_wqe;
722 int ret = 0;
723
724 cqp_request = nes_get_cqp_request(nesdev);
725 if (cqp_request == NULL) {
726 nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
727 ret = -ENOMEM;
728 goto chg_qh_err;
729 }
730
731 qh_chg = kmalloc(sizeof *qh_chg, GFP_ATOMIC);
732 if (qh_chg == NULL) {
733 nes_debug(NES_DBG_PAU, "Failed to get a cqp_request.\n");
734 ret = -ENOMEM;
735 goto chg_qh_err;
736 }
737 qh_chg->nesdev = nesdev;
738 qh_chg->nesvnic = nesvnic;
739 qh_chg->nesqp = nesqp;
740 nesqp->pau_state = PAU_DEL_QH;
741
742 cqp_wqe = &cqp_request->cqp_wqe;
743 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
744 set_wqe_32bit_value(cqp_wqe->wqe_words,
745 NES_CQP_WQE_OPCODE_IDX, NES_CQP_MANAGE_QUAD_HASH | NES_CQP_QP_DEL_HTE |
746 NES_CQP_QP_TYPE_IWARP | NES_CQP_QP_CONTEXT_VALID | NES_CQP_QP_IWARP_STATE_RTS);
747 set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX, nesqp->hwqp.qp_id);
748 u64temp = (u64)nesqp->nesqp_context_pbase;
749 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
750
751 nes_debug(NES_DBG_PAU, "Waiting for CQP completion for deleting the quad hash.\n");
752
753 cqp_request->cqp_callback_pointer = qh_chg;
754 cqp_request->callback = 1;
755 cqp_request->cqp_callback = nes_chg_qh_handler;
756 atomic_set(&cqp_request->refcount, 1);
757 nes_post_cqp_request(nesdev, cqp_request);
758
759 return ret;
760
761chg_qh_err:
762 kfree(qh_chg);
763 if (cqp_request)
764 nes_put_cqp_request(nesdev, cqp_request);
765 return ret;
766}
767
768/**
769 * nes_mgt_ce_handler
770 * This management code deals with any packed and unaligned (pau) fpdu's
771 * that the hardware cannot handle.
772 */
773static void nes_mgt_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
774{
775 struct nes_vnic_mgt *mgtvnic = container_of(cq, struct nes_vnic_mgt, mgt_cq);
776 struct nes_adapter *nesadapter = nesdev->nesadapter;
777 u32 head;
778 u32 cq_size;
779 u32 cqe_count = 0;
780 u32 cqe_misc;
781 u32 qp_id = 0;
782 u32 skbs_needed;
783 unsigned long context;
784 struct nes_qp *nesqp;
785 struct sk_buff *rx_skb;
786 struct nes_rskb_cb *cb;
787
788 head = cq->cq_head;
789 cq_size = cq->cq_size;
790
791 while (1) {
792 cqe_misc = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]);
793 if (!(cqe_misc & NES_NIC_CQE_VALID))
794 break;
795
796 nesqp = NULL;
797 if (cqe_misc & NES_NIC_CQE_ACCQP_VALID) {
798 qp_id = le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_ACCQP_ID_IDX]);
799 qp_id &= 0x001fffff;
800 if (qp_id < nesadapter->max_qp) {
801 context = (unsigned long)nesadapter->qp_table[qp_id - NES_FIRST_QPN];
802 nesqp = (struct nes_qp *)context;
803 }
804 }
805
806 if (nesqp) {
807 if (nesqp->pau_mode == false) {
808 nesqp->pau_mode = true; /* First time for this qp */
809 nesqp->pau_rcv_nxt = le32_to_cpu(
810 cq->cq_vbase[head].cqe_words[NES_NIC_CQE_HASH_RCVNXT]);
811 skb_queue_head_init(&nesqp->pau_list);
812 spin_lock_init(&nesqp->pau_lock);
813 atomic_inc(&pau_qps_created);
814 nes_change_quad_hash(nesdev, mgtvnic->nesvnic, nesqp);
815 }
816
817 rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
818 rx_skb->len = 0;
819 skb_put(rx_skb, cqe_misc & 0x0000ffff);
820 rx_skb->protocol = eth_type_trans(rx_skb, mgtvnic->nesvnic->netdev);
821 cb = (struct nes_rskb_cb *)&rx_skb->cb[0];
822 pci_unmap_single(nesdev->pcidev, cb->busaddr, cb->maplen, PCI_DMA_FROMDEVICE);
823 cb->busaddr = 0;
824 mgtvnic->mgt.rq_tail++;
825 mgtvnic->mgt.rq_tail &= mgtvnic->mgt.rq_size - 1;
826
827 nes_add_ref_cm_node(nesqp->cm_node);
828 nes_queue_mgt_skbs(rx_skb, mgtvnic->nesvnic, nesqp);
829 } else {
830 printk(KERN_ERR PFX "Invalid QP %d for packed/unaligned handling\n", qp_id);
831 }
832
833 cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX] = 0;
834 cqe_count++;
835 if (++head >= cq_size)
836 head = 0;
837
838 if (cqe_count == 255) {
839 /* Replenish mgt CQ */
840 nes_write32(nesdev->regs + NES_CQE_ALLOC, cq->cq_number | (cqe_count << 16));
841 nesdev->currcq_count += cqe_count;
842 cqe_count = 0;
843 }
844
845 skbs_needed = atomic_inc_return(&mgtvnic->rx_skbs_needed);
846 if (skbs_needed > (mgtvnic->mgt.rq_size >> 1))
847 nes_replenish_mgt_rq(mgtvnic);
848 }
849
850 cq->cq_head = head;
851 nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
852 cq->cq_number | (cqe_count << 16));
853 nes_read32(nesdev->regs + NES_CQE_ALLOC);
854 nesdev->currcq_count += cqe_count;
855}
856
857/**
858 * nes_init_mgt_qp
859 */
860int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic)
861{
862 struct nes_vnic_mgt *mgtvnic;
863 u32 counter;
864 void *vmem;
865 dma_addr_t pmem;
866 struct nes_hw_cqp_wqe *cqp_wqe;
867 u32 cqp_head;
868 unsigned long flags;
869 struct nes_hw_nic_qp_context *mgt_context;
870 u64 u64temp;
871 struct nes_hw_nic_rq_wqe *mgt_rqe;
872 struct sk_buff *skb;
873 u32 wqe_count;
874 struct nes_rskb_cb *cb;
875 u32 mgt_mem_size;
876 void *mgt_vbase;
877 dma_addr_t mgt_pbase;
878 int i;
879 int ret;
880
881 /* Allocate space the all mgt QPs once */
882 mgtvnic = kzalloc(NES_MGT_QP_COUNT * sizeof(struct nes_vnic_mgt), GFP_KERNEL);
883 if (mgtvnic == NULL) {
884 nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt structure\n");
885 return -ENOMEM;
886 }
887
888 /* Allocate fragment, RQ, and CQ; Reuse CEQ based on the PCI function */
889 /* We are not sending from this NIC so sq is not allocated */
890 mgt_mem_size = 256 +
891 (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe)) +
892 (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_cqe)) +
893 sizeof(struct nes_hw_nic_qp_context);
894 mgt_mem_size = (mgt_mem_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
895 mgt_vbase = pci_alloc_consistent(nesdev->pcidev, NES_MGT_QP_COUNT * mgt_mem_size, &mgt_pbase);
896 if (!mgt_vbase) {
897 kfree(mgtvnic);
898 nes_debug(NES_DBG_INIT, "Unable to allocate memory for mgt host descriptor rings\n");
899 return -ENOMEM;
900 }
901
902 nesvnic->mgt_mem_size = NES_MGT_QP_COUNT * mgt_mem_size;
903 nesvnic->mgt_vbase = mgt_vbase;
904 nesvnic->mgt_pbase = mgt_pbase;
905
906 skb_queue_head_init(&nesvnic->mgt_skb_list);
907 init_waitqueue_head(&nesvnic->mgt_wait_queue);
908 nesvnic->mgt_thread = kthread_run(mgt_thread, nesvnic, "nes_mgt_thread");
909
910 for (i = 0; i < NES_MGT_QP_COUNT; i++) {
911 mgtvnic->nesvnic = nesvnic;
912 mgtvnic->mgt.qp_id = nesdev->mac_index + NES_MGT_QP_OFFSET + i;
913 memset(mgt_vbase, 0, mgt_mem_size);
914 nes_debug(NES_DBG_INIT, "Allocated mgt QP structures at %p (phys = %016lX), size = %u.\n",
915 mgt_vbase, (unsigned long)mgt_pbase, mgt_mem_size);
916
917 vmem = (void *)(((unsigned long)mgt_vbase + (256 - 1)) &
918 ~(unsigned long)(256 - 1));
919 pmem = (dma_addr_t)(((unsigned long long)mgt_pbase + (256 - 1)) &
920 ~(unsigned long long)(256 - 1));
921
922 spin_lock_init(&mgtvnic->mgt.rq_lock);
923
924 /* setup the RQ */
925 mgtvnic->mgt.rq_vbase = vmem;
926 mgtvnic->mgt.rq_pbase = pmem;
927 mgtvnic->mgt.rq_head = 0;
928 mgtvnic->mgt.rq_tail = 0;
929 mgtvnic->mgt.rq_size = NES_MGT_WQ_COUNT;
930
931 /* setup the CQ */
932 vmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
933 pmem += (NES_MGT_WQ_COUNT * sizeof(struct nes_hw_nic_rq_wqe));
934
935 mgtvnic->mgt_cq.cq_number = mgtvnic->mgt.qp_id;
936 mgtvnic->mgt_cq.cq_vbase = vmem;
937 mgtvnic->mgt_cq.cq_pbase = pmem;
938 mgtvnic->mgt_cq.cq_head = 0;
939 mgtvnic->mgt_cq.cq_size = NES_MGT_WQ_COUNT;
940
941 mgtvnic->mgt_cq.ce_handler = nes_mgt_ce_handler;
942
943 /* Send CreateCQ request to CQP */
944 spin_lock_irqsave(&nesdev->cqp.lock, flags);
945 cqp_head = nesdev->cqp.sq_head;
946
947 cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
948 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
949
950 cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(
951 NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
952 ((u32)mgtvnic->mgt_cq.cq_size << 16));
953 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(
954 mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16));
955 u64temp = (u64)mgtvnic->mgt_cq.cq_pbase;
956 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_CQ_WQE_PBL_LOW_IDX, u64temp);
957 cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
958 u64temp = (unsigned long)&mgtvnic->mgt_cq;
959 cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX] = cpu_to_le32((u32)(u64temp >> 1));
960 cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] =
961 cpu_to_le32(((u32)((u64temp) >> 33)) & 0x7FFFFFFF);
962 cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
963
964 if (++cqp_head >= nesdev->cqp.sq_size)
965 cqp_head = 0;
966 cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
967 nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
968
969 /* Send CreateQP request to CQP */
970 mgt_context = (void *)(&mgtvnic->mgt_cq.cq_vbase[mgtvnic->mgt_cq.cq_size]);
971 mgt_context->context_words[NES_NIC_CTX_MISC_IDX] =
972 cpu_to_le32((u32)NES_MGT_CTX_SIZE |
973 ((u32)PCI_FUNC(nesdev->pcidev->devfn) << 12));
974 nes_debug(NES_DBG_INIT, "RX_WINDOW_BUFFER_PAGE_TABLE_SIZE = 0x%08X, RX_WINDOW_BUFFER_SIZE = 0x%08X\n",
975 nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_PAGE_TABLE_SIZE),
976 nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE));
977 if (nes_read_indexed(nesdev, NES_IDX_RX_WINDOW_BUFFER_SIZE) != 0)
978 mgt_context->context_words[NES_NIC_CTX_MISC_IDX] |= cpu_to_le32(NES_NIC_BACK_STORE);
979
980 u64temp = (u64)mgtvnic->mgt.rq_pbase;
981 mgt_context->context_words[NES_NIC_CTX_SQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
982 mgt_context->context_words[NES_NIC_CTX_SQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
983 u64temp = (u64)mgtvnic->mgt.rq_pbase;
984 mgt_context->context_words[NES_NIC_CTX_RQ_LOW_IDX] = cpu_to_le32((u32)u64temp);
985 mgt_context->context_words[NES_NIC_CTX_RQ_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
986
987 cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_QP |
988 NES_CQP_QP_TYPE_NIC);
989 cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(mgtvnic->mgt.qp_id);
990 u64temp = (u64)mgtvnic->mgt_cq.cq_pbase +
991 (mgtvnic->mgt_cq.cq_size * sizeof(struct nes_hw_nic_cqe));
992 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_QP_WQE_CONTEXT_LOW_IDX, u64temp);
993
994 if (++cqp_head >= nesdev->cqp.sq_size)
995 cqp_head = 0;
996 nesdev->cqp.sq_head = cqp_head;
997
998 barrier();
999
1000 /* Ring doorbell (2 WQEs) */
1001 nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
1002
1003 spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1004 nes_debug(NES_DBG_INIT, "Waiting for create MGT QP%u to complete.\n",
1005 mgtvnic->mgt.qp_id);
1006
1007 ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
1008 NES_EVENT_TIMEOUT);
1009 nes_debug(NES_DBG_INIT, "Create MGT QP%u completed, wait_event_timeout ret = %u.\n",
1010 mgtvnic->mgt.qp_id, ret);
1011 if (!ret) {
1012 nes_debug(NES_DBG_INIT, "MGT QP%u create timeout expired\n", mgtvnic->mgt.qp_id);
1013 if (i == 0) {
1014 pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
1015 nesvnic->mgt_pbase);
1016 kfree(mgtvnic);
1017 } else {
1018 nes_destroy_mgt(nesvnic);
1019 }
1020 return -EIO;
1021 }
1022
1023 /* Populate the RQ */
1024 for (counter = 0; counter < (NES_MGT_WQ_COUNT - 1); counter++) {
1025 skb = dev_alloc_skb(nesvnic->max_frame_size);
1026 if (!skb) {
1027 nes_debug(NES_DBG_INIT, "%s: out of memory for receive skb\n", netdev->name);
1028 return -ENOMEM;
1029 }
1030
1031 skb->dev = netdev;
1032
1033 pmem = pci_map_single(nesdev->pcidev, skb->data,
1034 nesvnic->max_frame_size, PCI_DMA_FROMDEVICE);
1035 cb = (struct nes_rskb_cb *)&skb->cb[0];
1036 cb->busaddr = pmem;
1037 cb->maplen = nesvnic->max_frame_size;
1038
1039 mgt_rqe = &mgtvnic->mgt.rq_vbase[counter];
1040 mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_1_0_IDX] = cpu_to_le32((u32)nesvnic->max_frame_size);
1041 mgt_rqe->wqe_words[NES_NIC_RQ_WQE_LENGTH_3_2_IDX] = 0;
1042 mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_LOW_IDX] = cpu_to_le32((u32)pmem);
1043 mgt_rqe->wqe_words[NES_NIC_RQ_WQE_FRAG0_HIGH_IDX] = cpu_to_le32((u32)((u64)pmem >> 32));
1044 mgtvnic->mgt.rx_skb[counter] = skb;
1045 }
1046
1047 init_timer(&mgtvnic->rq_wqes_timer);
1048 mgtvnic->rq_wqes_timer.function = nes_mgt_rq_wqes_timeout;
1049 mgtvnic->rq_wqes_timer.data = (unsigned long)mgtvnic;
1050
1051 wqe_count = NES_MGT_WQ_COUNT - 1;
1052 mgtvnic->mgt.rq_head = wqe_count;
1053 barrier();
1054 do {
1055 counter = min(wqe_count, ((u32)255));
1056 wqe_count -= counter;
1057 nes_write32(nesdev->regs + NES_WQE_ALLOC, (counter << 24) | mgtvnic->mgt.qp_id);
1058 } while (wqe_count);
1059
1060 nes_write32(nesdev->regs + NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
1061 mgtvnic->mgt_cq.cq_number);
1062 nes_read32(nesdev->regs + NES_CQE_ALLOC);
1063
1064 mgt_vbase += mgt_mem_size;
1065 mgt_pbase += mgt_mem_size;
1066 nesvnic->mgtvnic[i] = mgtvnic++;
1067 }
1068 return 0;
1069}
1070
1071
/**
 * nes_destroy_mgt - tear down the MGT (PAU management) QPs of a vnic
 * @nesvnic: vnic whose MGT QPs, CQs and receive buffers are destroyed
 *
 * Stops the management kthread, then for each allocated MGT vnic:
 * frees any receive skbs still posted on the RQ, and posts a
 * Destroy-QP + Destroy-CQ pair of WQEs to the CQP, waiting (with a
 * timeout) for the hardware to retire them.  Finally releases the
 * shared DMA region and the mgtvnic memory.  The mgtvnic[i] pointers
 * all index into one contiguous allocation (they are handed out via
 * mgtvnic++ in nes_init_mgt_qp), so only the first element is kfree'd.
 */
1072void nes_destroy_mgt(struct nes_vnic *nesvnic)
1073{
1074	struct nes_device *nesdev = nesvnic->nesdev;
1075	struct nes_vnic_mgt *mgtvnic;
1076	struct nes_vnic_mgt *first_mgtvnic;
1077	unsigned long flags;
1078	struct nes_hw_cqp_wqe *cqp_wqe;
1079	u32 cqp_head;
1080	struct sk_buff *rx_skb;
1081	int i;
1082	int ret;
1083
	/* Quiesce the PAU worker before pulling its queues out from under it */
1084	kthread_stop(nesvnic->mgt_thread);
1085
1086	/* Free remaining NIC receive buffers */
1087	first_mgtvnic = nesvnic->mgtvnic[0];
1088	for (i = 0; i < NES_MGT_QP_COUNT; i++) {
1089		mgtvnic = nesvnic->mgtvnic[i];
1090		if (mgtvnic == NULL)
1091			continue;
1092
		/* DMA-unmap and free every skb still posted between tail and head */
1093		while (mgtvnic->mgt.rq_head != mgtvnic->mgt.rq_tail) {
1094			rx_skb = mgtvnic->mgt.rx_skb[mgtvnic->mgt.rq_tail];
1095			nes_mgt_free_skb(nesdev, rx_skb, PCI_DMA_FROMDEVICE);
1096			mgtvnic->mgt.rq_tail++;
			/* rq_size is a power of two, so masking wraps the ring index */
1097			mgtvnic->mgt.rq_tail &= (mgtvnic->mgt.rq_size - 1);
1098		}
1099
1100		spin_lock_irqsave(&nesdev->cqp.lock, flags);
1101
1102		/* Destroy NIC QP */
1103		cqp_head = nesdev->cqp.sq_head;
1104		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
1105		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
1106
1107		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
1108				    (NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_NIC));
1109		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
1110				    mgtvnic->mgt.qp_id);
1111
1112		if (++cqp_head >= nesdev->cqp.sq_size)
1113			cqp_head = 0;
1114
1115		cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
1116
1117		/* Destroy NIC CQ */
1118		nes_fill_init_cqp_wqe(cqp_wqe, nesdev);
1119		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_OPCODE_IDX,
1120				    (NES_CQP_DESTROY_CQ | ((u32)mgtvnic->mgt_cq.cq_size << 16)));
1121		set_wqe_32bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_ID_IDX,
1122				    (mgtvnic->mgt_cq.cq_number | ((u32)nesdev->ceq_index << 16)));
1123
1124		if (++cqp_head >= nesdev->cqp.sq_size)
1125			cqp_head = 0;
1126
1127		nesdev->cqp.sq_head = cqp_head;
		/* Ensure both WQEs are visible to the HW before the doorbell write */
1128		barrier();
1129
1130		/* Ring doorbell (2 WQEs) */
		/* 0x02800000 requests two WQEs -- same encoding as the CQP ring
		 * write used by nes_init_mgt_qp */
1131		nes_write32(nesdev->regs + NES_WQE_ALLOC, 0x02800000 | nesdev->cqp.qp_id);
1132
1133		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
1134		nes_debug(NES_DBG_SHUTDOWN, "Waiting for CQP, cqp_head=%u, cqp.sq_head=%u,"
1135			  " cqp.sq_tail=%u, cqp.sq_size=%u\n",
1136			  cqp_head, nesdev->cqp.sq_head,
1137			  nesdev->cqp.sq_tail, nesdev->cqp.sq_size);
1138
		/* Wait for HW to consume both WQEs; ret == 0 means timeout */
1139		ret = wait_event_timeout(nesdev->cqp.waitq, (nesdev->cqp.sq_tail == cqp_head),
1140					 NES_EVENT_TIMEOUT);
1141
1142		nes_debug(NES_DBG_SHUTDOWN, "Destroy MGT QP returned, wait_event_timeout ret = %u, cqp_head=%u,"
1143			  " cqp.sq_head=%u, cqp.sq_tail=%u\n",
1144			  ret, cqp_head, nesdev->cqp.sq_head, nesdev->cqp.sq_tail);
		/* Timeout is only logged; teardown proceeds regardless */
1145		if (!ret)
1146			nes_debug(NES_DBG_SHUTDOWN, "MGT QP%u destroy timeout expired\n",
1147				  mgtvnic->mgt.qp_id);
1148
1149		nesvnic->mgtvnic[i] = NULL;
1150	}
1151
	/* Release the single DMA region that backs all MGT RQ/CQ rings */
1152	if (nesvnic->mgt_vbase) {
1153		pci_free_consistent(nesdev->pcidev, nesvnic->mgt_mem_size, nesvnic->mgt_vbase,
1154				    nesvnic->mgt_pbase);
1155		nesvnic->mgt_vbase = NULL;
1156		nesvnic->mgt_pbase = 0;
1157	}
1158
	/* All mgtvnic structs live in one allocation starting here */
1159	kfree(first_mgtvnic);
1160}
diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h
deleted file mode 100644
index 4f7f701c4a8..00000000000
--- a/drivers/infiniband/hw/nes/nes_mgt.h
+++ /dev/null
@@ -1,97 +0,0 @@
1/*
2* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved.
3*
4* This software is available to you under a choice of one of two
5* licenses. You may choose to be licensed under the terms of the GNU
6* General Public License (GPL) Version 2, available from the file
7* COPYING in the main directory of this source tree, or the
8* OpenIB.org BSD license below:
9*
10* Redistribution and use in source and binary forms, with or
11* without modification, are permitted provided that the following
12* conditions are met:
13*
14* - Redistributions of source code must retain the above
15* copyright notice, this list of conditions and the following
16* disclaimer.
17*
18* - Redistributions in binary form must reproduce the above
19* copyright notice, this list of conditions and the following
20* disclaimer in the documentation and/or other materials
21* provided with the distribution.
22*
23* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30* SOFTWARE.
31*/
32
33#ifndef __NES_MGT_H
34#define __NES_MGT_H
35
36#define MPA_FRAMING 6 /* length is 2 bytes, crc is 4 bytes */
37
/* Set up the PAU management QPs/CQs for a vnic; returns 0 or -errno */
38int nes_init_mgt_qp(struct nes_device *nesdev, struct net_device *netdev, struct nes_vnic *nesvnic);
/* Hand a received skb to the management path for the given QP */
39void nes_queue_mgt_skbs(struct sk_buff *skb, struct nes_vnic *nesvnic, struct nes_qp *nesqp);
/* Tear down all MGT QPs/CQs and free their buffers (inverse of init) */
40void nes_destroy_mgt(struct nes_vnic *nesvnic);
/* Destroy a single PAU QP */
41void nes_destroy_pau_qp(struct nes_device *nesdev, struct nes_qp *nesqp);
42
/* Host-side state of one hardware MGT receive queue (RQ ring) */
43struct nes_hw_mgt {
44	struct nes_hw_nic_rq_wqe *rq_vbase;	/* virtual address of rq */
45	dma_addr_t rq_pbase;	/* PCI memory for host rings */
46	struct sk_buff *rx_skb[NES_NIC_WQ_SIZE];	/* skbs posted on the RQ, by slot */
47	u16 qp_id;		/* hardware QP number */
48	u16 sq_head;
49	u16 rq_head;		/* next slot to post */
50	u16 rq_tail;		/* next slot HW will fill */
51	u16 rq_size;		/* ring size; power of two (indices wrap by mask) */
52	u8 replenishing_rq;
53	u8 reserved;
54	spinlock_t rq_lock;	/* protects RQ head/tail updates */
55};
56
/* One management vnic: RQ state plus its completion queue and refill timer */
57struct nes_vnic_mgt {
58	struct nes_vnic *nesvnic;	/* owning vnic */
59	struct nes_hw_mgt mgt;		/* RQ ring state */
60	struct nes_hw_nic_cq mgt_cq;	/* completion queue for this MGT QP */
61	atomic_t rx_skbs_needed;	/* outstanding RQ buffers to replenish */
62	struct timer_list rq_wqes_timer;	/* fires nes_mgt_rq_wqes_timeout to refill the RQ */
63	atomic_t rx_skb_timer_running;
64};
65
66#define MAX_FPDU_FRAGS 4
/* One fragment of an FPDU spread across received skbs */
67struct pau_fpdu_frag {
68	struct sk_buff *skb;
69	u64 physaddr;	/* DMA address of the fragment data */
70	u32 frag_len;
71	bool cmplt;	/* fragment completes its skb -- TODO confirm exact semantics */
72};
73
/* Reassembly descriptor for one MPA FPDU forwarded via the CQP */
74struct pau_fpdu_info {
75	struct nes_qp *nesqp;
76	struct nes_cqp_request *cqp_request;
77	void *hdr_vbase;	/* virtual address of the FPDU header buffer */
78	dma_addr_t hdr_pbase;	/* DMA address of the FPDU header buffer */
79	int hdr_len;
80	u16 data_len;
81	u16 frag_cnt;		/* number of valid entries in frags[] */
82	struct pau_fpdu_frag frags[MAX_FPDU_FRAGS];
83};
84
/* State of a PAU queue-hash change request -- presumably delete / add
 * loopback / ready; verify against users of struct pau_qh_chg */
85enum pau_qh_state {
86	PAU_DEL_QH,
87	PAU_ADD_LB_QH,
88	PAU_READY
89};
90
/* Context carried by a queue-hash change request */
91struct pau_qh_chg {
92	struct nes_device *nesdev;
93	struct nes_vnic *nesvnic;
94	struct nes_qp *nesqp;
95};
96
97#endif /* __NES_MGT_H */
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 9542e1644a5..9d7ffebff21 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -243,9 +243,10 @@ static int nes_netdev_open(struct net_device *netdev)
243 243
244 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags); 244 spin_lock_irqsave(&nesdev->nesadapter->phy_lock, flags);
245 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) { 245 if (nesdev->nesadapter->phy_type[nesdev->mac_index] == NES_PHY_TYPE_SFP_D) {
246 if (nesdev->link_recheck)
247 cancel_delayed_work(&nesdev->work);
246 nesdev->link_recheck = 1; 248 nesdev->link_recheck = 1;
247 mod_delayed_work(system_wq, &nesdev->work, 249 schedule_delayed_work(&nesdev->work, NES_LINK_RECHECK_DELAY);
248 NES_LINK_RECHECK_DELAY);
249 } 250 }
250 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags); 251 spin_unlock_irqrestore(&nesdev->nesadapter->phy_lock, flags);
251 252
@@ -384,20 +385,24 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
384 /* bump past the vlan tag */ 385 /* bump past the vlan tag */
385 wqe_fragment_length++; 386 wqe_fragment_length++;
386 /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */ 387 /* wqe_fragment_address = (u64 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_FRAG0_LOW_IDX]; */
387 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
388 388
389 if (skb->ip_summed == CHECKSUM_PARTIAL) { 389 if (skb->ip_summed == CHECKSUM_PARTIAL) {
390 if (skb_is_gso(skb)) { 390 tcph = tcp_hdr(skb);
391 tcph = tcp_hdr(skb); 391 if (1) {
392 /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... is_gso = %u seg size = %u\n", 392 if (skb_is_gso(skb)) {
393 netdev->name, skb_is_gso(skb), skb_shinfo(skb)->gso_size); */ 393 /* nes_debug(NES_DBG_NIC_TX, "%s: TSO request... seg size = %u\n",
394 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE | (u16)skb_shinfo(skb)->gso_size; 394 netdev->name, skb_is_gso(skb)); */
395 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX, 395 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE |
396 ((u32)tcph->doff) | 396 NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
397 (((u32)(((unsigned char *)tcph) - skb->data)) << 4)); 397 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_LSO_INFO_IDX,
398 ((u32)tcph->doff) |
399 (((u32)(((unsigned char *)tcph) - skb->data)) << 4));
400 } else {
401 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION;
402 }
398 } 403 }
399 } else { /* CHECKSUM_HW */ 404 } else { /* CHECKSUM_HW */
400 wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM; 405 wqe_misc |= NES_NIC_SQ_WQE_DISABLE_CHKSUM | NES_NIC_SQ_WQE_COMPLETION;
401 } 406 }
402 407
403 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX, 408 set_wqe_32bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_TOTAL_LENGTH_IDX,
@@ -436,13 +441,13 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev)
436 nesnic->tx_skb[nesnic->sq_head] = skb; 441 nesnic->tx_skb[nesnic->sq_head] = skb;
437 for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags; 442 for (skb_fragment_index = 0; skb_fragment_index < skb_shinfo(skb)->nr_frags;
438 skb_fragment_index++) { 443 skb_fragment_index++) {
439 skb_frag_t *frag = 444 bus_address = pci_map_page( nesdev->pcidev,
440 &skb_shinfo(skb)->frags[skb_fragment_index]; 445 skb_shinfo(skb)->frags[skb_fragment_index].page,
441 bus_address = skb_frag_dma_map(&nesdev->pcidev->dev, 446 skb_shinfo(skb)->frags[skb_fragment_index].page_offset,
442 frag, 0, skb_frag_size(frag), 447 skb_shinfo(skb)->frags[skb_fragment_index].size,
443 DMA_TO_DEVICE); 448 PCI_DMA_TODEVICE);
444 wqe_fragment_length[wqe_fragment_index] = 449 wqe_fragment_length[wqe_fragment_index] =
445 cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index])); 450 cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size);
446 set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), 451 set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
447 bus_address); 452 bus_address);
448 wqe_fragment_index++; 453 wqe_fragment_index++;
@@ -556,12 +561,11 @@ tso_sq_no_longer_full:
556 /* Map all the buffers */ 561 /* Map all the buffers */
557 for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags; 562 for (tso_frag_count=0; tso_frag_count < skb_shinfo(skb)->nr_frags;
558 tso_frag_count++) { 563 tso_frag_count++) {
559 skb_frag_t *frag = 564 tso_bus_address[tso_frag_count] = pci_map_page( nesdev->pcidev,
560 &skb_shinfo(skb)->frags[tso_frag_count]; 565 skb_shinfo(skb)->frags[tso_frag_count].page,
561 tso_bus_address[tso_frag_count] = 566 skb_shinfo(skb)->frags[tso_frag_count].page_offset,
562 skb_frag_dma_map(&nesdev->pcidev->dev, 567 skb_shinfo(skb)->frags[tso_frag_count].size,
563 frag, 0, skb_frag_size(frag), 568 PCI_DMA_TODEVICE);
564 DMA_TO_DEVICE);
565 } 569 }
566 570
567 tso_frag_index = 0; 571 tso_frag_index = 0;
@@ -592,10 +596,10 @@ tso_sq_no_longer_full:
592 nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n", 596 nes_debug(NES_DBG_NIC_TX, "ERROR: SKB header too big, headlen=%u, FIRST_FRAG_SIZE=%u\n",
593 original_first_length, NES_FIRST_FRAG_SIZE); 597 original_first_length, NES_FIRST_FRAG_SIZE);
594 nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u," 598 nes_debug(NES_DBG_NIC_TX, "%s Request to tx NIC packet length %u, headlen %u,"
595 " (%u frags), is_gso = %u tso_size=%u\n", 599 " (%u frags), tso_size=%u\n",
596 netdev->name, 600 netdev->name,
597 skb->len, skb_headlen(skb), 601 skb->len, skb_headlen(skb),
598 skb_shinfo(skb)->nr_frags, skb_is_gso(skb), skb_shinfo(skb)->gso_size); 602 skb_shinfo(skb)->nr_frags, skb_is_gso(skb));
599 } 603 }
600 memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer, 604 memcpy(&nesnic->first_frag_vbase[nesnic->sq_head].buffer,
601 skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE), 605 skb->data, min(((unsigned int)NES_FIRST_FRAG_SIZE),
@@ -632,11 +636,11 @@ tso_sq_no_longer_full:
632 } 636 }
633 while (wqe_fragment_index < 5) { 637 while (wqe_fragment_index < 5) {
634 wqe_fragment_length[wqe_fragment_index] = 638 wqe_fragment_length[wqe_fragment_index] =
635 cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index])); 639 cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size);
636 set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), 640 set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index),
637 (u64)tso_bus_address[tso_frag_index]); 641 (u64)tso_bus_address[tso_frag_index]);
638 wqe_fragment_index++; 642 wqe_fragment_index++;
639 tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]); 643 tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size;
640 if (wqe_fragment_index < 5) 644 if (wqe_fragment_index < 5)
641 wqe_fragment_length[wqe_fragment_index] = 0; 645 wqe_fragment_length[wqe_fragment_index] = 0;
642 if (tso_frag_index == tso_frag_count) 646 if (tso_frag_index == tso_frag_count)
@@ -647,8 +651,8 @@ tso_sq_no_longer_full:
647 } else { 651 } else {
648 nesnic->tx_skb[nesnic->sq_head] = NULL; 652 nesnic->tx_skb[nesnic->sq_head] = NULL;
649 } 653 }
650 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_shinfo(skb)->gso_size; 654 wqe_misc |= NES_NIC_SQ_WQE_COMPLETION | (u16)skb_is_gso(skb);
651 if ((tso_wqe_length + original_first_length) > skb_shinfo(skb)->gso_size) { 655 if ((tso_wqe_length + original_first_length) > skb_is_gso(skb)) {
652 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE; 656 wqe_misc |= NES_NIC_SQ_WQE_LSO_ENABLE;
653 } else { 657 } else {
654 iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset); 658 iph->tot_len = htons(tso_wqe_length + original_first_length - nhoffset);
@@ -944,13 +948,12 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
944 addr, 948 addr,
945 perfect_filter_register_address+(mc_index * 8), 949 perfect_filter_register_address+(mc_index * 8),
946 mc_nic_index); 950 mc_nic_index);
947 macaddr_high = ((u8) addr[0]) << 8; 951 macaddr_high = ((u16) addr[0]) << 8;
948 macaddr_high += (u8) addr[1]; 952 macaddr_high += (u16) addr[1];
949 macaddr_low = ((u8) addr[2]) << 24; 953 macaddr_low = ((u32) addr[2]) << 24;
950 macaddr_low += ((u8) addr[3]) << 16; 954 macaddr_low += ((u32) addr[3]) << 16;
951 macaddr_low += ((u8) addr[4]) << 8; 955 macaddr_low += ((u32) addr[4]) << 8;
952 macaddr_low += (u8) addr[5]; 956 macaddr_low += (u32) addr[5];
953
954 nes_write_indexed(nesdev, 957 nes_write_indexed(nesdev,
955 perfect_filter_register_address+(mc_index * 8), 958 perfect_filter_register_address+(mc_index * 8),
956 macaddr_low); 959 macaddr_low);
@@ -1087,8 +1090,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
1087 "LRO aggregated", 1090 "LRO aggregated",
1088 "LRO flushed", 1091 "LRO flushed",
1089 "LRO no_desc", 1092 "LRO no_desc",
1090 "PAU CreateQPs",
1091 "PAU DestroyQPs",
1092}; 1093};
1093#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset) 1094#define NES_ETHTOOL_STAT_COUNT ARRAY_SIZE(nes_ethtool_stringset)
1094 1095
@@ -1304,8 +1305,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
1304 target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated; 1305 target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated;
1305 target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed; 1306 target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed;
1306 target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc; 1307 target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc;
1307 target_stat_values[++index] = atomic_read(&pau_qps_created);
1308 target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
1309} 1308}
1310 1309
1311/** 1310/**
@@ -1585,7 +1584,7 @@ static const struct ethtool_ops nes_ethtool_ops = {
1585 .set_pauseparam = nes_netdev_set_pauseparam, 1584 .set_pauseparam = nes_netdev_set_pauseparam,
1586}; 1585};
1587 1586
1588static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) 1587static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, u32 features)
1589{ 1588{
1590 struct nes_adapter *nesadapter = nesdev->nesadapter; 1589 struct nes_adapter *nesadapter = nesdev->nesadapter;
1591 u32 u32temp; 1590 u32 u32temp;
@@ -1606,7 +1605,7 @@ static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev,
1606 spin_unlock_irqrestore(&nesadapter->phy_lock, flags); 1605 spin_unlock_irqrestore(&nesadapter->phy_lock, flags);
1607} 1606}
1608 1607
1609static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features) 1608static u32 nes_fix_features(struct net_device *netdev, u32 features)
1610{ 1609{
1611 /* 1610 /*
1612 * Since there is no support for separate rx/tx vlan accel 1611 * Since there is no support for separate rx/tx vlan accel
@@ -1620,7 +1619,7 @@ static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_feat
1620 return features; 1619 return features;
1621} 1620}
1622 1621
1623static int nes_set_features(struct net_device *netdev, netdev_features_t features) 1622static int nes_set_features(struct net_device *netdev, u32 features)
1624{ 1623{
1625 struct nes_vnic *nesvnic = netdev_priv(netdev); 1624 struct nes_vnic *nesvnic = netdev_priv(netdev);
1626 struct nes_device *nesdev = nesvnic->nesdev; 1625 struct nes_device *nesdev = nesvnic->nesdev;
@@ -1639,7 +1638,7 @@ static const struct net_device_ops nes_netdev_ops = {
1639 .ndo_get_stats = nes_netdev_get_stats, 1638 .ndo_get_stats = nes_netdev_get_stats,
1640 .ndo_tx_timeout = nes_netdev_tx_timeout, 1639 .ndo_tx_timeout = nes_netdev_tx_timeout,
1641 .ndo_set_mac_address = nes_netdev_set_mac_address, 1640 .ndo_set_mac_address = nes_netdev_set_mac_address,
1642 .ndo_set_rx_mode = nes_netdev_set_multicast_list, 1641 .ndo_set_multicast_list = nes_netdev_set_multicast_list,
1643 .ndo_change_mtu = nes_netdev_change_mtu, 1642 .ndo_change_mtu = nes_netdev_change_mtu,
1644 .ndo_validate_addr = eth_validate_addr, 1643 .ndo_validate_addr = eth_validate_addr,
1645 .ndo_fix_features = nes_fix_features, 1644 .ndo_fix_features = nes_fix_features,
@@ -1675,10 +1674,12 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1675 netdev->hard_header_len = ETH_HLEN; 1674 netdev->hard_header_len = ETH_HLEN;
1676 netdev->addr_len = ETH_ALEN; 1675 netdev->addr_len = ETH_ALEN;
1677 netdev->type = ARPHRD_ETHER; 1676 netdev->type = ARPHRD_ETHER;
1677 netdev->features = NETIF_F_HIGHDMA;
1678 netdev->netdev_ops = &nes_netdev_ops; 1678 netdev->netdev_ops = &nes_netdev_ops;
1679 netdev->ethtool_ops = &nes_ethtool_ops; 1679 netdev->ethtool_ops = &nes_ethtool_ops;
1680 netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128); 1680 netif_napi_add(netdev, &nesvnic->napi, nes_netdev_poll, 128);
1681 nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n"); 1681 nes_debug(NES_DBG_INIT, "Enabling VLAN Insert/Delete.\n");
1682 netdev->features |= NETIF_F_HW_VLAN_TX;
1682 1683
1683 /* Fill in the port structure */ 1684 /* Fill in the port structure */
1684 nesvnic->netdev = netdev; 1685 nesvnic->netdev = netdev;
@@ -1705,11 +1706,11 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
1705 netdev->dev_addr[5] = (u8)u64temp; 1706 netdev->dev_addr[5] = (u8)u64temp;
1706 memcpy(netdev->perm_addr, netdev->dev_addr, 6); 1707 memcpy(netdev->perm_addr, netdev->dev_addr, 6);
1707 1708
1708 netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_HW_VLAN_RX; 1709 netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM |
1710 NETIF_F_HW_VLAN_RX;
1709 if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV)) 1711 if ((nesvnic->logical_port < 2) || (nesdev->nesadapter->hw_rev != NE020_REV))
1710 netdev->hw_features |= NETIF_F_TSO; 1712 netdev->hw_features |= NETIF_F_TSO;
1711 1713 netdev->features |= netdev->hw_features;
1712 netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX;
1713 netdev->hw_features |= NETIF_F_LRO; 1714 netdev->hw_features |= NETIF_F_LRO;
1714 1715
1715 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d," 1716 nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h
index 4926de74448..71e133ab209 100644
--- a/drivers/infiniband/hw/nes/nes_user.h
+++ b/drivers/infiniband/hw/nes/nes_user.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Topspin Communications. All rights reserved. 3 * Copyright (c) 2005 Topspin Communications. All rights reserved.
4 * Copyright (c) 2005 Cisco Systems. All rights reserved. 4 * Copyright (c) 2005 Cisco Systems. All rights reserved.
5 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 5 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c
index 2042c0f2975..f9c417c6b3b 100644
--- a/drivers/infiniband/hw/nes/nes_utils.c
+++ b/drivers/infiniband/hw/nes/nes_utils.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -51,34 +51,13 @@
51 51
52#include "nes.h" 52#include "nes.h"
53 53
54
55
54static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); 56static u16 nes_read16_eeprom(void __iomem *addr, u16 offset);
55 57
56u32 mh_detected; 58u32 mh_detected;
57u32 mh_pauses_sent; 59u32 mh_pauses_sent;
58 60
59static u32 nes_set_pau(struct nes_device *nesdev)
60{
61 u32 ret = 0;
62 u32 counter;
63
64 nes_write_indexed(nesdev, NES_IDX_GPR2, NES_ENABLE_PAU);
65 nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
66
67 for (counter = 0; counter < NES_PAU_COUNTER; counter++) {
68 udelay(30);
69 if (!nes_read_indexed(nesdev, NES_IDX_GPR2)) {
70 printk(KERN_INFO PFX "PAU is supported.\n");
71 break;
72 }
73 nes_write_indexed(nesdev, NES_IDX_GPR_TRIGGER, 1);
74 }
75 if (counter == NES_PAU_COUNTER) {
76 printk(KERN_INFO PFX "PAU is not supported.\n");
77 return -EPERM;
78 }
79 return ret;
80}
81
82/** 61/**
83 * nes_read_eeprom_values - 62 * nes_read_eeprom_values -
84 */ 63 */
@@ -208,11 +187,6 @@ int nes_read_eeprom_values(struct nes_device *nesdev, struct nes_adapter *nesada
208 if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3)) 187 if (((major_ver == 3) && (minor_ver >= 16)) || (major_ver > 3))
209 nesadapter->send_term_ok = 1; 188 nesadapter->send_term_ok = 1;
210 189
211 if (nes_drv_opt & NES_DRV_OPT_ENABLE_PAU) {
212 if (!nes_set_pau(nesdev))
213 nesadapter->allow_unaligned_fpdus = 1;
214 }
215
216 nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) + 190 nesadapter->firmware_version = (((u32)(u8)(eeprom_data>>8)) << 16) +
217 (u32)((u8)eeprom_data); 191 (u32)((u8)eeprom_data);
218 192
@@ -620,7 +594,6 @@ void nes_put_cqp_request(struct nes_device *nesdev,
620 nes_free_cqp_request(nesdev, cqp_request); 594 nes_free_cqp_request(nesdev, cqp_request);
621} 595}
622 596
623
624/** 597/**
625 * nes_post_cqp_request 598 * nes_post_cqp_request
626 */ 599 */
@@ -631,8 +604,6 @@ void nes_post_cqp_request(struct nes_device *nesdev,
631 unsigned long flags; 604 unsigned long flags;
632 u32 cqp_head; 605 u32 cqp_head;
633 u64 u64temp; 606 u64 u64temp;
634 u32 opcode;
635 int ctx_index = NES_CQP_WQE_COMP_CTX_LOW_IDX;
636 607
637 spin_lock_irqsave(&nesdev->cqp.lock, flags); 608 spin_lock_irqsave(&nesdev->cqp.lock, flags);
638 609
@@ -643,20 +614,17 @@ void nes_post_cqp_request(struct nes_device *nesdev,
643 nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1; 614 nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
644 cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head]; 615 cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
645 memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe)); 616 memcpy(cqp_wqe, &cqp_request->cqp_wqe, sizeof(*cqp_wqe));
646 opcode = le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX]);
647 if ((opcode & NES_CQP_OPCODE_MASK) == NES_CQP_DOWNLOAD_SEGMENT)
648 ctx_index = NES_CQP_WQE_DL_COMP_CTX_LOW_IDX;
649 barrier(); 617 barrier();
650 u64temp = (unsigned long)cqp_request; 618 u64temp = (unsigned long)cqp_request;
651 set_wqe_64bit_value(cqp_wqe->wqe_words, ctx_index, u64temp); 619 set_wqe_64bit_value(cqp_wqe->wqe_words, NES_CQP_WQE_COMP_SCRATCH_LOW_IDX,
620 u64temp);
652 nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ," 621 nes_debug(NES_DBG_CQP, "CQP request (opcode 0x%02X), line 1 = 0x%08X put on CQPs SQ,"
653 " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u," 622 " request = %p, cqp_head = %u, cqp_tail = %u, cqp_size = %u,"
654 " waiting = %d, refcount = %d.\n", 623 " waiting = %d, refcount = %d.\n",
655 opcode & NES_CQP_OPCODE_MASK, 624 le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX])&0x3f,
656 le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request, 625 le32_to_cpu(cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX]), cqp_request,
657 nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size, 626 nesdev->cqp.sq_head, nesdev->cqp.sq_tail, nesdev->cqp.sq_size,
658 cqp_request->waiting, atomic_read(&cqp_request->refcount)); 627 cqp_request->waiting, atomic_read(&cqp_request->refcount));
659
660 barrier(); 628 barrier();
661 629
662 /* Ring doorbell (1 WQEs) */ 630 /* Ring doorbell (1 WQEs) */
@@ -677,6 +645,7 @@ void nes_post_cqp_request(struct nes_device *nesdev,
677 return; 645 return;
678} 646}
679 647
648
680/** 649/**
681 * nes_arp_table 650 * nes_arp_table
682 */ 651 */
@@ -699,7 +668,7 @@ int nes_arp_table(struct nes_device *nesdev, u32 ip_addr, u8 *mac_addr, u32 acti
699 668
700 arp_index = 0; 669 arp_index = 0;
701 err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps, 670 err = nes_alloc_resource(nesadapter, nesadapter->allocated_arps,
702 nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index, NES_RESOURCE_ARP); 671 nesadapter->arp_table_size, (u32 *)&arp_index, &nesadapter->next_arp_index);
703 if (err) { 672 if (err) {
704 nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err); 673 nes_debug(NES_DBG_NETDEV, "nes_alloc_resource returned error = %u\n", err);
705 return err; 674 return err;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 07e4fbad987..9f2f7d4b119 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * 3 *
4 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -80,7 +80,7 @@ static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd) {
80 next_stag_index %= nesadapter->max_mr; 80 next_stag_index %= nesadapter->max_mr;
81 81
82 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 82 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
83 nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_MW); 83 nesadapter->max_mr, &stag_index, &next_stag_index);
84 if (ret) { 84 if (ret) {
85 return ERR_PTR(ret); 85 return ERR_PTR(ret);
86 } 86 }
@@ -404,7 +404,7 @@ static struct ib_mr *nes_alloc_fast_reg_mr(struct ib_pd *ibpd, int max_page_list
404 404
405 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 405 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
406 nesadapter->max_mr, &stag_index, 406 nesadapter->max_mr, &stag_index,
407 &next_stag_index, NES_RESOURCE_FAST_MR); 407 &next_stag_index);
408 if (err) 408 if (err)
409 return ERR_PTR(err); 409 return ERR_PTR(err);
410 410
@@ -597,7 +597,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
597 props->pkey_tbl_len = 1; 597 props->pkey_tbl_len = 1;
598 props->qkey_viol_cntr = 0; 598 props->qkey_viol_cntr = 0;
599 props->active_width = IB_WIDTH_4X; 599 props->active_width = IB_WIDTH_4X;
600 props->active_speed = IB_SPEED_SDR; 600 props->active_speed = 1;
601 props->max_msg_sz = 0x80000000; 601 props->max_msg_sz = 0x80000000;
602 602
603 return 0; 603 return 0;
@@ -780,7 +780,7 @@ static struct ib_pd *nes_alloc_pd(struct ib_device *ibdev,
780 netdev_refcnt_read(nesvnic->netdev)); 780 netdev_refcnt_read(nesvnic->netdev));
781 781
782 err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds, 782 err = nes_alloc_resource(nesadapter, nesadapter->allocated_pds,
783 nesadapter->max_pd, &pd_num, &nesadapter->next_pd, NES_RESOURCE_PD); 783 nesadapter->max_pd, &pd_num, &nesadapter->next_pd);
784 if (err) { 784 if (err) {
785 return ERR_PTR(err); 785 return ERR_PTR(err);
786 } 786 }
@@ -1157,7 +1157,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1157 nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size); 1157 nes_debug(NES_DBG_QP, "RQ size=%u, SQ Size=%u\n", rq_size, sq_size);
1158 1158
1159 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps, 1159 ret = nes_alloc_resource(nesadapter, nesadapter->allocated_qps,
1160 nesadapter->max_qp, &qp_num, &nesadapter->next_qp, NES_RESOURCE_QP); 1160 nesadapter->max_qp, &qp_num, &nesadapter->next_qp);
1161 if (ret) { 1161 if (ret) {
1162 return ERR_PTR(ret); 1162 return ERR_PTR(ret);
1163 } 1163 }
@@ -1404,9 +1404,6 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1404 } 1404 }
1405 1405
1406 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR); 1406 nesqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR);
1407 init_timer(&nesqp->terminate_timer);
1408 nesqp->terminate_timer.function = nes_terminate_timeout;
1409 nesqp->terminate_timer.data = (unsigned long)nesqp;
1410 1407
1411 /* update the QP table */ 1408 /* update the QP table */
1412 nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp; 1409 nesdev->nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = nesqp;
@@ -1416,6 +1413,7 @@ static struct ib_qp *nes_create_qp(struct ib_pd *ibpd,
1416 return &nesqp->ibqp; 1413 return &nesqp->ibqp;
1417} 1414}
1418 1415
1416
1419/** 1417/**
1420 * nes_clean_cq 1418 * nes_clean_cq
1421 */ 1419 */
@@ -1460,7 +1458,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
1460 struct ib_qp_attr attr; 1458 struct ib_qp_attr attr;
1461 struct iw_cm_id *cm_id; 1459 struct iw_cm_id *cm_id;
1462 struct iw_cm_event cm_event; 1460 struct iw_cm_event cm_event;
1463 int ret = 0; 1461 int ret;
1464 1462
1465 atomic_inc(&sw_qps_destroyed); 1463 atomic_inc(&sw_qps_destroyed);
1466 nesqp->destroyed = 1; 1464 nesqp->destroyed = 1;
@@ -1513,6 +1511,7 @@ static int nes_destroy_qp(struct ib_qp *ibqp)
1513 if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq)) 1511 if ((nesqp->nesrcq) && (nesqp->nesrcq != nesqp->nesscq))
1514 nes_clean_cq(nesqp, nesqp->nesrcq); 1512 nes_clean_cq(nesqp, nesqp->nesrcq);
1515 } 1513 }
1514
1516 nes_rem_ref(&nesqp->ibqp); 1515 nes_rem_ref(&nesqp->ibqp);
1517 return 0; 1516 return 0;
1518} 1517}
@@ -1548,7 +1547,7 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
1548 return ERR_PTR(-EINVAL); 1547 return ERR_PTR(-EINVAL);
1549 1548
1550 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs, 1549 err = nes_alloc_resource(nesadapter, nesadapter->allocated_cqs,
1551 nesadapter->max_cq, &cq_num, &nesadapter->next_cq, NES_RESOURCE_CQ); 1550 nesadapter->max_cq, &cq_num, &nesadapter->next_cq);
1552 if (err) { 1551 if (err) {
1553 return ERR_PTR(err); 1552 return ERR_PTR(err);
1554 } 1553 }
@@ -2131,7 +2130,7 @@ static struct ib_mr *nes_reg_phys_mr(struct ib_pd *ib_pd,
2131 return ERR_PTR(-EINVAL); 2130 return ERR_PTR(-EINVAL);
2132 2131
2133 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr, 2132 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, nesadapter->max_mr,
2134 &stag_index, &next_stag_index, NES_RESOURCE_PHYS_MR); 2133 &stag_index, &next_stag_index);
2135 if (err) { 2134 if (err) {
2136 return ERR_PTR(err); 2135 return ERR_PTR(err);
2137 } 2136 }
@@ -2339,10 +2338,8 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2339 2338
2340 skip_pages = ((u32)region->offset) >> 12; 2339 skip_pages = ((u32)region->offset) >> 12;
2341 2340
2342 if (ib_copy_from_udata(&req, udata, sizeof(req))) { 2341 if (ib_copy_from_udata(&req, udata, sizeof(req)))
2343 ib_umem_release(region);
2344 return ERR_PTR(-EFAULT); 2342 return ERR_PTR(-EFAULT);
2345 }
2346 nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type); 2343 nes_debug(NES_DBG_MR, "Memory Registration type = %08X.\n", req.reg_type);
2347 2344
2348 switch (req.reg_type) { 2345 switch (req.reg_type) {
@@ -2362,7 +2359,7 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2362 next_stag_index %= nesadapter->max_mr; 2359 next_stag_index %= nesadapter->max_mr;
2363 2360
2364 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs, 2361 err = nes_alloc_resource(nesadapter, nesadapter->allocated_mrs,
2365 nesadapter->max_mr, &stag_index, &next_stag_index, NES_RESOURCE_USER_MR); 2362 nesadapter->max_mr, &stag_index, &next_stag_index);
2366 if (err) { 2363 if (err) {
2367 ib_umem_release(region); 2364 ib_umem_release(region);
2368 return ERR_PTR(err); 2365 return ERR_PTR(err);
@@ -2561,11 +2558,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2561 return ibmr; 2558 return ibmr;
2562 case IWNES_MEMREG_TYPE_QP: 2559 case IWNES_MEMREG_TYPE_QP:
2563 case IWNES_MEMREG_TYPE_CQ: 2560 case IWNES_MEMREG_TYPE_CQ:
2564 if (!region->length) {
2565 nes_debug(NES_DBG_MR, "Unable to register zero length region for CQ\n");
2566 ib_umem_release(region);
2567 return ERR_PTR(-EINVAL);
2568 }
2569 nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL); 2561 nespbl = kzalloc(sizeof(*nespbl), GFP_KERNEL);
2570 if (!nespbl) { 2562 if (!nespbl) {
2571 nes_debug(NES_DBG_MR, "Unable to allocate PBL\n"); 2563 nes_debug(NES_DBG_MR, "Unable to allocate PBL\n");
@@ -2639,7 +2631,6 @@ static struct ib_mr *nes_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
2639 return &nesmr->ibmr; 2631 return &nesmr->ibmr;
2640 } 2632 }
2641 2633
2642 ib_umem_release(region);
2643 return ERR_PTR(-ENOSYS); 2634 return ERR_PTR(-ENOSYS);
2644} 2635}
2645 2636
@@ -3013,7 +3004,6 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3013 switch (nesqp->hw_iwarp_state) { 3004 switch (nesqp->hw_iwarp_state) {
3014 case NES_AEQE_IWARP_STATE_CLOSING: 3005 case NES_AEQE_IWARP_STATE_CLOSING:
3015 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING; 3006 next_iwarp_state = NES_CQP_QP_IWARP_STATE_CLOSING;
3016 break;
3017 case NES_AEQE_IWARP_STATE_TERMINATE: 3007 case NES_AEQE_IWARP_STATE_TERMINATE:
3018 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE; 3008 next_iwarp_state = NES_CQP_QP_IWARP_STATE_TERMINATE;
3019 break; 3009 break;
@@ -3076,9 +3066,18 @@ int nes_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
3076 } 3066 }
3077 3067
3078 nesqp->ibqp_state = attr->qp_state; 3068 nesqp->ibqp_state = attr->qp_state;
3079 nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK; 3069 if (((nesqp->iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) ==
3080 nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n", 3070 (u32)NES_CQP_QP_IWARP_STATE_RTS) &&
3081 nesqp->iwarp_state); 3071 ((next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK) >
3072 (u32)NES_CQP_QP_IWARP_STATE_RTS)) {
3073 nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
3074 nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
3075 nesqp->iwarp_state);
3076 } else {
3077 nesqp->iwarp_state = next_iwarp_state & NES_CQP_QP_IWARP_STATE_MASK;
3078 nes_debug(NES_DBG_MOD_QP, "Change nesqp->iwarp_state=%08x\n",
3079 nesqp->iwarp_state);
3080 }
3082 } 3081 }
3083 3082
3084 if (attr_mask & IB_QP_ACCESS_FLAGS) { 3083 if (attr_mask & IB_QP_ACCESS_FLAGS) {
@@ -3427,8 +3426,6 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr,
3427 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, 3426 NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX,
3428 ib_wr->wr.fast_reg.length); 3427 ib_wr->wr.fast_reg.length);
3429 set_wqe_32bit_value(wqe->wqe_words, 3428 set_wqe_32bit_value(wqe->wqe_words,
3430 NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0);
3431 set_wqe_32bit_value(wqe->wqe_words,
3432 NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, 3429 NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX,
3433 ib_wr->wr.fast_reg.rkey); 3430 ib_wr->wr.fast_reg.rkey);
3434 /* Set page size: */ 3431 /* Set page size: */
@@ -3725,7 +3722,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
3725 entry->opcode = IB_WC_SEND; 3722 entry->opcode = IB_WC_SEND;
3726 break; 3723 break;
3727 case NES_IWARP_SQ_OP_LOCINV: 3724 case NES_IWARP_SQ_OP_LOCINV:
3728 entry->opcode = IB_WC_LOCAL_INV; 3725 entry->opcode = IB_WR_LOCAL_INV;
3729 break; 3726 break;
3730 case NES_IWARP_SQ_OP_FAST_REG: 3727 case NES_IWARP_SQ_OP_FAST_REG:
3731 entry->opcode = IB_WC_FAST_REG_MR; 3728 entry->opcode = IB_WC_FAST_REG_MR;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h
index 0eff7c44d76..2df9993e0ca 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.h
+++ b/drivers/infiniband/hw/nes/nes_verbs.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved.
3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 3 * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
4 * 4 *
5 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -139,8 +139,7 @@ struct nes_qp {
139 struct nes_cq *nesrcq; 139 struct nes_cq *nesrcq;
140 struct nes_pd *nespd; 140 struct nes_pd *nespd;
141 void *cm_node; /* handle of the node this QP is associated with */ 141 void *cm_node; /* handle of the node this QP is associated with */
142 void *ietf_frame; 142 struct ietf_mpa_frame *ietf_frame;
143 u8 ietf_frame_size;
144 dma_addr_t ietf_frame_pbase; 143 dma_addr_t ietf_frame_pbase;
145 struct ib_mr *lsmm_mr; 144 struct ib_mr *lsmm_mr;
146 struct nes_hw_qp hwqp; 145 struct nes_hw_qp hwqp;
@@ -155,7 +154,6 @@ struct nes_qp {
155 u32 mmap_sq_db_index; 154 u32 mmap_sq_db_index;
156 u32 mmap_rq_db_index; 155 u32 mmap_rq_db_index;
157 spinlock_t lock; 156 spinlock_t lock;
158 spinlock_t pau_lock;
159 struct nes_qp_context *nesqp_context; 157 struct nes_qp_context *nesqp_context;
160 dma_addr_t nesqp_context_pbase; 158 dma_addr_t nesqp_context_pbase;
161 void *pbl_vbase; 159 void *pbl_vbase;
@@ -163,8 +161,6 @@ struct nes_qp {
163 struct page *page; 161 struct page *page;
164 struct timer_list terminate_timer; 162 struct timer_list terminate_timer;
165 enum ib_event_type terminate_eventtype; 163 enum ib_event_type terminate_eventtype;
166 struct sk_buff_head pau_list;
167 u32 pau_rcv_nxt;
168 u16 active_conn:1; 164 u16 active_conn:1;
169 u16 skip_lsmm:1; 165 u16 skip_lsmm:1;
170 u16 user_mode:1; 166 u16 user_mode:1;
@@ -172,8 +168,7 @@ struct nes_qp {
172 u16 flush_issued:1; 168 u16 flush_issued:1;
173 u16 destroyed:1; 169 u16 destroyed:1;
174 u16 sig_all:1; 170 u16 sig_all:1;
175 u16 pau_mode:1; 171 u16 rsvd:9;
176 u16 rsvd:8;
177 u16 private_data_len; 172 u16 private_data_len;
178 u16 term_sq_flush_code; 173 u16 term_sq_flush_code;
179 u16 term_rq_flush_code; 174 u16 term_rq_flush_code;
@@ -181,8 +176,5 @@ struct nes_qp {
181 u8 hw_tcp_state; 176 u8 hw_tcp_state;
182 u8 term_flags; 177 u8 term_flags;
183 u8 sq_kmapped; 178 u8 sq_kmapped;
184 u8 pau_busy;
185 u8 pau_pending;
186 u8 pau_state;
187}; 179};
188#endif /* NES_VERBS_H */ 180#endif /* NES_VERBS_H */
diff --git a/drivers/infiniband/hw/ocrdma/Kconfig b/drivers/infiniband/hw/ocrdma/Kconfig
deleted file mode 100644
index b5b6056c851..00000000000
--- a/drivers/infiniband/hw/ocrdma/Kconfig
+++ /dev/null
@@ -1,8 +0,0 @@
1config INFINIBAND_OCRDMA
2 tristate "Emulex One Connect HCA support"
3 depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
4 select NET_VENDOR_EMULEX
5 select BE2NET
6 ---help---
7 This driver provides low-level InfiniBand over Ethernet
8 support for Emulex One Connect host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/ocrdma/Makefile b/drivers/infiniband/hw/ocrdma/Makefile
deleted file mode 100644
index 06a5bed12e4..00000000000
--- a/drivers/infiniband/hw/ocrdma/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1ccflags-y := -Idrivers/net/ethernet/emulex/benet
2
3obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma.o
4
5ocrdma-y := ocrdma_main.o ocrdma_verbs.o ocrdma_hw.o ocrdma_ah.o
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
deleted file mode 100644
index 48970af2367..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ /dev/null
@@ -1,393 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#ifndef __OCRDMA_H__
29#define __OCRDMA_H__
30
31#include <linux/mutex.h>
32#include <linux/list.h>
33#include <linux/spinlock.h>
34#include <linux/pci.h>
35
36#include <rdma/ib_verbs.h>
37#include <rdma/ib_user_verbs.h>
38
39#include <be_roce.h>
40#include "ocrdma_sli.h"
41
42#define OCRDMA_ROCE_DEV_VERSION "1.0.0"
43#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
44
45#define ocrdma_err(format, arg...) printk(KERN_ERR format, ##arg)
46
47#define OCRDMA_MAX_AH 512
48
49#define OCRDMA_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
50
51struct ocrdma_dev_attr {
52 u8 fw_ver[32];
53 u32 vendor_id;
54 u32 device_id;
55 u16 max_pd;
56 u16 max_cq;
57 u16 max_cqe;
58 u16 max_qp;
59 u16 max_wqe;
60 u16 max_rqe;
61 u32 max_inline_data;
62 int max_send_sge;
63 int max_recv_sge;
64 int max_srq_sge;
65 int max_mr;
66 u64 max_mr_size;
67 u32 max_num_mr_pbl;
68 int max_fmr;
69 int max_map_per_fmr;
70 int max_pages_per_frmr;
71 u16 max_ord_per_qp;
72 u16 max_ird_per_qp;
73
74 int device_cap_flags;
75 u8 cq_overflow_detect;
76 u8 srq_supported;
77
78 u32 wqe_size;
79 u32 rqe_size;
80 u32 ird_page_size;
81 u8 local_ca_ack_delay;
82 u8 ird;
83 u8 num_ird_pages;
84};
85
86struct ocrdma_pbl {
87 void *va;
88 dma_addr_t pa;
89};
90
91struct ocrdma_queue_info {
92 void *va;
93 dma_addr_t dma;
94 u32 size;
95 u16 len;
96 u16 entry_size; /* Size of an element in the queue */
97 u16 id; /* qid, where to ring the doorbell. */
98 u16 head, tail;
99 bool created;
100 atomic_t used; /* Number of valid elements in the queue */
101};
102
103struct ocrdma_eq {
104 struct ocrdma_queue_info q;
105 u32 vector;
106 int cq_cnt;
107 struct ocrdma_dev *dev;
108 char irq_name[32];
109};
110
111struct ocrdma_mq {
112 struct ocrdma_queue_info sq;
113 struct ocrdma_queue_info cq;
114 bool rearm_cq;
115};
116
117struct mqe_ctx {
118 struct mutex lock; /* for serializing mailbox commands on MQ */
119 wait_queue_head_t cmd_wait;
120 u32 tag;
121 u16 cqe_status;
122 u16 ext_status;
123 bool cmd_done;
124};
125
126struct ocrdma_dev {
127 struct ib_device ibdev;
128 struct ocrdma_dev_attr attr;
129
130 struct mutex dev_lock; /* provides syncronise access to device data */
131 spinlock_t flush_q_lock ____cacheline_aligned;
132
133 struct ocrdma_cq **cq_tbl;
134 struct ocrdma_qp **qp_tbl;
135
136 struct ocrdma_eq meq;
137 struct ocrdma_eq *qp_eq_tbl;
138 int eq_cnt;
139 u16 base_eqid;
140 u16 max_eq;
141
142 union ib_gid *sgid_tbl;
143 /* provided synchronization to sgid table for
144 * updating gid entries triggered by notifier.
145 */
146 spinlock_t sgid_lock;
147
148 int gsi_qp_created;
149 struct ocrdma_cq *gsi_sqcq;
150 struct ocrdma_cq *gsi_rqcq;
151
152 struct {
153 struct ocrdma_av *va;
154 dma_addr_t pa;
155 u32 size;
156 u32 num_ah;
157 /* provide synchronization for av
158 * entry allocations.
159 */
160 spinlock_t lock;
161 u32 ahid;
162 struct ocrdma_pbl pbl;
163 } av_tbl;
164
165 void *mbx_cmd;
166 struct ocrdma_mq mq;
167 struct mqe_ctx mqe_ctx;
168
169 struct be_dev_info nic_info;
170
171 struct list_head entry;
172 struct rcu_head rcu;
173 int id;
174};
175
176struct ocrdma_cq {
177 struct ib_cq ibcq;
178 struct ocrdma_dev *dev;
179 struct ocrdma_cqe *va;
180 u32 phase;
181 u32 getp; /* pointer to pending wrs to
182 * return to stack, wrap arounds
183 * at max_hw_cqe
184 */
185 u32 max_hw_cqe;
186 bool phase_change;
187 bool armed, solicited;
188 bool arm_needed;
189
190 spinlock_t cq_lock ____cacheline_aligned; /* provide synchronization
191 * to cq polling
192 */
193 /* syncronizes cq completion handler invoked from multiple context */
194 spinlock_t comp_handler_lock ____cacheline_aligned;
195 u16 id;
196 u16 eqn;
197
198 struct ocrdma_ucontext *ucontext;
199 dma_addr_t pa;
200 u32 len;
201 atomic_t use_cnt;
202
203 /* head of all qp's sq and rq for which cqes need to be flushed
204 * by the software.
205 */
206 struct list_head sq_head, rq_head;
207};
208
209struct ocrdma_pd {
210 struct ib_pd ibpd;
211 struct ocrdma_dev *dev;
212 struct ocrdma_ucontext *uctx;
213 atomic_t use_cnt;
214 u32 id;
215 int num_dpp_qp;
216 u32 dpp_page;
217 bool dpp_enabled;
218};
219
220struct ocrdma_ah {
221 struct ib_ah ibah;
222 struct ocrdma_dev *dev;
223 struct ocrdma_av *av;
224 u16 sgid_index;
225 u32 id;
226};
227
228struct ocrdma_qp_hwq_info {
229 u8 *va; /* virtual address */
230 u32 max_sges;
231 u32 head, tail;
232 u32 entry_size;
233 u32 max_cnt;
234 u32 max_wqe_idx;
235 u16 dbid; /* qid, where to ring the doorbell. */
236 u32 len;
237 dma_addr_t pa;
238};
239
240struct ocrdma_srq {
241 struct ib_srq ibsrq;
242 struct ocrdma_dev *dev;
243 u8 __iomem *db;
244 /* provide synchronization to multiple context(s) posting rqe */
245 spinlock_t q_lock ____cacheline_aligned;
246
247 struct ocrdma_qp_hwq_info rq;
248 struct ocrdma_pd *pd;
249 atomic_t use_cnt;
250 u32 id;
251 u64 *rqe_wr_id_tbl;
252 u32 *idx_bit_fields;
253 u32 bit_fields_len;
254};
255
256struct ocrdma_qp {
257 struct ib_qp ibqp;
258 struct ocrdma_dev *dev;
259
260 u8 __iomem *sq_db;
261 /* provide synchronization to multiple context(s) posting wqe, rqe */
262 spinlock_t q_lock ____cacheline_aligned;
263 struct ocrdma_qp_hwq_info sq;
264 struct {
265 uint64_t wrid;
266 uint16_t dpp_wqe_idx;
267 uint16_t dpp_wqe;
268 uint8_t signaled;
269 uint8_t rsvd[3];
270 } *wqe_wr_id_tbl;
271 u32 max_inline_data;
272 struct ocrdma_cq *sq_cq;
273 /* list maintained per CQ to flush SQ errors */
274 struct list_head sq_entry;
275
276 u8 __iomem *rq_db;
277 struct ocrdma_qp_hwq_info rq;
278 u64 *rqe_wr_id_tbl;
279 struct ocrdma_cq *rq_cq;
280 struct ocrdma_srq *srq;
281 /* list maintained per CQ to flush RQ errors */
282 struct list_head rq_entry;
283
284 enum ocrdma_qp_state state; /* QP state */
285 int cap_flags;
286 u32 max_ord, max_ird;
287
288 u32 id;
289 struct ocrdma_pd *pd;
290
291 enum ib_qp_type qp_type;
292
293 int sgid_idx;
294 u32 qkey;
295 bool dpp_enabled;
296 u8 *ird_q_va;
297};
298
299#define OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp) \
300 (((qp->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) && \
301 (qp->id < 64)) ? 24 : 16)
302
303struct ocrdma_hw_mr {
304 struct ocrdma_dev *dev;
305 u32 lkey;
306 u8 fr_mr;
307 u8 remote_atomic;
308 u8 remote_rd;
309 u8 remote_wr;
310 u8 local_rd;
311 u8 local_wr;
312 u8 mw_bind;
313 u8 rsvd;
314 u64 len;
315 struct ocrdma_pbl *pbl_table;
316 u32 num_pbls;
317 u32 num_pbes;
318 u32 pbl_size;
319 u32 pbe_size;
320 u64 fbo;
321 u64 va;
322};
323
324struct ocrdma_mr {
325 struct ib_mr ibmr;
326 struct ib_umem *umem;
327 struct ocrdma_hw_mr hwmr;
328 struct ocrdma_pd *pd;
329};
330
331struct ocrdma_ucontext {
332 struct ib_ucontext ibucontext;
333 struct ocrdma_dev *dev;
334
335 struct list_head mm_head;
336 struct mutex mm_list_lock; /* protects list entries of mm type */
337 struct {
338 u32 *va;
339 dma_addr_t pa;
340 u32 len;
341 } ah_tbl;
342};
343
344struct ocrdma_mm {
345 struct {
346 u64 phy_addr;
347 unsigned long len;
348 } key;
349 struct list_head entry;
350};
351
352static inline struct ocrdma_dev *get_ocrdma_dev(struct ib_device *ibdev)
353{
354 return container_of(ibdev, struct ocrdma_dev, ibdev);
355}
356
357static inline struct ocrdma_ucontext *get_ocrdma_ucontext(struct ib_ucontext
358 *ibucontext)
359{
360 return container_of(ibucontext, struct ocrdma_ucontext, ibucontext);
361}
362
363static inline struct ocrdma_pd *get_ocrdma_pd(struct ib_pd *ibpd)
364{
365 return container_of(ibpd, struct ocrdma_pd, ibpd);
366}
367
368static inline struct ocrdma_cq *get_ocrdma_cq(struct ib_cq *ibcq)
369{
370 return container_of(ibcq, struct ocrdma_cq, ibcq);
371}
372
373static inline struct ocrdma_qp *get_ocrdma_qp(struct ib_qp *ibqp)
374{
375 return container_of(ibqp, struct ocrdma_qp, ibqp);
376}
377
378static inline struct ocrdma_mr *get_ocrdma_mr(struct ib_mr *ibmr)
379{
380 return container_of(ibmr, struct ocrdma_mr, ibmr);
381}
382
383static inline struct ocrdma_ah *get_ocrdma_ah(struct ib_ah *ibah)
384{
385 return container_of(ibah, struct ocrdma_ah, ibah);
386}
387
388static inline struct ocrdma_srq *get_ocrdma_srq(struct ib_srq *ibsrq)
389{
390 return container_of(ibsrq, struct ocrdma_srq, ibsrq);
391}
392
393#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
deleted file mode 100644
index 517ab20b727..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h
+++ /dev/null
@@ -1,131 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#ifndef __OCRDMA_ABI_H__
29#define __OCRDMA_ABI_H__
30
31struct ocrdma_alloc_ucontext_resp {
32 u32 dev_id;
33 u32 wqe_size;
34 u32 max_inline_data;
35 u32 dpp_wqe_size;
36 u64 ah_tbl_page;
37 u32 ah_tbl_len;
38 u32 rsvd;
39 u8 fw_ver[32];
40 u32 rqe_size;
41 u64 rsvd1;
42} __packed;
43
44/* user kernel communication data structures. */
45struct ocrdma_alloc_pd_ureq {
46 u64 rsvd1;
47} __packed;
48
49struct ocrdma_alloc_pd_uresp {
50 u32 id;
51 u32 dpp_enabled;
52 u32 dpp_page_addr_hi;
53 u32 dpp_page_addr_lo;
54 u64 rsvd1;
55} __packed;
56
57struct ocrdma_create_cq_ureq {
58 u32 dpp_cq;
59 u32 rsvd;
60} __packed;
61
62#define MAX_CQ_PAGES 8
63struct ocrdma_create_cq_uresp {
64 u32 cq_id;
65 u32 page_size;
66 u32 num_pages;
67 u32 max_hw_cqe;
68 u64 page_addr[MAX_CQ_PAGES];
69 u64 db_page_addr;
70 u32 db_page_size;
71 u32 phase_change;
72 u64 rsvd1;
73 u64 rsvd2;
74} __packed;
75
76#define MAX_QP_PAGES 8
77#define MAX_UD_AV_PAGES 8
78
79struct ocrdma_create_qp_ureq {
80 u8 enable_dpp_cq;
81 u8 rsvd;
82 u16 dpp_cq_id;
83 u32 rsvd1;
84};
85
86struct ocrdma_create_qp_uresp {
87 u16 qp_id;
88 u16 sq_dbid;
89 u16 rq_dbid;
90 u16 resv0;
91 u32 sq_page_size;
92 u32 rq_page_size;
93 u32 num_sq_pages;
94 u32 num_rq_pages;
95 u64 sq_page_addr[MAX_QP_PAGES];
96 u64 rq_page_addr[MAX_QP_PAGES];
97 u64 db_page_addr;
98 u32 db_page_size;
99 u32 dpp_credit;
100 u32 dpp_offset;
101 u32 rsvd1;
102 u32 num_wqe_allocated;
103 u32 num_rqe_allocated;
104 u32 db_sq_offset;
105 u32 db_rq_offset;
106 u32 db_shift;
107 u64 rsvd2;
108 u64 rsvd3;
109} __packed;
110
111struct ocrdma_create_srq_uresp {
112 u16 rq_dbid;
113 u16 resv0;
114 u32 resv1;
115
116 u32 rq_page_size;
117 u32 num_rq_pages;
118
119 u64 rq_page_addr[MAX_QP_PAGES];
120 u64 db_page_addr;
121
122 u32 db_page_size;
123 u32 num_rqe_allocated;
124 u32 db_rq_offset;
125 u32 db_shift;
126
127 u64 rsvd2;
128 u64 rsvd3;
129} __packed;
130
131#endif /* __OCRDMA_ABI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
deleted file mode 100644
index a877a8ed790..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#include <net/neighbour.h>
29#include <net/netevent.h>
30
31#include <rdma/ib_addr.h>
32#include <rdma/ib_cache.h>
33
34#include "ocrdma.h"
35#include "ocrdma_verbs.h"
36#include "ocrdma_ah.h"
37#include "ocrdma_hw.h"
38
39static inline int set_av_attr(struct ocrdma_ah *ah,
40 struct ib_ah_attr *attr, int pdid)
41{
42 int status = 0;
43 u16 vlan_tag; bool vlan_enabled = false;
44 struct ocrdma_dev *dev = ah->dev;
45 struct ocrdma_eth_vlan eth;
46 struct ocrdma_grh grh;
47 int eth_sz;
48
49 memset(&eth, 0, sizeof(eth));
50 memset(&grh, 0, sizeof(grh));
51
52 ah->sgid_index = attr->grh.sgid_index;
53
54 vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
55 if (vlan_tag && (vlan_tag < 0x1000)) {
56 eth.eth_type = cpu_to_be16(0x8100);
57 eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
58 vlan_tag |= (attr->sl & 7) << 13;
59 eth.vlan_tag = cpu_to_be16(vlan_tag);
60 eth_sz = sizeof(struct ocrdma_eth_vlan);
61 vlan_enabled = true;
62 } else {
63 eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
64 eth_sz = sizeof(struct ocrdma_eth_basic);
65 }
66 memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
67 status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
68 if (status)
69 return status;
70 status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
71 (union ib_gid *)&grh.sgid[0]);
72 if (status)
73 return status;
74
75 grh.tclass_flow = cpu_to_be32((6 << 28) |
76 (attr->grh.traffic_class << 24) |
77 attr->grh.flow_label);
78 /* 0x1b is next header value in GRH */
79 grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
80 (0x1b << 8) | attr->grh.hop_limit);
81
82 memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
83 memcpy(&ah->av->eth_hdr, &eth, eth_sz);
84 memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
85 if (vlan_enabled)
86 ah->av->valid |= OCRDMA_AV_VLAN_VALID;
87 return status;
88}
89
90struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
91{
92 u32 *ahid_addr;
93 int status;
94 struct ocrdma_ah *ah;
95 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
96 struct ocrdma_dev *dev = pd->dev;
97
98 if (!(attr->ah_flags & IB_AH_GRH))
99 return ERR_PTR(-EINVAL);
100
101 ah = kzalloc(sizeof *ah, GFP_ATOMIC);
102 if (!ah)
103 return ERR_PTR(-ENOMEM);
104 ah->dev = pd->dev;
105
106 status = ocrdma_alloc_av(dev, ah);
107 if (status)
108 goto av_err;
109 status = set_av_attr(ah, attr, pd->id);
110 if (status)
111 goto av_conf_err;
112
113 /* if pd is for the user process, pass the ah_id to user space */
114 if ((pd->uctx) && (pd->uctx->ah_tbl.va)) {
115 ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
116 *ahid_addr = ah->id;
117 }
118 return &ah->ibah;
119
120av_conf_err:
121 ocrdma_free_av(dev, ah);
122av_err:
123 kfree(ah);
124 return ERR_PTR(status);
125}
126
127int ocrdma_destroy_ah(struct ib_ah *ibah)
128{
129 struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
130 ocrdma_free_av(ah->dev, ah);
131 kfree(ah);
132 return 0;
133}
134
135int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
136{
137 struct ocrdma_ah *ah = get_ocrdma_ah(ibah);
138 struct ocrdma_av *av = ah->av;
139 struct ocrdma_grh *grh;
140 attr->ah_flags |= IB_AH_GRH;
141 if (ah->av->valid & Bit(1)) {
142 grh = (struct ocrdma_grh *)((u8 *)ah->av +
143 sizeof(struct ocrdma_eth_vlan));
144 attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
145 } else {
146 grh = (struct ocrdma_grh *)((u8 *)ah->av +
147 sizeof(struct ocrdma_eth_basic));
148 attr->sl = 0;
149 }
150 memcpy(&attr->grh.dgid.raw[0], &grh->dgid[0], sizeof(grh->dgid));
151 attr->grh.sgid_index = ah->sgid_index;
152 attr->grh.hop_limit = be32_to_cpu(grh->pdid_hoplimit) & 0xff;
153 attr->grh.traffic_class = be32_to_cpu(grh->tclass_flow) >> 24;
154 attr->grh.flow_label = be32_to_cpu(grh->tclass_flow) & 0x00ffffffff;
155 return 0;
156}
157
158int ocrdma_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
159{
160 /* modify_ah is unsupported */
161 return -ENOSYS;
162}
163
164int ocrdma_process_mad(struct ib_device *ibdev,
165 int process_mad_flags,
166 u8 port_num,
167 struct ib_wc *in_wc,
168 struct ib_grh *in_grh,
169 struct ib_mad *in_mad, struct ib_mad *out_mad)
170{
171 return IB_MAD_RESULT_SUCCESS;
172}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
deleted file mode 100644
index 8ac49e7f96d..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h
+++ /dev/null
@@ -1,42 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
#ifndef __OCRDMA_AH_H__
#define __OCRDMA_AH_H__

/* Address-handle verbs implemented in ocrdma_ah.c. */
struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
int ocrdma_destroy_ah(struct ib_ah *);
int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);
int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *);

/* MAD hook for the IB core; ocrdma does no MAD processing. */
int ocrdma_process_mad(struct ib_device *,
		       int process_mad_flags,
		       u8 port_num,
		       struct ib_wc *in_wc,
		       struct ib_grh *in_grh,
		       struct ib_mad *in_mad, struct ib_mad *out_mad);
#endif				/* __OCRDMA_AH_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
deleted file mode 100644
index 71942af4fce..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ /dev/null
@@ -1,2631 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) CNA Adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#include <linux/sched.h>
29#include <linux/interrupt.h>
30#include <linux/log2.h>
31#include <linux/dma-mapping.h>
32
33#include <rdma/ib_verbs.h>
34#include <rdma/ib_user_verbs.h>
35#include <rdma/ib_addr.h>
36
37#include "ocrdma.h"
38#include "ocrdma_hw.h"
39#include "ocrdma_verbs.h"
40#include "ocrdma_ah.h"
41
/* Primary status codes returned in mailbox command responses. */
enum mbx_status {
	OCRDMA_MBX_STATUS_FAILED = 1,
	OCRDMA_MBX_STATUS_ILLEGAL_FIELD = 3,
	OCRDMA_MBX_STATUS_OOR = 100,
	OCRDMA_MBX_STATUS_INVALID_PD = 101,
	OCRDMA_MBX_STATUS_PD_INUSE = 102,
	OCRDMA_MBX_STATUS_INVALID_CQ = 103,
	OCRDMA_MBX_STATUS_INVALID_QP = 104,
	OCRDMA_MBX_STATUS_INVALID_LKEY = 105,
	OCRDMA_MBX_STATUS_ORD_EXCEEDS = 106,
	OCRDMA_MBX_STATUS_IRD_EXCEEDS = 107,
	OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS = 108,
	OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS = 109,
	OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS = 110,
	OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS = 111,
	OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS = 112,
	OCRDMA_MBX_STATUS_INVALID_STATE_CHANGE = 113,
	OCRDMA_MBX_STATUS_MW_BOUND = 114,
	OCRDMA_MBX_STATUS_INVALID_VA = 115,
	OCRDMA_MBX_STATUS_INVALID_LENGTH = 116,
	OCRDMA_MBX_STATUS_INVALID_FBO = 117,
	OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS = 118,
	OCRDMA_MBX_STATUS_INVALID_PBE_SIZE = 119,
	OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY = 120,
	OCRDMA_MBX_STATUS_INVALID_PBL_SHIFT = 121,
	OCRDMA_MBX_STATUS_INVALID_SRQ_ID = 129,
	OCRDMA_MBX_STATUS_SRQ_ERROR = 133,
	OCRDMA_MBX_STATUS_RQE_EXCEEDS = 134,
	OCRDMA_MBX_STATUS_MTU_EXCEEDS = 135,
	OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS = 136,
	OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS = 137,
	OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS = 138,
	OCRDMA_MBX_STATUS_QP_BOUND = 130,	/* NB: out of numeric order */
	OCRDMA_MBX_STATUS_INVALID_CHANGE = 139,
	OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP = 140,
	OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER = 141,
	OCRDMA_MBX_STATUS_MW_STILL_BOUND = 142,
	OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID = 143,
	OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS = 144
};

/* Additional-status byte accompanying OCRDMA_MBX_STATUS_FAILED. */
enum additional_status {
	OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES = 22
};

/* Status codes carried in mailbox CQEs (transport-level outcome). */
enum cqe_status {
	OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES = 1,
	OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER = 2,
	OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES = 3,
	OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING = 4,
	OCRDMA_MBX_CQE_STATUS_DMA_FAILED = 5
};
94
95static inline void *ocrdma_get_eqe(struct ocrdma_eq *eq)
96{
97 return (u8 *)eq->q.va + (eq->q.tail * sizeof(struct ocrdma_eqe));
98}
99
100static inline void ocrdma_eq_inc_tail(struct ocrdma_eq *eq)
101{
102 eq->q.tail = (eq->q.tail + 1) & (OCRDMA_EQ_LEN - 1);
103}
104
105static inline void *ocrdma_get_mcqe(struct ocrdma_dev *dev)
106{
107 struct ocrdma_mcqe *cqe = (struct ocrdma_mcqe *)
108 ((u8 *) dev->mq.cq.va +
109 (dev->mq.cq.tail * sizeof(struct ocrdma_mcqe)));
110
111 if (!(le32_to_cpu(cqe->valid_ae_cmpl_cons) & OCRDMA_MCQE_VALID_MASK))
112 return NULL;
113 return cqe;
114}
115
116static inline void ocrdma_mcq_inc_tail(struct ocrdma_dev *dev)
117{
118 dev->mq.cq.tail = (dev->mq.cq.tail + 1) & (OCRDMA_MQ_CQ_LEN - 1);
119}
120
121static inline struct ocrdma_mqe *ocrdma_get_mqe(struct ocrdma_dev *dev)
122{
123 return (struct ocrdma_mqe *)((u8 *) dev->mq.sq.va +
124 (dev->mq.sq.head *
125 sizeof(struct ocrdma_mqe)));
126}
127
128static inline void ocrdma_mq_inc_head(struct ocrdma_dev *dev)
129{
130 dev->mq.sq.head = (dev->mq.sq.head + 1) & (OCRDMA_MQ_LEN - 1);
131 atomic_inc(&dev->mq.sq.used);
132}
133
134static inline void *ocrdma_get_mqe_rsp(struct ocrdma_dev *dev)
135{
136 return (void *)((u8 *) dev->mq.sq.va +
137 (dev->mqe_ctx.tag * sizeof(struct ocrdma_mqe)));
138}
139
140enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps)
141{
142 switch (qps) {
143 case OCRDMA_QPS_RST:
144 return IB_QPS_RESET;
145 case OCRDMA_QPS_INIT:
146 return IB_QPS_INIT;
147 case OCRDMA_QPS_RTR:
148 return IB_QPS_RTR;
149 case OCRDMA_QPS_RTS:
150 return IB_QPS_RTS;
151 case OCRDMA_QPS_SQD:
152 case OCRDMA_QPS_SQ_DRAINING:
153 return IB_QPS_SQD;
154 case OCRDMA_QPS_SQE:
155 return IB_QPS_SQE;
156 case OCRDMA_QPS_ERR:
157 return IB_QPS_ERR;
158 };
159 return IB_QPS_ERR;
160}
161
162static enum ocrdma_qp_state get_ocrdma_qp_state(enum ib_qp_state qps)
163{
164 switch (qps) {
165 case IB_QPS_RESET:
166 return OCRDMA_QPS_RST;
167 case IB_QPS_INIT:
168 return OCRDMA_QPS_INIT;
169 case IB_QPS_RTR:
170 return OCRDMA_QPS_RTR;
171 case IB_QPS_RTS:
172 return OCRDMA_QPS_RTS;
173 case IB_QPS_SQD:
174 return OCRDMA_QPS_SQD;
175 case IB_QPS_SQE:
176 return OCRDMA_QPS_SQE;
177 case IB_QPS_ERR:
178 return OCRDMA_QPS_ERR;
179 };
180 return OCRDMA_QPS_ERR;
181}
182
183static int ocrdma_get_mbx_errno(u32 status)
184{
185 int err_num = -EFAULT;
186 u8 mbox_status = (status & OCRDMA_MBX_RSP_STATUS_MASK) >>
187 OCRDMA_MBX_RSP_STATUS_SHIFT;
188 u8 add_status = (status & OCRDMA_MBX_RSP_ASTATUS_MASK) >>
189 OCRDMA_MBX_RSP_ASTATUS_SHIFT;
190
191 switch (mbox_status) {
192 case OCRDMA_MBX_STATUS_OOR:
193 case OCRDMA_MBX_STATUS_MAX_QP_EXCEEDS:
194 err_num = -EAGAIN;
195 break;
196
197 case OCRDMA_MBX_STATUS_INVALID_PD:
198 case OCRDMA_MBX_STATUS_INVALID_CQ:
199 case OCRDMA_MBX_STATUS_INVALID_SRQ_ID:
200 case OCRDMA_MBX_STATUS_INVALID_QP:
201 case OCRDMA_MBX_STATUS_INVALID_CHANGE:
202 case OCRDMA_MBX_STATUS_MTU_EXCEEDS:
203 case OCRDMA_MBX_STATUS_INVALID_RNR_NAK_TIMER:
204 case OCRDMA_MBX_STATUS_PKEY_INDEX_INVALID:
205 case OCRDMA_MBX_STATUS_PKEY_INDEX_EXCEEDS:
206 case OCRDMA_MBX_STATUS_ILLEGAL_FIELD:
207 case OCRDMA_MBX_STATUS_INVALID_PBL_ENTRY:
208 case OCRDMA_MBX_STATUS_INVALID_LKEY:
209 case OCRDMA_MBX_STATUS_INVALID_VA:
210 case OCRDMA_MBX_STATUS_INVALID_LENGTH:
211 case OCRDMA_MBX_STATUS_INVALID_FBO:
212 case OCRDMA_MBX_STATUS_INVALID_ACC_RIGHTS:
213 case OCRDMA_MBX_STATUS_INVALID_PBE_SIZE:
214 case OCRDMA_MBX_STATUS_ATOMIC_OPS_UNSUP:
215 case OCRDMA_MBX_STATUS_SRQ_ERROR:
216 case OCRDMA_MBX_STATUS_SRQ_SIZE_UNDERUNS:
217 err_num = -EINVAL;
218 break;
219
220 case OCRDMA_MBX_STATUS_PD_INUSE:
221 case OCRDMA_MBX_STATUS_QP_BOUND:
222 case OCRDMA_MBX_STATUS_MW_STILL_BOUND:
223 case OCRDMA_MBX_STATUS_MW_BOUND:
224 err_num = -EBUSY;
225 break;
226
227 case OCRDMA_MBX_STATUS_RECVQ_RQE_EXCEEDS:
228 case OCRDMA_MBX_STATUS_SGE_RECV_EXCEEDS:
229 case OCRDMA_MBX_STATUS_RQE_EXCEEDS:
230 case OCRDMA_MBX_STATUS_SRQ_LIMIT_EXCEEDS:
231 case OCRDMA_MBX_STATUS_ORD_EXCEEDS:
232 case OCRDMA_MBX_STATUS_IRD_EXCEEDS:
233 case OCRDMA_MBX_STATUS_SENDQ_WQE_EXCEEDS:
234 case OCRDMA_MBX_STATUS_SGE_SEND_EXCEEDS:
235 case OCRDMA_MBX_STATUS_SGE_WRITE_EXCEEDS:
236 err_num = -ENOBUFS;
237 break;
238
239 case OCRDMA_MBX_STATUS_FAILED:
240 switch (add_status) {
241 case OCRDMA_MBX_ADDI_STATUS_INSUFFICIENT_RESOURCES:
242 err_num = -EAGAIN;
243 break;
244 }
245 default:
246 err_num = -EFAULT;
247 }
248 return err_num;
249}
250
251static int ocrdma_get_mbx_cqe_errno(u16 cqe_status)
252{
253 int err_num = -EINVAL;
254
255 switch (cqe_status) {
256 case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_PRIVILEDGES:
257 err_num = -EPERM;
258 break;
259 case OCRDMA_MBX_CQE_STATUS_INVALID_PARAMETER:
260 err_num = -EINVAL;
261 break;
262 case OCRDMA_MBX_CQE_STATUS_INSUFFICIENT_RESOURCES:
263 case OCRDMA_MBX_CQE_STATUS_QUEUE_FLUSHING:
264 err_num = -EAGAIN;
265 break;
266 case OCRDMA_MBX_CQE_STATUS_DMA_FAILED:
267 err_num = -EIO;
268 break;
269 }
270 return err_num;
271}
272
273void ocrdma_ring_cq_db(struct ocrdma_dev *dev, u16 cq_id, bool armed,
274 bool solicited, u16 cqe_popped)
275{
276 u32 val = cq_id & OCRDMA_DB_CQ_RING_ID_MASK;
277
278 val |= ((cq_id & OCRDMA_DB_CQ_RING_ID_EXT_MASK) <<
279 OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT);
280
281 if (armed)
282 val |= (1 << OCRDMA_DB_CQ_REARM_SHIFT);
283 if (solicited)
284 val |= (1 << OCRDMA_DB_CQ_SOLICIT_SHIFT);
285 val |= (cqe_popped << OCRDMA_DB_CQ_NUM_POPPED_SHIFT);
286 iowrite32(val, dev->nic_info.db + OCRDMA_DB_CQ_OFFSET);
287}
288
289static void ocrdma_ring_mq_db(struct ocrdma_dev *dev)
290{
291 u32 val = 0;
292
293 val |= dev->mq.sq.id & OCRDMA_MQ_ID_MASK;
294 val |= 1 << OCRDMA_MQ_NUM_MQE_SHIFT;
295 iowrite32(val, dev->nic_info.db + OCRDMA_DB_MQ_OFFSET);
296}
297
298static void ocrdma_ring_eq_db(struct ocrdma_dev *dev, u16 eq_id,
299 bool arm, bool clear_int, u16 num_eqe)
300{
301 u32 val = 0;
302
303 val |= eq_id & OCRDMA_EQ_ID_MASK;
304 val |= ((eq_id & OCRDMA_EQ_ID_EXT_MASK) << OCRDMA_EQ_ID_EXT_MASK_SHIFT);
305 if (arm)
306 val |= (1 << OCRDMA_REARM_SHIFT);
307 if (clear_int)
308 val |= (1 << OCRDMA_EQ_CLR_SHIFT);
309 val |= (1 << OCRDMA_EQ_TYPE_SHIFT);
310 val |= (num_eqe << OCRDMA_NUM_EQE_SHIFT);
311 iowrite32(val, dev->nic_info.db + OCRDMA_DB_EQ_OFFSET);
312}
313
314static void ocrdma_init_mch(struct ocrdma_mbx_hdr *cmd_hdr,
315 u8 opcode, u8 subsys, u32 cmd_len)
316{
317 cmd_hdr->subsys_op = (opcode | (subsys << OCRDMA_MCH_SUBSYS_SHIFT));
318 cmd_hdr->timeout = 20; /* seconds */
319 cmd_hdr->cmd_len = cmd_len - sizeof(struct ocrdma_mbx_hdr);
320}
321
322static void *ocrdma_init_emb_mqe(u8 opcode, u32 cmd_len)
323{
324 struct ocrdma_mqe *mqe;
325
326 mqe = kzalloc(sizeof(struct ocrdma_mqe), GFP_KERNEL);
327 if (!mqe)
328 return NULL;
329 mqe->hdr.spcl_sge_cnt_emb |=
330 (OCRDMA_MQE_EMBEDDED << OCRDMA_MQE_HDR_EMB_SHIFT) &
331 OCRDMA_MQE_HDR_EMB_MASK;
332 mqe->hdr.pyld_len = cmd_len - sizeof(struct ocrdma_mqe_hdr);
333
334 ocrdma_init_mch(&mqe->u.emb_req.mch, opcode, OCRDMA_SUBSYS_ROCE,
335 mqe->hdr.pyld_len);
336 return mqe;
337}
338
/* Release the DMA-coherent backing memory of a queue. */
static void ocrdma_free_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q)
{
	dma_free_coherent(&dev->nic_info.pdev->dev, q->size, q->va, q->dma);
}
343
/* Allocate DMA-coherent memory for a ring of len entries of entry_size
 * bytes each and record its geometry in *q.  Returns 0 or -ENOMEM.
 */
static int ocrdma_alloc_q(struct ocrdma_dev *dev,
			  struct ocrdma_queue_info *q, u16 len, u16 entry_size)
{
	memset(q, 0, sizeof(*q));
	q->len = len;
	q->entry_size = entry_size;
	q->size = len * entry_size;
	q->va = dma_alloc_coherent(&dev->nic_info.pdev->dev, q->size,
				   &q->dma, GFP_KERNEL);
	if (!q->va)
		return -ENOMEM;
	/* Hand the ring to hardware in a clean state. */
	memset(q->va, 0, q->size);
	return 0;
}
358
359static void ocrdma_build_q_pages(struct ocrdma_pa *q_pa, int cnt,
360 dma_addr_t host_pa, int hw_page_size)
361{
362 int i;
363
364 for (i = 0; i < cnt; i++) {
365 q_pa[i].lo = (u32) (host_pa & 0xffffffff);
366 q_pa[i].hi = (u32) upper_32_bits(host_pa);
367 host_pa += hw_page_size;
368 }
369}
370
371static void ocrdma_assign_eq_vect_gen2(struct ocrdma_dev *dev,
372 struct ocrdma_eq *eq)
373{
374 /* assign vector and update vector id for next EQ */
375 eq->vector = dev->nic_info.msix.start_vector;
376 dev->nic_info.msix.start_vector += 1;
377}
378
379static void ocrdma_free_eq_vect_gen2(struct ocrdma_dev *dev)
380{
381 /* this assumes that EQs are freed in exactly reverse order
382 * as its allocation.
383 */
384 dev->nic_info.msix.start_vector -= 1;
385}
386
387static int ocrdma_mbx_delete_q(struct ocrdma_dev *dev, struct ocrdma_queue_info *q,
388 int queue_type)
389{
390 u8 opcode = 0;
391 int status;
392 struct ocrdma_delete_q_req *cmd = dev->mbx_cmd;
393
394 switch (queue_type) {
395 case QTYPE_MCCQ:
396 opcode = OCRDMA_CMD_DELETE_MQ;
397 break;
398 case QTYPE_CQ:
399 opcode = OCRDMA_CMD_DELETE_CQ;
400 break;
401 case QTYPE_EQ:
402 opcode = OCRDMA_CMD_DELETE_EQ;
403 break;
404 default:
405 BUG();
406 }
407 memset(cmd, 0, sizeof(*cmd));
408 ocrdma_init_mch(&cmd->req, opcode, OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
409 cmd->id = q->id;
410
411 status = be_roce_mcc_cmd(dev->nic_info.netdev,
412 cmd, sizeof(*cmd), NULL, NULL);
413 if (!status)
414 q->created = false;
415 return status;
416}
417
/* Ask firmware to create an event queue backed by eq->q, recording the
 * assigned EQ id and interrupt vector on success.
 */
static int ocrdma_mbx_create_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
{
	int status;
	struct ocrdma_create_eq_req *cmd = dev->mbx_cmd;
	/* The request buffer is overwritten with the response. */
	struct ocrdma_create_eq_rsp *rsp = dev->mbx_cmd;

	memset(cmd, 0, sizeof(*cmd));
	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_EQ, OCRDMA_SUBSYS_COMMON,
			sizeof(*cmd));
	/* Gen2 speaks command version 0; later families use version 2. */
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
		cmd->req.rsvd_version = 0;
	else
		cmd->req.rsvd_version = 2;

	cmd->num_pages = 4;
	cmd->valid = OCRDMA_CREATE_EQ_VALID;
	cmd->cnt = 4 << OCRDMA_CREATE_EQ_CNT_SHIFT;

	ocrdma_build_q_pages(&cmd->pa[0], cmd->num_pages, eq->q.dma,
			     PAGE_SIZE_4K);
	status = be_roce_mcc_cmd(dev->nic_info.netdev, cmd, sizeof(*cmd), NULL,
				 NULL);
	if (!status) {
		/* Low 16 bits of the response carry the EQ id, the high
		 * 16 the vector (for non-Gen2 families).
		 */
		eq->q.id = rsp->vector_eqid & 0xffff;
		if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
			ocrdma_assign_eq_vect_gen2(dev, eq);
		else {
			eq->vector = (rsp->vector_eqid >> 16) & 0xffff;
			dev->nic_info.msix.start_vector += 1;
		}
		eq->q.created = true;
	}
	return status;
}
452
/* Allocate, create via mailbox, and arm one event queue.
 * NOTE(review): the q_len parameter is ignored — the queue is always
 * sized OCRDMA_EQ_LEN; confirm whether any caller expects q_len to
 * take effect.
 */
static int ocrdma_create_eq(struct ocrdma_dev *dev,
			    struct ocrdma_eq *eq, u16 q_len)
{
	int status;

	status = ocrdma_alloc_q(dev, &eq->q, OCRDMA_EQ_LEN,
				sizeof(struct ocrdma_eqe));
	if (status)
		return status;

	status = ocrdma_mbx_create_eq(dev, eq);
	if (status)
		goto mbx_err;
	eq->dev = dev;
	/* Arm the EQ (clearing interrupts) so events start flowing. */
	ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);

	return 0;
mbx_err:
	ocrdma_free_q(dev, &eq->q);
	return status;
}
474
475static int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
476{
477 int irq;
478
479 if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
480 irq = dev->nic_info.pdev->irq;
481 else
482 irq = dev->nic_info.msix.vector_list[eq->vector];
483 return irq;
484}
485
486static void _ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
487{
488 if (eq->q.created) {
489 ocrdma_mbx_delete_q(dev, &eq->q, QTYPE_EQ);
490 if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY)
491 ocrdma_free_eq_vect_gen2(dev);
492 ocrdma_free_q(dev, &eq->q);
493 }
494}
495
496static void ocrdma_destroy_eq(struct ocrdma_dev *dev, struct ocrdma_eq *eq)
497{
498 int irq;
499
500 /* disarm EQ so that interrupts are not generated
501 * during freeing and EQ delete is in progress.
502 */
503 ocrdma_ring_eq_db(dev, eq->q.id, false, false, 0);
504
505 irq = ocrdma_get_irq(dev, eq);
506 free_irq(irq, eq);
507 _ocrdma_destroy_eq(dev, eq);
508}
509
510static void ocrdma_destroy_qp_eqs(struct ocrdma_dev *dev)
511{
512 int i;
513
514 /* deallocate the data path eqs */
515 for (i = 0; i < dev->eq_cnt; i++)
516 ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
517}
518
/* Create the completion queue that services the mailbox queue, bound to
 * the given event queue.  On success records the CQ id in *cq.
 */
static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
				   struct ocrdma_queue_info *cq,
				   struct ocrdma_queue_info *eq)
{
	struct ocrdma_create_cq_cmd *cmd = dev->mbx_cmd;
	/* The command buffer doubles as the response buffer. */
	struct ocrdma_create_cq_cmd_rsp *rsp = dev->mbx_cmd;
	int status;

	memset(cmd, 0, sizeof(*cmd));
	ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_CQ,
			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));

	cmd->pgsz_pgcnt = PAGES_4K_SPANNED(cq->va, cq->size);
	cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
	cmd->eqn = (eq->id << OCRDMA_CREATE_CQ_EQID_SHIFT);

	ocrdma_build_q_pages(&cmd->pa[0], cmd->pgsz_pgcnt,
			     cq->dma, PAGE_SIZE_4K);
	status = be_roce_mcc_cmd(dev->nic_info.netdev,
				 cmd, sizeof(*cmd), NULL, NULL);
	if (!status) {
		cq->id = (rsp->cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
		cq->created = true;
	}
	return status;
}
545
546static u32 ocrdma_encoded_q_len(int q_len)
547{
548 u32 len_encoded = fls(q_len); /* log2(len) + 1 */
549
550 if (len_encoded == 16)
551 len_encoded = 0;
552 return len_encoded;
553}
554
555static int ocrdma_mbx_create_mq(struct ocrdma_dev *dev,
556 struct ocrdma_queue_info *mq,
557 struct ocrdma_queue_info *cq)
558{
559 int num_pages, status;
560 struct ocrdma_create_mq_req *cmd = dev->mbx_cmd;
561 struct ocrdma_create_mq_rsp *rsp = dev->mbx_cmd;
562 struct ocrdma_pa *pa;
563
564 memset(cmd, 0, sizeof(*cmd));
565 num_pages = PAGES_4K_SPANNED(mq->va, mq->size);
566
567 if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
568 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ,
569 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
570 cmd->v0.pages = num_pages;
571 cmd->v0.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
572 cmd->v0.async_cqid_valid = (cq->id << 1);
573 cmd->v0.cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
574 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
575 cmd->v0.cqid_ringsize |=
576 (cq->id << OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT);
577 cmd->v0.valid = OCRDMA_CREATE_MQ_VALID;
578 pa = &cmd->v0.pa[0];
579 } else {
580 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_CREATE_MQ_EXT,
581 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
582 cmd->req.rsvd_version = 1;
583 cmd->v1.cqid_pages = num_pages;
584 cmd->v1.cqid_pages |= (cq->id << OCRDMA_CREATE_MQ_CQ_ID_SHIFT);
585 cmd->v1.async_cqid_valid = OCRDMA_CREATE_MQ_ASYNC_CQ_VALID;
586 cmd->v1.async_event_bitmap = Bit(20);
587 cmd->v1.async_cqid_ringsize = cq->id;
588 cmd->v1.async_cqid_ringsize |= (ocrdma_encoded_q_len(mq->len) <<
589 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT);
590 cmd->v1.valid = OCRDMA_CREATE_MQ_VALID;
591 pa = &cmd->v1.pa[0];
592 }
593 ocrdma_build_q_pages(pa, num_pages, mq->dma, PAGE_SIZE_4K);
594 status = be_roce_mcc_cmd(dev->nic_info.netdev,
595 cmd, sizeof(*cmd), NULL, NULL);
596 if (!status) {
597 mq->id = rsp->id;
598 mq->created = true;
599 }
600 return status;
601}
602
/* Bring up the whole mailbox path: allocate and create the MQ's CQ,
 * initialize the command-completion context, then allocate and create
 * the MQ itself.  Unwinds via the goto ladder on failure.
 */
static int ocrdma_create_mq(struct ocrdma_dev *dev)
{
	int status;

	/* Alloc completion queue for Mailbox queue */
	status = ocrdma_alloc_q(dev, &dev->mq.cq, OCRDMA_MQ_CQ_LEN,
				sizeof(struct ocrdma_mcqe));
	if (status)
		goto alloc_err;

	status = ocrdma_mbx_mq_cq_create(dev, &dev->mq.cq, &dev->meq.q);
	if (status)
		goto mbx_cq_free;

	/* Context used to match each command with its completion. */
	memset(&dev->mqe_ctx, 0, sizeof(dev->mqe_ctx));
	init_waitqueue_head(&dev->mqe_ctx.cmd_wait);
	mutex_init(&dev->mqe_ctx.lock);

	/* Alloc Mailbox queue */
	status = ocrdma_alloc_q(dev, &dev->mq.sq, OCRDMA_MQ_LEN,
				sizeof(struct ocrdma_mqe));
	if (status)
		goto mbx_cq_destroy;
	status = ocrdma_mbx_create_mq(dev, &dev->mq.sq, &dev->mq.cq);
	if (status)
		goto mbx_q_free;
	/* Arm the MQ's CQ so command completions are signalled. */
	ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, 0);
	return 0;

mbx_q_free:
	ocrdma_free_q(dev, &dev->mq.sq);
mbx_cq_destroy:
	ocrdma_mbx_delete_q(dev, &dev->mq.cq, QTYPE_CQ);
mbx_cq_free:
	ocrdma_free_q(dev, &dev->mq.cq);
alloc_err:
	return status;
}
641
642static void ocrdma_destroy_mq(struct ocrdma_dev *dev)
643{
644 struct ocrdma_queue_info *mbxq, *cq;
645
646 /* mqe_ctx lock synchronizes with any other pending cmds. */
647 mutex_lock(&dev->mqe_ctx.lock);
648 mbxq = &dev->mq.sq;
649 if (mbxq->created) {
650 ocrdma_mbx_delete_q(dev, mbxq, QTYPE_MCCQ);
651 ocrdma_free_q(dev, mbxq);
652 }
653 mutex_unlock(&dev->mqe_ctx.lock);
654
655 cq = &dev->mq.cq;
656 if (cq->created) {
657 ocrdma_mbx_delete_q(dev, cq, QTYPE_CQ);
658 ocrdma_free_q(dev, cq);
659 }
660}
661
662static void ocrdma_process_qpcat_error(struct ocrdma_dev *dev,
663 struct ocrdma_qp *qp)
664{
665 enum ib_qp_state new_ib_qps = IB_QPS_ERR;
666 enum ib_qp_state old_ib_qps;
667
668 if (qp == NULL)
669 BUG();
670 ocrdma_qp_state_machine(qp, new_ib_qps, &old_ib_qps);
671}
672
/* Translate an async hardware event CQE into an ib_event and deliver it
 * to the registered QP, CQ, SRQ or device event handler.
 *
 * NOTE(review): qp/cq are looked up only when the CQE's valid bit is
 * set, yet several cases dereference them unconditionally (e.g.
 * &cq->ibcq, &qp->srq->ibsrq).  This relies on firmware always setting
 * the matching valid bit for those event types — confirm, otherwise a
 * NULL dereference is possible.
 */
static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
				    struct ocrdma_ae_mcqe *cqe)
{
	struct ocrdma_qp *qp = NULL;
	struct ocrdma_cq *cq = NULL;
	struct ib_event ib_evt;
	/* Exactly one of these is left set to pick the delivery target;
	 * qp_event defaults to 1 since most event types target a QP.
	 */
	int cq_event = 0;
	int qp_event = 1;
	int srq_event = 0;
	int dev_event = 0;
	int type = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_TYPE_MASK) >>
	    OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT;

	if (cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPVALID)
		qp = dev->qp_tbl[cqe->qpvalid_qpid & OCRDMA_AE_MCQE_QPID_MASK];
	if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
		cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];

	ib_evt.device = &dev->ibdev;

	switch (type) {
	case OCRDMA_CQ_ERROR:
		ib_evt.element.cq = &cq->ibcq;
		ib_evt.event = IB_EVENT_CQ_ERR;
		cq_event = 1;
		qp_event = 0;
		break;
	case OCRDMA_CQ_OVERRUN_ERROR:
		ib_evt.element.cq = &cq->ibcq;
		ib_evt.event = IB_EVENT_CQ_ERR;
		break;
	case OCRDMA_CQ_QPCAT_ERROR:
		ib_evt.element.qp = &qp->ibqp;
		ib_evt.event = IB_EVENT_QP_FATAL;
		/* Also drive the QP into the error state locally. */
		ocrdma_process_qpcat_error(dev, qp);
		break;
	case OCRDMA_QP_ACCESS_ERROR:
		ib_evt.element.qp = &qp->ibqp;
		ib_evt.event = IB_EVENT_QP_ACCESS_ERR;
		break;
	case OCRDMA_QP_COMM_EST_EVENT:
		ib_evt.element.qp = &qp->ibqp;
		ib_evt.event = IB_EVENT_COMM_EST;
		break;
	case OCRDMA_SQ_DRAINED_EVENT:
		ib_evt.element.qp = &qp->ibqp;
		ib_evt.event = IB_EVENT_SQ_DRAINED;
		break;
	case OCRDMA_DEVICE_FATAL_EVENT:
		ib_evt.element.port_num = 1;
		ib_evt.event = IB_EVENT_DEVICE_FATAL;
		qp_event = 0;
		dev_event = 1;
		break;
	case OCRDMA_SRQCAT_ERROR:
		ib_evt.element.srq = &qp->srq->ibsrq;
		ib_evt.event = IB_EVENT_SRQ_ERR;
		srq_event = 1;
		qp_event = 0;
		break;
	case OCRDMA_SRQ_LIMIT_EVENT:
		ib_evt.element.srq = &qp->srq->ibsrq;
		ib_evt.event = IB_EVENT_SRQ_LIMIT_REACHED;
		srq_event = 1;
		qp_event = 0;
		break;
	case OCRDMA_QP_LAST_WQE_EVENT:
		ib_evt.element.qp = &qp->ibqp;
		ib_evt.event = IB_EVENT_QP_LAST_WQE_REACHED;
		break;
	default:
		/* Unknown event type: deliver nothing, just log. */
		cq_event = 0;
		qp_event = 0;
		srq_event = 0;
		dev_event = 0;
		ocrdma_err("%s() unknown type=0x%x\n", __func__, type);
		break;
	}

	if (qp_event) {
		if (qp->ibqp.event_handler)
			qp->ibqp.event_handler(&ib_evt, qp->ibqp.qp_context);
	} else if (cq_event) {
		if (cq->ibcq.event_handler)
			cq->ibcq.event_handler(&ib_evt, cq->ibcq.cq_context);
	} else if (srq_event) {
		if (qp->srq->ibsrq.event_handler)
			qp->srq->ibsrq.event_handler(&ib_evt,
						     qp->srq->ibsrq.
						     srq_context);
	} else if (dev_event)
		ib_dispatch_event(&ib_evt);

}
767
768static void ocrdma_process_acqe(struct ocrdma_dev *dev, void *ae_cqe)
769{
770 /* async CQE processing */
771 struct ocrdma_ae_mcqe *cqe = ae_cqe;
772 u32 evt_code = (cqe->valid_ae_event & OCRDMA_AE_MCQE_EVENT_CODE_MASK) >>
773 OCRDMA_AE_MCQE_EVENT_CODE_SHIFT;
774
775 if (evt_code == OCRDMA_ASYNC_EVE_CODE)
776 ocrdma_dispatch_ibevent(dev, cqe);
777 else
778 ocrdma_err("%s(%d) invalid evt code=0x%x\n",
779 __func__, dev->id, evt_code);
780}
781
782static void ocrdma_process_mcqe(struct ocrdma_dev *dev, struct ocrdma_mcqe *cqe)
783{
784 if (dev->mqe_ctx.tag == cqe->tag_lo && dev->mqe_ctx.cmd_done == false) {
785 dev->mqe_ctx.cqe_status = (cqe->status &
786 OCRDMA_MCQE_STATUS_MASK) >> OCRDMA_MCQE_STATUS_SHIFT;
787 dev->mqe_ctx.ext_status =
788 (cqe->status & OCRDMA_MCQE_ESTATUS_MASK)
789 >> OCRDMA_MCQE_ESTATUS_SHIFT;
790 dev->mqe_ctx.cmd_done = true;
791 wake_up(&dev->mqe_ctx.cmd_wait);
792 } else
793 ocrdma_err("%s() cqe for invalid tag0x%x.expected=0x%x\n",
794 __func__, cqe->tag_lo, dev->mqe_ctx.tag);
795}
796
797static int ocrdma_mq_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
798{
799 u16 cqe_popped = 0;
800 struct ocrdma_mcqe *cqe;
801
802 while (1) {
803 cqe = ocrdma_get_mcqe(dev);
804 if (cqe == NULL)
805 break;
806 ocrdma_le32_to_cpu(cqe, sizeof(*cqe));
807 cqe_popped += 1;
808 if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_AE_MASK)
809 ocrdma_process_acqe(dev, cqe);
810 else if (cqe->valid_ae_cmpl_cons & OCRDMA_MCQE_CMPL_MASK)
811 ocrdma_process_mcqe(dev, cqe);
812 else
813 ocrdma_err("%s() cqe->compl is not set.\n", __func__);
814 memset(cqe, 0, sizeof(struct ocrdma_mcqe));
815 ocrdma_mcq_inc_tail(dev);
816 }
817 ocrdma_ring_cq_db(dev, dev->mq.cq.id, true, false, cqe_popped);
818 return 0;
819}
820
/* After servicing one CQ, also kick the "buddy" CQ (the other CQ of a
 * QP whose SQ and RQ use different CQs) so flushed/error CQEs on it get
 * processed too.
 */
static void ocrdma_qp_buddy_cq_handler(struct ocrdma_dev *dev,
				       struct ocrdma_cq *cq)
{
	unsigned long flags;
	struct ocrdma_qp *qp;
	bool buddy_cq_found = false;
	/* Go through list of QPs in error state which are using this CQ
	 * and invoke its callback handler to trigger CQE processing for
	 * error/flushed CQE. It is rare to find more than few entries in
	 * this list as most consumers stops after getting error CQE.
	 * List is traversed only once when a matching buddy cq found for a QP.
	 */
	spin_lock_irqsave(&dev->flush_q_lock, flags);
	list_for_each_entry(qp, &cq->sq_head, sq_entry) {
		if (qp->srq)
			continue;
		/* if wq and rq share the same cq, than comp_handler
		 * is already invoked.
		 */
		if (qp->sq_cq == qp->rq_cq)
			continue;
		/* if completion came on sq, rq's cq is buddy cq.
		 * if completion came on rq, sq's cq is buddy cq.
		 */
		if (qp->sq_cq == cq)
			cq = qp->rq_cq;
		else
			cq = qp->sq_cq;
		buddy_cq_found = true;
		break;
	}
	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
	if (buddy_cq_found == false)
		return;
	/* Invoke the consumer's completion handler on the buddy CQ. */
	if (cq->ibcq.comp_handler) {
		spin_lock_irqsave(&cq->comp_handler_lock, flags);
		(*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
		spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
	}
}
861
862static void ocrdma_qp_cq_handler(struct ocrdma_dev *dev, u16 cq_idx)
863{
864 unsigned long flags;
865 struct ocrdma_cq *cq;
866
867 if (cq_idx >= OCRDMA_MAX_CQ)
868 BUG();
869
870 cq = dev->cq_tbl[cq_idx];
871 if (cq == NULL) {
872 ocrdma_err("%s%d invalid id=0x%x\n", __func__, dev->id, cq_idx);
873 return;
874 }
875 spin_lock_irqsave(&cq->cq_lock, flags);
876 cq->armed = false;
877 cq->solicited = false;
878 spin_unlock_irqrestore(&cq->cq_lock, flags);
879
880 ocrdma_ring_cq_db(dev, cq->id, false, false, 0);
881
882 if (cq->ibcq.comp_handler) {
883 spin_lock_irqsave(&cq->comp_handler_lock, flags);
884 (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context);
885 spin_unlock_irqrestore(&cq->comp_handler_lock, flags);
886 }
887 ocrdma_qp_buddy_cq_handler(dev, cq);
888}
889
890static void ocrdma_cq_handler(struct ocrdma_dev *dev, u16 cq_id)
891{
892 /* process the MQ-CQE. */
893 if (cq_id == dev->mq.cq.id)
894 ocrdma_mq_cq_handler(dev, cq_id);
895 else
896 ocrdma_qp_cq_handler(dev, cq_id);
897}
898
/* Top-half interrupt handler for one EQ: drain valid EQEs, dispatch CQ
 * notifications, then re-arm the EQ acknowledging the consumed count.
 */
static irqreturn_t ocrdma_irq_handler(int irq, void *handle)
{
	struct ocrdma_eq *eq = handle;
	struct ocrdma_dev *dev = eq->dev;
	struct ocrdma_eqe eqe;
	struct ocrdma_eqe *ptr;
	u16 eqe_popped = 0;
	u16 cq_id;
	while (1) {
		ptr = ocrdma_get_eqe(eq);
		/* Copy out, then byte-swap the copy, leaving the ring
		 * entry untouched until it is invalidated below.
		 */
		eqe = *ptr;
		ocrdma_le32_to_cpu(&eqe, sizeof(eqe));
		if ((eqe.id_valid & OCRDMA_EQE_VALID_MASK) == 0)
			break;
		eqe_popped += 1;
		ptr->id_valid = 0;	/* invalidate the consumed slot */
		/* check whether its CQE or not. */
		if ((eqe.id_valid & OCRDMA_EQE_FOR_CQE_MASK) == 0) {
			cq_id = eqe.id_valid >> OCRDMA_EQE_RESOURCE_ID_SHIFT;
			ocrdma_cq_handler(dev, cq_id);
		}
		ocrdma_eq_inc_tail(eq);
	}
	ocrdma_ring_eq_db(dev, eq->q.id, true, true, eqe_popped);
	/* Ring EQ doorbell with num_popped to 0 to enable interrupts again. */
	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
		ocrdma_ring_eq_db(dev, eq->q.id, true, true, 0);
	return IRQ_HANDLED;
}
928
/* Copy a command into the next MQ slot, tag it with the head index so
 * its completion can be matched, and ring the doorbell.  Serialized by
 * mqe_ctx.lock, which ocrdma_mbx_cmd() holds around this call.
 */
static void ocrdma_post_mqe(struct ocrdma_dev *dev, struct ocrdma_mqe *cmd)
{
	struct ocrdma_mqe *mqe;

	dev->mqe_ctx.tag = dev->mq.sq.head;
	dev->mqe_ctx.cmd_done = false;
	mqe = ocrdma_get_mqe(dev);
	cmd->hdr.tag_lo = dev->mq.sq.head;
	ocrdma_copy_cpu_to_le32(mqe, cmd, sizeof(*mqe));
	/* make sure descriptor is written before ringing doorbell */
	wmb();
	ocrdma_mq_inc_head(dev);
	ocrdma_ring_mq_db(dev);
}
943
944static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
945{
946 long status;
947 /* 30 sec timeout */
948 status = wait_event_timeout(dev->mqe_ctx.cmd_wait,
949 (dev->mqe_ctx.cmd_done != false),
950 msecs_to_jiffies(30000));
951 if (status)
952 return 0;
953 else
954 return -1;
955}
956
/* issue a mailbox command on the MQ */
/* Post the MQE, wait for its completion, copy the response back over
 * *mqe, and translate CQE-level and mailbox-level failures into -errno.
 * mqe_ctx.lock serializes the single outstanding command.
 */
static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
{
	int status = 0;
	u16 cqe_status, ext_status;
	struct ocrdma_mqe *rsp;

	mutex_lock(&dev->mqe_ctx.lock);
	ocrdma_post_mqe(dev, mqe);
	status = ocrdma_wait_mqe_cmpl(dev);
	if (status)
		goto mbx_err;
	cqe_status = dev->mqe_ctx.cqe_status;
	ext_status = dev->mqe_ctx.ext_status;
	rsp = ocrdma_get_mqe_rsp(dev);
	/* The caller's buffer now holds the response. */
	ocrdma_copy_le32_to_cpu(mqe, rsp, (sizeof(*mqe)));
	if (cqe_status || ext_status) {
		/* Transport-level (CQE) failure takes precedence. */
		ocrdma_err
		    ("%s() opcode=0x%x, cqe_status=0x%x, ext_status=0x%x\n",
		     __func__,
		     (rsp->u.rsp.subsys_op & OCRDMA_MBX_RSP_OPCODE_MASK) >>
		     OCRDMA_MBX_RSP_OPCODE_SHIFT, cqe_status, ext_status);
		status = ocrdma_get_mbx_cqe_errno(cqe_status);
		goto mbx_err;
	}
	if (mqe->u.rsp.status & OCRDMA_MBX_RSP_STATUS_MASK)
		status = ocrdma_get_mbx_errno(mqe->u.rsp.status);
mbx_err:
	mutex_unlock(&dev->mqe_ctx.lock);
	return status;
}
988
/* Decode the QUERY_CONFIG mailbox response into the driver's cached
 * device attributes. Each value is unpacked from the packed response
 * words with the corresponding mask/shift pair.
 */
static void ocrdma_get_attr(struct ocrdma_dev *dev,
			    struct ocrdma_dev_attr *attr,
			    struct ocrdma_mbx_query_config *rsp)
{
	attr->max_pd =
	    (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
	attr->max_qp =
	    (rsp->qp_srq_cq_ird_ord & OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT;
	/* send and recv SGE limits both come from the same response field */
	attr->max_send_sge = ((rsp->max_write_send_sge &
			       OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT);
	attr->max_recv_sge = (rsp->max_write_send_sge &
			      OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT;
	attr->max_srq_sge = (rsp->max_srq_rqe_sge &
			      OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET;
	attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp &
				OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT;
	attr->max_ird_per_qp = (rsp->max_ird_ord_per_qp &
				OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT;
	attr->cq_overflow_detect = (rsp->qp_srq_cq_ird_ord &
				    OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT;
	attr->srq_supported = (rsp->qp_srq_cq_ird_ord &
			       OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT;
	attr->local_ca_ack_delay = (rsp->max_pd_ca_ack_delay &
				    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK) >>
	    OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
	attr->max_mr = rsp->max_mr;
	/* fw reports no MR-length limit; advertise the maximum */
	attr->max_mr_size = ~0ull;
	/* FMRs are not supported by this driver */
	attr->max_fmr = 0;
	attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
	attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
	attr->max_cqe = rsp->max_cq_cqes_per_cq &
	    OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK;
	/* WQE/RQE sizes are reported in units of OCRDMA_WQE_STRIDE */
	attr->wqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
			   OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK) >>
			  OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET) *
	    OCRDMA_WQE_STRIDE;
	attr->rqe_size = ((rsp->wqe_rqe_stride_max_dpp_cqs &
			   OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK) >>
			  OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET) *
	    OCRDMA_WQE_STRIDE;
	/* inline data shares a WQE with the header and one SGE */
	attr->max_inline_data =
	    attr->wqe_size - (sizeof(struct ocrdma_hdr_wqe) +
			      sizeof(struct ocrdma_sge));
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		attr->ird = 1;
		attr->ird_page_size = OCRDMA_MIN_Q_PAGE_SIZE;
		attr->num_ird_pages = MAX_OCRDMA_IRD_PAGES;
	}
	dev->attr.max_wqe = rsp->max_wqes_rqes_per_q >>
	    OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET;
	dev->attr.max_rqe = rsp->max_wqes_rqes_per_q &
	    OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK;
}
1051
1052static int ocrdma_check_fw_config(struct ocrdma_dev *dev,
1053 struct ocrdma_fw_conf_rsp *conf)
1054{
1055 u32 fn_mode;
1056
1057 fn_mode = conf->fn_mode & OCRDMA_FN_MODE_RDMA;
1058 if (fn_mode != OCRDMA_FN_MODE_RDMA)
1059 return -EINVAL;
1060 dev->base_eqid = conf->base_eqid;
1061 dev->max_eq = conf->max_eq;
1062 dev->attr.max_cq = OCRDMA_MAX_CQ - 1;
1063 return 0;
1064}
1065
1066/* can be issued only during init time. */
1067static int ocrdma_mbx_query_fw_ver(struct ocrdma_dev *dev)
1068{
1069 int status = -ENOMEM;
1070 struct ocrdma_mqe *cmd;
1071 struct ocrdma_fw_ver_rsp *rsp;
1072
1073 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_VER, sizeof(*cmd));
1074 if (!cmd)
1075 return -ENOMEM;
1076 ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
1077 OCRDMA_CMD_GET_FW_VER,
1078 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
1079
1080 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1081 if (status)
1082 goto mbx_err;
1083 rsp = (struct ocrdma_fw_ver_rsp *)cmd;
1084 memset(&dev->attr.fw_ver[0], 0, sizeof(dev->attr.fw_ver));
1085 memcpy(&dev->attr.fw_ver[0], &rsp->running_ver[0],
1086 sizeof(rsp->running_ver));
1087 ocrdma_le32_to_cpu(dev->attr.fw_ver, sizeof(rsp->running_ver));
1088mbx_err:
1089 kfree(cmd);
1090 return status;
1091}
1092
1093/* can be issued only during init time. */
1094static int ocrdma_mbx_query_fw_config(struct ocrdma_dev *dev)
1095{
1096 int status = -ENOMEM;
1097 struct ocrdma_mqe *cmd;
1098 struct ocrdma_fw_conf_rsp *rsp;
1099
1100 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_GET_FW_CONFIG, sizeof(*cmd));
1101 if (!cmd)
1102 return -ENOMEM;
1103 ocrdma_init_mch((struct ocrdma_mbx_hdr *)&cmd->u.cmd[0],
1104 OCRDMA_CMD_GET_FW_CONFIG,
1105 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
1106 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1107 if (status)
1108 goto mbx_err;
1109 rsp = (struct ocrdma_fw_conf_rsp *)cmd;
1110 status = ocrdma_check_fw_config(dev, rsp);
1111mbx_err:
1112 kfree(cmd);
1113 return status;
1114}
1115
1116static int ocrdma_mbx_query_dev(struct ocrdma_dev *dev)
1117{
1118 int status = -ENOMEM;
1119 struct ocrdma_mbx_query_config *rsp;
1120 struct ocrdma_mqe *cmd;
1121
1122 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_CONFIG, sizeof(*cmd));
1123 if (!cmd)
1124 return status;
1125 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1126 if (status)
1127 goto mbx_err;
1128 rsp = (struct ocrdma_mbx_query_config *)cmd;
1129 ocrdma_get_attr(dev, &dev->attr, rsp);
1130mbx_err:
1131 kfree(cmd);
1132 return status;
1133}
1134
1135int ocrdma_mbx_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
1136{
1137 int status = -ENOMEM;
1138 struct ocrdma_alloc_pd *cmd;
1139 struct ocrdma_alloc_pd_rsp *rsp;
1140
1141 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_PD, sizeof(*cmd));
1142 if (!cmd)
1143 return status;
1144 if (pd->dpp_enabled)
1145 cmd->enable_dpp_rsvd |= OCRDMA_ALLOC_PD_ENABLE_DPP;
1146 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1147 if (status)
1148 goto mbx_err;
1149 rsp = (struct ocrdma_alloc_pd_rsp *)cmd;
1150 pd->id = rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_PDID_MASK;
1151 if (rsp->dpp_page_pdid & OCRDMA_ALLOC_PD_RSP_DPP) {
1152 pd->dpp_enabled = true;
1153 pd->dpp_page = rsp->dpp_page_pdid >>
1154 OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT;
1155 } else {
1156 pd->dpp_enabled = false;
1157 pd->num_dpp_qp = 0;
1158 }
1159mbx_err:
1160 kfree(cmd);
1161 return status;
1162}
1163
1164int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
1165{
1166 int status = -ENOMEM;
1167 struct ocrdma_dealloc_pd *cmd;
1168
1169 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_PD, sizeof(*cmd));
1170 if (!cmd)
1171 return status;
1172 cmd->id = pd->id;
1173 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1174 kfree(cmd);
1175 return status;
1176}
1177
1178static int ocrdma_build_q_conf(u32 *num_entries, int entry_size,
1179 int *num_pages, int *page_size)
1180{
1181 int i;
1182 int mem_size;
1183
1184 *num_entries = roundup_pow_of_two(*num_entries);
1185 mem_size = *num_entries * entry_size;
1186 /* find the possible lowest possible multiplier */
1187 for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
1188 if (mem_size <= (OCRDMA_Q_PAGE_BASE_SIZE << i))
1189 break;
1190 }
1191 if (i >= OCRDMA_MAX_Q_PAGE_SIZE_CNT)
1192 return -EINVAL;
1193 mem_size = roundup(mem_size,
1194 ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES));
1195 *num_pages =
1196 mem_size / ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
1197 *page_size = ((OCRDMA_Q_PAGE_BASE_SIZE << i) / OCRDMA_MAX_Q_PAGES);
1198 *num_entries = mem_size / entry_size;
1199 return 0;
1200}
1201
1202static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
1203{
1204 int i ;
1205 int status = 0;
1206 int max_ah;
1207 struct ocrdma_create_ah_tbl *cmd;
1208 struct ocrdma_create_ah_tbl_rsp *rsp;
1209 struct pci_dev *pdev = dev->nic_info.pdev;
1210 dma_addr_t pa;
1211 struct ocrdma_pbe *pbes;
1212
1213 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_AH_TBL, sizeof(*cmd));
1214 if (!cmd)
1215 return status;
1216
1217 max_ah = OCRDMA_MAX_AH;
1218 dev->av_tbl.size = sizeof(struct ocrdma_av) * max_ah;
1219
1220 /* number of PBEs in PBL */
1221 cmd->ah_conf = (OCRDMA_AH_TBL_PAGES <<
1222 OCRDMA_CREATE_AH_NUM_PAGES_SHIFT) &
1223 OCRDMA_CREATE_AH_NUM_PAGES_MASK;
1224
1225 /* page size */
1226 for (i = 0; i < OCRDMA_MAX_Q_PAGE_SIZE_CNT; i++) {
1227 if (PAGE_SIZE == (OCRDMA_MIN_Q_PAGE_SIZE << i))
1228 break;
1229 }
1230 cmd->ah_conf |= (i << OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT) &
1231 OCRDMA_CREATE_AH_PAGE_SIZE_MASK;
1232
1233 /* ah_entry size */
1234 cmd->ah_conf |= (sizeof(struct ocrdma_av) <<
1235 OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT) &
1236 OCRDMA_CREATE_AH_ENTRY_SIZE_MASK;
1237
1238 dev->av_tbl.pbl.va = dma_alloc_coherent(&pdev->dev, PAGE_SIZE,
1239 &dev->av_tbl.pbl.pa,
1240 GFP_KERNEL);
1241 if (dev->av_tbl.pbl.va == NULL)
1242 goto mem_err;
1243
1244 dev->av_tbl.va = dma_alloc_coherent(&pdev->dev, dev->av_tbl.size,
1245 &pa, GFP_KERNEL);
1246 if (dev->av_tbl.va == NULL)
1247 goto mem_err_ah;
1248 dev->av_tbl.pa = pa;
1249 dev->av_tbl.num_ah = max_ah;
1250 memset(dev->av_tbl.va, 0, dev->av_tbl.size);
1251
1252 pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
1253 for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
1254 pbes[i].pa_lo = (u32) (pa & 0xffffffff);
1255 pbes[i].pa_hi = (u32) upper_32_bits(pa);
1256 pa += PAGE_SIZE;
1257 }
1258 cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
1259 cmd->tbl_addr[0].hi = (u32)upper_32_bits(dev->av_tbl.pbl.pa);
1260 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1261 if (status)
1262 goto mbx_err;
1263 rsp = (struct ocrdma_create_ah_tbl_rsp *)cmd;
1264 dev->av_tbl.ahid = rsp->ahid & 0xFFFF;
1265 kfree(cmd);
1266 return 0;
1267
1268mbx_err:
1269 dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
1270 dev->av_tbl.pa);
1271 dev->av_tbl.va = NULL;
1272mem_err_ah:
1273 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
1274 dev->av_tbl.pbl.pa);
1275 dev->av_tbl.pbl.va = NULL;
1276 dev->av_tbl.size = 0;
1277mem_err:
1278 kfree(cmd);
1279 return status;
1280}
1281
1282static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
1283{
1284 struct ocrdma_delete_ah_tbl *cmd;
1285 struct pci_dev *pdev = dev->nic_info.pdev;
1286
1287 if (dev->av_tbl.va == NULL)
1288 return;
1289
1290 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_AH_TBL, sizeof(*cmd));
1291 if (!cmd)
1292 return;
1293 cmd->ahid = dev->av_tbl.ahid;
1294
1295 ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1296 dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
1297 dev->av_tbl.pa);
1298 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
1299 dev->av_tbl.pbl.pa);
1300 kfree(cmd);
1301}
1302
1303/* Multiple CQs uses the EQ. This routine returns least used
1304 * EQ to associate with CQ. This will distributes the interrupt
1305 * processing and CPU load to associated EQ, vector and so to that CPU.
1306 */
1307static u16 ocrdma_bind_eq(struct ocrdma_dev *dev)
1308{
1309 int i, selected_eq = 0, cq_cnt = 0;
1310 u16 eq_id;
1311
1312 mutex_lock(&dev->dev_lock);
1313 cq_cnt = dev->qp_eq_tbl[0].cq_cnt;
1314 eq_id = dev->qp_eq_tbl[0].q.id;
1315 /* find the EQ which is has the least number of
1316 * CQs associated with it.
1317 */
1318 for (i = 0; i < dev->eq_cnt; i++) {
1319 if (dev->qp_eq_tbl[i].cq_cnt < cq_cnt) {
1320 cq_cnt = dev->qp_eq_tbl[i].cq_cnt;
1321 eq_id = dev->qp_eq_tbl[i].q.id;
1322 selected_eq = i;
1323 }
1324 }
1325 dev->qp_eq_tbl[selected_eq].cq_cnt += 1;
1326 mutex_unlock(&dev->dev_lock);
1327 return eq_id;
1328}
1329
1330static void ocrdma_unbind_eq(struct ocrdma_dev *dev, u16 eq_id)
1331{
1332 int i;
1333
1334 mutex_lock(&dev->dev_lock);
1335 for (i = 0; i < dev->eq_cnt; i++) {
1336 if (dev->qp_eq_tbl[i].q.id != eq_id)
1337 continue;
1338 dev->qp_eq_tbl[i].cq_cnt -= 1;
1339 break;
1340 }
1341 mutex_unlock(&dev->dev_lock);
1342}
1343
/* Create a completion queue via mailbox command.
 * @entries: requested depth; must not exceed dev->attr.max_cqe.
 * @dpp_cq: DPP CQs are rejected up front with -EINVAL, so every dpp_cq
 *          branch further down is currently unreachable dead code.
 * Allocates the CQ ring, binds the CQ to the least-loaded EQ, and
 * programs page size/count and CQE count into the request.
 * Returns 0 on success, negative errno on failure.
 */
int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
			 int entries, int dpp_cq)
{
	int status = -ENOMEM; int max_hw_cqe;
	struct pci_dev *pdev = dev->nic_info.pdev;
	struct ocrdma_create_cq *cmd;
	struct ocrdma_create_cq_rsp *rsp;
	u32 hw_pages, cqe_size, page_size, cqe_count;

	/* DPP CQs not supported; makes the dpp_cq paths below dead */
	if (dpp_cq)
		return -EINVAL;
	if (entries > dev->attr.max_cqe) {
		ocrdma_err("%s(%d) max_cqe=0x%x, requester_cqe=0x%x\n",
			   __func__, dev->id, dev->attr.max_cqe, entries);
		return -EINVAL;
	}
	/* unreachable: dpp_cq already rejected above */
	if (dpp_cq && (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY))
		return -EINVAL;

	if (dpp_cq) {
		/* unreachable while the early dpp_cq rejection stands */
		cq->max_hw_cqe = 1;
		max_hw_cqe = 1;
		cqe_size = OCRDMA_DPP_CQE_SIZE;
		hw_pages = 1;
	} else {
		cq->max_hw_cqe = dev->attr.max_cqe;
		max_hw_cqe = dev->attr.max_cqe;
		cqe_size = sizeof(struct ocrdma_cqe);
		hw_pages = OCRDMA_CREATE_CQ_MAX_PAGES;
	}

	/* ring is always sized for the device maximum, page-aligned */
	cq->len = roundup(max_hw_cqe * cqe_size, OCRDMA_MIN_Q_PAGE_SIZE);

	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_CQ, sizeof(*cmd));
	if (!cmd)
		return -ENOMEM;
	ocrdma_init_mch(&cmd->cmd.req, OCRDMA_CMD_CREATE_CQ,
			OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
	cq->va = dma_alloc_coherent(&pdev->dev, cq->len, &cq->pa, GFP_KERNEL);
	if (!cq->va) {
		status = -ENOMEM;
		goto mem_err;
	}
	memset(cq->va, 0, cq->len);
	page_size = cq->len / hw_pages;
	cmd->cmd.pgsz_pgcnt = (page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
					OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT;
	cmd->cmd.pgsz_pgcnt |= hw_pages;
	cmd->cmd.ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;

	/* NOTE(review): eq_cnt < 0 looks like it should guard against
	 * having no EQs (== 0) - confirm against eq setup code.
	 */
	if (dev->eq_cnt < 0)
		goto eq_err;
	cq->eqn = ocrdma_bind_eq(dev);
	cmd->cmd.req.rsvd_version = OCRDMA_CREATE_CQ_VER2;
	cqe_count = cq->len / cqe_size;
	if (cqe_count > 1024)
		/* Set cnt to 3 to indicate more than 1024 cq entries */
		cmd->cmd.ev_cnt_flags |= (0x3 << OCRDMA_CREATE_CQ_CNT_SHIFT);
	else {
		u8 count = 0;
		/* hw encodes the CQE count as a 2-bit code */
		switch (cqe_count) {
		case 256:
			count = 0;
			break;
		case 512:
			count = 1;
			break;
		case 1024:
			count = 2;
			break;
		default:
			goto mbx_err;
		}
		cmd->cmd.ev_cnt_flags |= (count << OCRDMA_CREATE_CQ_CNT_SHIFT);
	}
	/* shared eq between all the consumer cqs. */
	cmd->cmd.eqn = cq->eqn;
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		if (dpp_cq)
			cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
				OCRDMA_CREATE_CQ_TYPE_SHIFT;
		/* GEN2 reports validity without phase toggling */
		cq->phase_change = false;
		cmd->cmd.cqe_count = (cq->len / cqe_size);
	} else {
		cmd->cmd.cqe_count = (cq->len / cqe_size) - 1;
		cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
		cq->phase_change = true;
	}

	ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
	if (status)
		goto mbx_err;

	rsp = (struct ocrdma_create_cq_rsp *)cmd;
	cq->id = (u16) (rsp->rsp.cq_id & OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK);
	kfree(cmd);
	return 0;
mbx_err:
	ocrdma_unbind_eq(dev, cq->eqn);
eq_err:
	dma_free_coherent(&pdev->dev, cq->len, cq->va, cq->pa);
mem_err:
	kfree(cmd);
	return status;
}
1450
1451int ocrdma_mbx_destroy_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq)
1452{
1453 int status = -ENOMEM;
1454 struct ocrdma_destroy_cq *cmd;
1455
1456 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_CQ, sizeof(*cmd));
1457 if (!cmd)
1458 return status;
1459 ocrdma_init_mch(&cmd->req, OCRDMA_CMD_DELETE_CQ,
1460 OCRDMA_SUBSYS_COMMON, sizeof(*cmd));
1461
1462 cmd->bypass_flush_qid |=
1463 (cq->id << OCRDMA_DESTROY_CQ_QID_SHIFT) &
1464 OCRDMA_DESTROY_CQ_QID_MASK;
1465
1466 ocrdma_unbind_eq(dev, cq->eqn);
1467 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1468 if (status)
1469 goto mbx_err;
1470 dma_free_coherent(&dev->nic_info.pdev->dev, cq->len, cq->va, cq->pa);
1471mbx_err:
1472 kfree(cmd);
1473 return status;
1474}
1475
1476int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
1477 u32 pdid, int addr_check)
1478{
1479 int status = -ENOMEM;
1480 struct ocrdma_alloc_lkey *cmd;
1481 struct ocrdma_alloc_lkey_rsp *rsp;
1482
1483 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_ALLOC_LKEY, sizeof(*cmd));
1484 if (!cmd)
1485 return status;
1486 cmd->pdid = pdid;
1487 cmd->pbl_sz_flags |= addr_check;
1488 cmd->pbl_sz_flags |= (hwmr->fr_mr << OCRDMA_ALLOC_LKEY_FMR_SHIFT);
1489 cmd->pbl_sz_flags |=
1490 (hwmr->remote_wr << OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT);
1491 cmd->pbl_sz_flags |=
1492 (hwmr->remote_rd << OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT);
1493 cmd->pbl_sz_flags |=
1494 (hwmr->local_wr << OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT);
1495 cmd->pbl_sz_flags |=
1496 (hwmr->remote_atomic << OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT);
1497 cmd->pbl_sz_flags |=
1498 (hwmr->num_pbls << OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT);
1499
1500 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1501 if (status)
1502 goto mbx_err;
1503 rsp = (struct ocrdma_alloc_lkey_rsp *)cmd;
1504 hwmr->lkey = rsp->lrkey;
1505mbx_err:
1506 kfree(cmd);
1507 return status;
1508}
1509
1510int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *dev, int fr_mr, u32 lkey)
1511{
1512 int status = -ENOMEM;
1513 struct ocrdma_dealloc_lkey *cmd;
1514
1515 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DEALLOC_LKEY, sizeof(*cmd));
1516 if (!cmd)
1517 return -ENOMEM;
1518 cmd->lkey = lkey;
1519 cmd->rsvd_frmr = fr_mr ? 1 : 0;
1520 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1521 if (status)
1522 goto mbx_err;
1523mbx_err:
1524 kfree(cmd);
1525 return status;
1526}
1527
/* Register a non-shared memory region with the firmware.
 * Packs access rights, page/PBL geometry, length, FBO, and VA into the
 * REGISTER_NSMR request, plus the first @pbl_cnt PBL addresses.
 * @last: nonzero when no continuation (REGISTER_NSMR_CONT) will follow.
 * On success hwmr->lkey is set from the response.
 */
static int ocrdma_mbx_reg_mr(struct ocrdma_dev *dev, struct ocrdma_hw_mr *hwmr,
			     u32 pdid, u32 pbl_cnt, u32 pbe_size, u32 last)
{
	int status = -ENOMEM;
	int i;
	struct ocrdma_reg_nsmr *cmd;
	struct ocrdma_reg_nsmr_rsp *rsp;

	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR, sizeof(*cmd));
	if (!cmd)
		return -ENOMEM;
	cmd->num_pbl_pdid =
	    pdid | (hwmr->num_pbls << OCRDMA_REG_NSMR_NUM_PBL_SHIFT);

	/* access rights */
	cmd->flags_hpage_pbe_sz |= (hwmr->remote_wr <<
				    OCRDMA_REG_NSMR_REMOTE_WR_SHIFT);
	cmd->flags_hpage_pbe_sz |= (hwmr->remote_rd <<
				    OCRDMA_REG_NSMR_REMOTE_RD_SHIFT);
	cmd->flags_hpage_pbe_sz |= (hwmr->local_wr <<
				    OCRDMA_REG_NSMR_LOCAL_WR_SHIFT);
	cmd->flags_hpage_pbe_sz |= (hwmr->remote_atomic <<
				    OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT);
	cmd->flags_hpage_pbe_sz |= (hwmr->mw_bind <<
				    OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT);
	cmd->flags_hpage_pbe_sz |= (last << OCRDMA_REG_NSMR_LAST_SHIFT);

	/* PBE and PBL sizes are encoded in OCRDMA_MIN_HPAGE_SIZE units */
	cmd->flags_hpage_pbe_sz |= (hwmr->pbe_size / OCRDMA_MIN_HPAGE_SIZE);
	cmd->flags_hpage_pbe_sz |= (hwmr->pbl_size / OCRDMA_MIN_HPAGE_SIZE) <<
	    OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT;
	cmd->totlen_low = hwmr->len;
	cmd->totlen_high = upper_32_bits(hwmr->len);
	/* first-byte offset within the first page */
	cmd->fbo_low = (u32) (hwmr->fbo & 0xffffffff);
	cmd->fbo_high = (u32) upper_32_bits(hwmr->fbo);
	cmd->va_loaddr = (u32) hwmr->va;
	cmd->va_hiaddr = (u32) upper_32_bits(hwmr->va);

	/* first batch of PBL bus addresses, split into lo/hi words */
	for (i = 0; i < pbl_cnt; i++) {
		cmd->pbl[i].lo = (u32) (hwmr->pbl_table[i].pa & 0xffffffff);
		cmd->pbl[i].hi = upper_32_bits(hwmr->pbl_table[i].pa);
	}
	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
	if (status)
		goto mbx_err;
	rsp = (struct ocrdma_reg_nsmr_rsp *)cmd;
	hwmr->lkey = rsp->lrkey;
mbx_err:
	kfree(cmd);
	return status;
}
1577
1578static int ocrdma_mbx_reg_mr_cont(struct ocrdma_dev *dev,
1579 struct ocrdma_hw_mr *hwmr, u32 pbl_cnt,
1580 u32 pbl_offset, u32 last)
1581{
1582 int status = -ENOMEM;
1583 int i;
1584 struct ocrdma_reg_nsmr_cont *cmd;
1585
1586 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_REGISTER_NSMR_CONT, sizeof(*cmd));
1587 if (!cmd)
1588 return -ENOMEM;
1589 cmd->lrkey = hwmr->lkey;
1590 cmd->num_pbl_offset = (pbl_cnt << OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT) |
1591 (pbl_offset & OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK);
1592 cmd->last = last << OCRDMA_REG_NSMR_CONT_LAST_SHIFT;
1593
1594 for (i = 0; i < pbl_cnt; i++) {
1595 cmd->pbl[i].lo =
1596 (u32) (hwmr->pbl_table[i + pbl_offset].pa & 0xffffffff);
1597 cmd->pbl[i].hi =
1598 upper_32_bits(hwmr->pbl_table[i + pbl_offset].pa);
1599 }
1600 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1601 if (status)
1602 goto mbx_err;
1603mbx_err:
1604 kfree(cmd);
1605 return status;
1606}
1607
1608int ocrdma_reg_mr(struct ocrdma_dev *dev,
1609 struct ocrdma_hw_mr *hwmr, u32 pdid, int acc)
1610{
1611 int status;
1612 u32 last = 0;
1613 u32 cur_pbl_cnt, pbl_offset;
1614 u32 pending_pbl_cnt = hwmr->num_pbls;
1615
1616 pbl_offset = 0;
1617 cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
1618 if (cur_pbl_cnt == pending_pbl_cnt)
1619 last = 1;
1620
1621 status = ocrdma_mbx_reg_mr(dev, hwmr, pdid,
1622 cur_pbl_cnt, hwmr->pbe_size, last);
1623 if (status) {
1624 ocrdma_err("%s() status=%d\n", __func__, status);
1625 return status;
1626 }
1627 /* if there is no more pbls to register then exit. */
1628 if (last)
1629 return 0;
1630
1631 while (!last) {
1632 pbl_offset += cur_pbl_cnt;
1633 pending_pbl_cnt -= cur_pbl_cnt;
1634 cur_pbl_cnt = min(pending_pbl_cnt, MAX_OCRDMA_NSMR_PBL);
1635 /* if we reach the end of the pbls, then need to set the last
1636 * bit, indicating no more pbls to register for this memory key.
1637 */
1638 if (cur_pbl_cnt == pending_pbl_cnt)
1639 last = 1;
1640
1641 status = ocrdma_mbx_reg_mr_cont(dev, hwmr, cur_pbl_cnt,
1642 pbl_offset, last);
1643 if (status)
1644 break;
1645 }
1646 if (status)
1647 ocrdma_err("%s() err. status=%d\n", __func__, status);
1648
1649 return status;
1650}
1651
1652bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
1653{
1654 struct ocrdma_qp *tmp;
1655 bool found = false;
1656 list_for_each_entry(tmp, &cq->sq_head, sq_entry) {
1657 if (qp == tmp) {
1658 found = true;
1659 break;
1660 }
1661 }
1662 return found;
1663}
1664
1665bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *cq, struct ocrdma_qp *qp)
1666{
1667 struct ocrdma_qp *tmp;
1668 bool found = false;
1669 list_for_each_entry(tmp, &cq->rq_head, rq_entry) {
1670 if (qp == tmp) {
1671 found = true;
1672 break;
1673 }
1674 }
1675 return found;
1676}
1677
1678void ocrdma_flush_qp(struct ocrdma_qp *qp)
1679{
1680 bool found;
1681 unsigned long flags;
1682
1683 spin_lock_irqsave(&qp->dev->flush_q_lock, flags);
1684 found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1685 if (!found)
1686 list_add_tail(&qp->sq_entry, &qp->sq_cq->sq_head);
1687 if (!qp->srq) {
1688 found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1689 if (!found)
1690 list_add_tail(&qp->rq_entry, &qp->rq_cq->rq_head);
1691 }
1692 spin_unlock_irqrestore(&qp->dev->flush_q_lock, flags);
1693}
1694
1695int ocrdma_qp_state_machine(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
1696 enum ib_qp_state *old_ib_state)
1697{
1698 unsigned long flags;
1699 int status = 0;
1700 enum ocrdma_qp_state new_state;
1701 new_state = get_ocrdma_qp_state(new_ib_state);
1702
1703 /* sync with wqe and rqe posting */
1704 spin_lock_irqsave(&qp->q_lock, flags);
1705
1706 if (old_ib_state)
1707 *old_ib_state = get_ibqp_state(qp->state);
1708 if (new_state == qp->state) {
1709 spin_unlock_irqrestore(&qp->q_lock, flags);
1710 return 1;
1711 }
1712
1713 switch (qp->state) {
1714 case OCRDMA_QPS_RST:
1715 switch (new_state) {
1716 case OCRDMA_QPS_RST:
1717 case OCRDMA_QPS_INIT:
1718 break;
1719 default:
1720 status = -EINVAL;
1721 break;
1722 };
1723 break;
1724 case OCRDMA_QPS_INIT:
1725 /* qps: INIT->XXX */
1726 switch (new_state) {
1727 case OCRDMA_QPS_INIT:
1728 case OCRDMA_QPS_RTR:
1729 break;
1730 case OCRDMA_QPS_ERR:
1731 ocrdma_flush_qp(qp);
1732 break;
1733 default:
1734 status = -EINVAL;
1735 break;
1736 };
1737 break;
1738 case OCRDMA_QPS_RTR:
1739 /* qps: RTS->XXX */
1740 switch (new_state) {
1741 case OCRDMA_QPS_RTS:
1742 break;
1743 case OCRDMA_QPS_ERR:
1744 ocrdma_flush_qp(qp);
1745 break;
1746 default:
1747 status = -EINVAL;
1748 break;
1749 };
1750 break;
1751 case OCRDMA_QPS_RTS:
1752 /* qps: RTS->XXX */
1753 switch (new_state) {
1754 case OCRDMA_QPS_SQD:
1755 case OCRDMA_QPS_SQE:
1756 break;
1757 case OCRDMA_QPS_ERR:
1758 ocrdma_flush_qp(qp);
1759 break;
1760 default:
1761 status = -EINVAL;
1762 break;
1763 };
1764 break;
1765 case OCRDMA_QPS_SQD:
1766 /* qps: SQD->XXX */
1767 switch (new_state) {
1768 case OCRDMA_QPS_RTS:
1769 case OCRDMA_QPS_SQE:
1770 case OCRDMA_QPS_ERR:
1771 break;
1772 default:
1773 status = -EINVAL;
1774 break;
1775 };
1776 break;
1777 case OCRDMA_QPS_SQE:
1778 switch (new_state) {
1779 case OCRDMA_QPS_RTS:
1780 case OCRDMA_QPS_ERR:
1781 break;
1782 default:
1783 status = -EINVAL;
1784 break;
1785 };
1786 break;
1787 case OCRDMA_QPS_ERR:
1788 /* qps: ERR->XXX */
1789 switch (new_state) {
1790 case OCRDMA_QPS_RST:
1791 break;
1792 default:
1793 status = -EINVAL;
1794 break;
1795 };
1796 break;
1797 default:
1798 status = -EINVAL;
1799 break;
1800 };
1801 if (!status)
1802 qp->state = new_state;
1803
1804 spin_unlock_irqrestore(&qp->q_lock, flags);
1805 return status;
1806}
1807
1808static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
1809{
1810 u32 flags = 0;
1811 if (qp->cap_flags & OCRDMA_QP_INB_RD)
1812 flags |= OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK;
1813 if (qp->cap_flags & OCRDMA_QP_INB_WR)
1814 flags |= OCRDMA_CREATE_QP_REQ_INB_WREN_MASK;
1815 if (qp->cap_flags & OCRDMA_QP_MW_BIND)
1816 flags |= OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK;
1817 if (qp->cap_flags & OCRDMA_QP_LKEY0)
1818 flags |= OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK;
1819 if (qp->cap_flags & OCRDMA_QP_FAST_REG)
1820 flags |= OCRDMA_CREATE_QP_REQ_FMR_EN_MASK;
1821 return flags;
1822}
1823
1824static int ocrdma_set_create_qp_sq_cmd(struct ocrdma_create_qp_req *cmd,
1825 struct ib_qp_init_attr *attrs,
1826 struct ocrdma_qp *qp)
1827{
1828 int status;
1829 u32 len, hw_pages, hw_page_size;
1830 dma_addr_t pa;
1831 struct ocrdma_dev *dev = qp->dev;
1832 struct pci_dev *pdev = dev->nic_info.pdev;
1833 u32 max_wqe_allocated;
1834 u32 max_sges = attrs->cap.max_send_sge;
1835
1836 max_wqe_allocated = attrs->cap.max_send_wr;
1837 /* need to allocate one extra to for GEN1 family */
1838 if (dev->nic_info.dev_family != OCRDMA_GEN2_FAMILY)
1839 max_wqe_allocated += 1;
1840
1841 status = ocrdma_build_q_conf(&max_wqe_allocated,
1842 dev->attr.wqe_size, &hw_pages, &hw_page_size);
1843 if (status) {
1844 ocrdma_err("%s() req. max_send_wr=0x%x\n", __func__,
1845 max_wqe_allocated);
1846 return -EINVAL;
1847 }
1848 qp->sq.max_cnt = max_wqe_allocated;
1849 len = (hw_pages * hw_page_size);
1850
1851 qp->sq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
1852 if (!qp->sq.va)
1853 return -EINVAL;
1854 memset(qp->sq.va, 0, len);
1855 qp->sq.len = len;
1856 qp->sq.pa = pa;
1857 qp->sq.entry_size = dev->attr.wqe_size;
1858 ocrdma_build_q_pages(&cmd->wq_addr[0], hw_pages, pa, hw_page_size);
1859
1860 cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
1861 << OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT);
1862 cmd->num_wq_rq_pages |= (hw_pages <<
1863 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT) &
1864 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK;
1865 cmd->max_sge_send_write |= (max_sges <<
1866 OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT) &
1867 OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK;
1868 cmd->max_sge_send_write |= (max_sges <<
1869 OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT) &
1870 OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK;
1871 cmd->max_wqe_rqe |= (ilog2(qp->sq.max_cnt) <<
1872 OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT) &
1873 OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK;
1874 cmd->wqe_rqe_size |= (dev->attr.wqe_size <<
1875 OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT) &
1876 OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK;
1877 return 0;
1878}
1879
1880static int ocrdma_set_create_qp_rq_cmd(struct ocrdma_create_qp_req *cmd,
1881 struct ib_qp_init_attr *attrs,
1882 struct ocrdma_qp *qp)
1883{
1884 int status;
1885 u32 len, hw_pages, hw_page_size;
1886 dma_addr_t pa = 0;
1887 struct ocrdma_dev *dev = qp->dev;
1888 struct pci_dev *pdev = dev->nic_info.pdev;
1889 u32 max_rqe_allocated = attrs->cap.max_recv_wr + 1;
1890
1891 status = ocrdma_build_q_conf(&max_rqe_allocated, dev->attr.rqe_size,
1892 &hw_pages, &hw_page_size);
1893 if (status) {
1894 ocrdma_err("%s() req. max_recv_wr=0x%x\n", __func__,
1895 attrs->cap.max_recv_wr + 1);
1896 return status;
1897 }
1898 qp->rq.max_cnt = max_rqe_allocated;
1899 len = (hw_pages * hw_page_size);
1900
1901 qp->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
1902 if (!qp->rq.va)
1903 return status;
1904 memset(qp->rq.va, 0, len);
1905 qp->rq.pa = pa;
1906 qp->rq.len = len;
1907 qp->rq.entry_size = dev->attr.rqe_size;
1908
1909 ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);
1910 cmd->type_pgsz_pdn |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE) <<
1911 OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT);
1912 cmd->num_wq_rq_pages |=
1913 (hw_pages << OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT) &
1914 OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK;
1915 cmd->max_sge_recv_flags |= (attrs->cap.max_recv_sge <<
1916 OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT) &
1917 OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK;
1918 cmd->max_wqe_rqe |= (ilog2(qp->rq.max_cnt) <<
1919 OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT) &
1920 OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK;
1921 cmd->wqe_rqe_size |= (dev->attr.rqe_size <<
1922 OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT) &
1923 OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK;
1924 return 0;
1925}
1926
1927static void ocrdma_set_create_qp_dpp_cmd(struct ocrdma_create_qp_req *cmd,
1928 struct ocrdma_pd *pd,
1929 struct ocrdma_qp *qp,
1930 u8 enable_dpp_cq, u16 dpp_cq_id)
1931{
1932 pd->num_dpp_qp--;
1933 qp->dpp_enabled = true;
1934 cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK;
1935 if (!enable_dpp_cq)
1936 return;
1937 cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK;
1938 cmd->dpp_credits_cqid = dpp_cq_id;
1939 cmd->dpp_credits_cqid |= OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT <<
1940 OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT;
1941}
1942
1943static int ocrdma_set_create_qp_ird_cmd(struct ocrdma_create_qp_req *cmd,
1944 struct ocrdma_qp *qp)
1945{
1946 struct ocrdma_dev *dev = qp->dev;
1947 struct pci_dev *pdev = dev->nic_info.pdev;
1948 dma_addr_t pa = 0;
1949 int ird_page_size = dev->attr.ird_page_size;
1950 int ird_q_len = dev->attr.num_ird_pages * ird_page_size;
1951
1952 if (dev->attr.ird == 0)
1953 return 0;
1954
1955 qp->ird_q_va = dma_alloc_coherent(&pdev->dev, ird_q_len,
1956 &pa, GFP_KERNEL);
1957 if (!qp->ird_q_va)
1958 return -ENOMEM;
1959 memset(qp->ird_q_va, 0, ird_q_len);
1960 ocrdma_build_q_pages(&cmd->ird_addr[0], dev->attr.num_ird_pages,
1961 pa, ird_page_size);
1962 return 0;
1963}
1964
/* Decode the CREATE_QP response: QP/SQ/RQ ids, IRD/ORD limits, DPP
 * grant (offset + credit limit), and the actual ring depths the fw
 * allocated. RQ depth fields are skipped when the QP uses an SRQ.
 */
static void ocrdma_get_create_qp_rsp(struct ocrdma_create_qp_rsp *rsp,
				     struct ocrdma_qp *qp,
				     struct ib_qp_init_attr *attrs,
				     u16 *dpp_offset, u16 *dpp_credit_lmt)
{
	u32 max_wqe_allocated, max_rqe_allocated;
	qp->id = rsp->qp_id & OCRDMA_CREATE_QP_RSP_QP_ID_MASK;
	qp->rq.dbid = rsp->sq_rq_id & OCRDMA_CREATE_QP_RSP_RQ_ID_MASK;
	qp->sq.dbid = rsp->sq_rq_id >> OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT;
	qp->max_ird = rsp->max_ord_ird & OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK;
	qp->max_ord = (rsp->max_ord_ird >> OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT);
	qp->dpp_enabled = false;
	if (rsp->dpp_response & OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK) {
		/* dpp_offset/dpp_credit_lmt only written when DPP granted */
		qp->dpp_enabled = true;
		*dpp_credit_lmt = (rsp->dpp_response &
				   OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK) >>
		    OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT;
		*dpp_offset = (rsp->dpp_response &
			       OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK) >>
		    OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT;
	}
	/* depths come back log2-encoded: high half = WQE, low half = RQE.
	 * NOTE(review): the (u16) cast presumably isolates the low 16 bits
	 * holding log2(RQE count) - confirm against the response layout.
	 */
	max_wqe_allocated =
	    rsp->max_wqe_rqe >> OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT;
	max_wqe_allocated = 1 << max_wqe_allocated;
	max_rqe_allocated = 1 << ((u16)rsp->max_wqe_rqe);

	qp->sq.max_cnt = max_wqe_allocated;
	qp->sq.max_wqe_idx = max_wqe_allocated - 1;

	if (!attrs->srq) {
		qp->rq.max_cnt = max_rqe_allocated;
		qp->rq.max_wqe_idx = max_rqe_allocated - 1;
	}
}
1999
2000int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
2001 u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
2002 u16 *dpp_credit_lmt)
2003{
2004 int status = -ENOMEM;
2005 u32 flags = 0;
2006 struct ocrdma_dev *dev = qp->dev;
2007 struct ocrdma_pd *pd = qp->pd;
2008 struct pci_dev *pdev = dev->nic_info.pdev;
2009 struct ocrdma_cq *cq;
2010 struct ocrdma_create_qp_req *cmd;
2011 struct ocrdma_create_qp_rsp *rsp;
2012 int qptype;
2013
2014 switch (attrs->qp_type) {
2015 case IB_QPT_GSI:
2016 qptype = OCRDMA_QPT_GSI;
2017 break;
2018 case IB_QPT_RC:
2019 qptype = OCRDMA_QPT_RC;
2020 break;
2021 case IB_QPT_UD:
2022 qptype = OCRDMA_QPT_UD;
2023 break;
2024 default:
2025 return -EINVAL;
2026 };
2027
2028 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_QP, sizeof(*cmd));
2029 if (!cmd)
2030 return status;
2031 cmd->type_pgsz_pdn |= (qptype << OCRDMA_CREATE_QP_REQ_QPT_SHIFT) &
2032 OCRDMA_CREATE_QP_REQ_QPT_MASK;
2033 status = ocrdma_set_create_qp_sq_cmd(cmd, attrs, qp);
2034 if (status)
2035 goto sq_err;
2036
2037 if (attrs->srq) {
2038 struct ocrdma_srq *srq = get_ocrdma_srq(attrs->srq);
2039 cmd->max_sge_recv_flags |= OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK;
2040 cmd->rq_addr[0].lo = srq->id;
2041 qp->srq = srq;
2042 } else {
2043 status = ocrdma_set_create_qp_rq_cmd(cmd, attrs, qp);
2044 if (status)
2045 goto rq_err;
2046 }
2047
2048 status = ocrdma_set_create_qp_ird_cmd(cmd, qp);
2049 if (status)
2050 goto mbx_err;
2051
2052 cmd->type_pgsz_pdn |= (pd->id << OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT) &
2053 OCRDMA_CREATE_QP_REQ_PD_ID_MASK;
2054
2055 flags = ocrdma_set_create_qp_mbx_access_flags(qp);
2056
2057 cmd->max_sge_recv_flags |= flags;
2058 cmd->max_ord_ird |= (dev->attr.max_ord_per_qp <<
2059 OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT) &
2060 OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK;
2061 cmd->max_ord_ird |= (dev->attr.max_ird_per_qp <<
2062 OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT) &
2063 OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK;
2064 cq = get_ocrdma_cq(attrs->send_cq);
2065 cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT) &
2066 OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK;
2067 qp->sq_cq = cq;
2068 cq = get_ocrdma_cq(attrs->recv_cq);
2069 cmd->wq_rq_cqid |= (cq->id << OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT) &
2070 OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
2071 qp->rq_cq = cq;
2072
2073 if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
2074 (attrs->cap.max_inline_data <= dev->attr.max_inline_data))
2075 ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
2076 dpp_cq_id);
2077
2078 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
2079 if (status)
2080 goto mbx_err;
2081 rsp = (struct ocrdma_create_qp_rsp *)cmd;
2082 ocrdma_get_create_qp_rsp(rsp, qp, attrs, dpp_offset, dpp_credit_lmt);
2083 qp->state = OCRDMA_QPS_RST;
2084 kfree(cmd);
2085 return 0;
2086mbx_err:
2087 if (qp->rq.va)
2088 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
2089rq_err:
2090 ocrdma_err("%s(%d) rq_err\n", __func__, dev->id);
2091 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
2092sq_err:
2093 ocrdma_err("%s(%d) sq_err\n", __func__, dev->id);
2094 kfree(cmd);
2095 return status;
2096}
2097
2098int ocrdma_mbx_query_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
2099 struct ocrdma_qp_params *param)
2100{
2101 int status = -ENOMEM;
2102 struct ocrdma_query_qp *cmd;
2103 struct ocrdma_query_qp_rsp *rsp;
2104
2105 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_QUERY_QP, sizeof(*cmd));
2106 if (!cmd)
2107 return status;
2108 cmd->qp_id = qp->id;
2109 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
2110 if (status)
2111 goto mbx_err;
2112 rsp = (struct ocrdma_query_qp_rsp *)cmd;
2113 memcpy(param, &rsp->params, sizeof(struct ocrdma_qp_params));
2114mbx_err:
2115 kfree(cmd);
2116 return status;
2117}
2118
2119int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
2120 u8 *mac_addr)
2121{
2122 struct in6_addr in6;
2123
2124 memcpy(&in6, dgid, sizeof in6);
2125 if (rdma_is_multicast_addr(&in6))
2126 rdma_get_mcast_mac(&in6, mac_addr);
2127 else if (rdma_link_local_addr(&in6))
2128 rdma_get_ll_mac(&in6, mac_addr);
2129 else {
2130 ocrdma_err("%s() fail to resolve mac_addr.\n", __func__);
2131 return -EINVAL;
2132 }
2133 return 0;
2134}
2135
/* Fill the address-vector (GRH, MAC, VLAN) fields of a MODIFY_QP
 * command from @attrs->ah_attr.
 *
 * Does nothing when no GRH is present in the AH attributes.  The DGID
 * and SGID fields are converted to LE32 in place before the command is
 * sent.
 */
static void ocrdma_set_av_params(struct ocrdma_qp *qp,
				 struct ocrdma_modify_qp *cmd,
				 struct ib_qp_attr *attrs)
{
	struct ib_ah_attr *ah_attr = &attrs->ah_attr;
	union ib_gid sgid;
	u32 vlan_id;
	u8 mac_addr[6];
	if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
		return;
	cmd->params.tclass_sq_psn |=
	    (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
	cmd->params.rnt_rc_sl_fl |=
	    (ah_attr->grh.flow_label & OCRDMA_QP_PARAMS_FLOW_LABEL_MASK);
	cmd->params.hop_lmt_rq_psn |=
	    (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
	cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
	memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
	       sizeof(cmd->params.dgid));
	ocrdma_query_gid(&qp->dev->ibdev, 1,
			 ah_attr->grh.sgid_index, &sgid);
	qp->sgid_idx = ah_attr->grh.sgid_index;
	memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
	/* NOTE(review): the return value of ocrdma_resolve_dgid() is
	 * ignored; on failure mac_addr is used uninitialized below.
	 * Consider propagating the error to the caller.
	 */
	ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
				(mac_addr[2] << 16) | (mac_addr[3] << 24);
	/* convert them to LE format. */
	ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
	ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
	cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
	/* tag the QP with the VLAN derived from the source GID, if any */
	vlan_id = rdma_get_vlan_id(&sgid);
	if (vlan_id && (vlan_id < 0x1000)) {
		cmd->params.vlan_dmac_b4_to_b5 |=
		    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
		cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
	}
}
2173
/* Translate the generic ib_qp_attr/attr_mask pair into the ocrdma
 * MODIFY_QP command parameters, setting the matching *_VALID flag for
 * every field the firmware should apply.
 *
 * Returns 0 on success or -EINVAL when a requested value exceeds a
 * device limit (path MTU larger than the Ethernet MTU, ORD/IRD above
 * the per-QP maximum).
 */
static int ocrdma_set_qp_params(struct ocrdma_qp *qp,
				struct ocrdma_modify_qp *cmd,
				struct ib_qp_attr *attrs, int attr_mask,
				enum ib_qp_state old_qps)
{
	int status = 0;
	struct net_device *netdev = qp->dev->nic_info.netdev;
	int eth_mtu = iboe_get_mtu(netdev->mtu);

	if (attr_mask & IB_QP_PKEY_INDEX) {
		cmd->params.path_mtu_pkey_indx |= (attrs->pkey_index &
					    OCRDMA_QP_PARAMS_PKEY_INDEX_MASK);
		cmd->flags |= OCRDMA_QP_PARA_PKEY_VALID;
	}
	if (attr_mask & IB_QP_QKEY) {
		qp->qkey = attrs->qkey;
		cmd->params.qkey = attrs->qkey;
		cmd->flags |= OCRDMA_QP_PARA_QKEY_VALID;
	}
	if (attr_mask & IB_QP_AV)
		ocrdma_set_av_params(qp, cmd, attrs);
	else if (qp->qp_type == IB_QPT_GSI || qp->qp_type == IB_QPT_UD) {
		/* set the default mac address for UD, GSI QPs */
		cmd->params.dmac_b0_to_b3 = qp->dev->nic_info.mac_addr[0] |
			(qp->dev->nic_info.mac_addr[1] << 8) |
			(qp->dev->nic_info.mac_addr[2] << 16) |
			(qp->dev->nic_info.mac_addr[3] << 24);
		cmd->params.vlan_dmac_b4_to_b5 = qp->dev->nic_info.mac_addr[4] |
					(qp->dev->nic_info.mac_addr[5] << 8);
	}
	if ((attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) &&
	    attrs->en_sqd_async_notify) {
		cmd->params.max_sge_recv_flags |=
				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC;
		/* NOTE(review): DST_QPN_VALID looks like a copy-paste of
		 * the IB_QP_DEST_QPN branch below - confirm which flag
		 * the firmware expects for SQD async notification.
		 */
		cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
	}
	if (attr_mask & IB_QP_DEST_QPN) {
		cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->dest_qp_num &
				OCRDMA_QP_PARAMS_DEST_QPN_MASK);
		cmd->flags |= OCRDMA_QP_PARA_DST_QPN_VALID;
	}
	if (attr_mask & IB_QP_PATH_MTU) {
		/* the RoCE path MTU can never exceed the Ethernet MTU */
		if (ib_mtu_enum_to_int(eth_mtu) <
		    ib_mtu_enum_to_int(attrs->path_mtu)) {
			status = -EINVAL;
			goto pmtu_err;
		}
		cmd->params.path_mtu_pkey_indx |=
		    (ib_mtu_enum_to_int(attrs->path_mtu) <<
		     OCRDMA_QP_PARAMS_PATH_MTU_SHIFT) &
		    OCRDMA_QP_PARAMS_PATH_MTU_MASK;
		cmd->flags |= OCRDMA_QP_PARA_PMTU_VALID;
	}
	if (attr_mask & IB_QP_TIMEOUT) {
		cmd->params.ack_to_rnr_rtc_dest_qpn |= attrs->timeout <<
		    OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
		cmd->flags |= OCRDMA_QP_PARA_ACK_TO_VALID;
	}
	if (attr_mask & IB_QP_RETRY_CNT) {
		cmd->params.rnt_rc_sl_fl |= (attrs->retry_cnt <<
				      OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT) &
		    OCRDMA_QP_PARAMS_RETRY_CNT_MASK;
		cmd->flags |= OCRDMA_QP_PARA_RETRY_CNT_VALID;
	}
	if (attr_mask & IB_QP_MIN_RNR_TIMER) {
		cmd->params.rnt_rc_sl_fl |= (attrs->min_rnr_timer <<
		    OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT) &
		    OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK;
		cmd->flags |= OCRDMA_QP_PARA_RNT_VALID;
	}
	if (attr_mask & IB_QP_RNR_RETRY) {
		cmd->params.ack_to_rnr_rtc_dest_qpn |= (attrs->rnr_retry <<
			OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT)
			& OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK;
		cmd->flags |= OCRDMA_QP_PARA_RRC_VALID;
	}
	if (attr_mask & IB_QP_SQ_PSN) {
		/* PSNs are 24 bits wide */
		cmd->params.tclass_sq_psn |= (attrs->sq_psn & 0x00ffffff);
		cmd->flags |= OCRDMA_QP_PARA_SQPSN_VALID;
	}
	if (attr_mask & IB_QP_RQ_PSN) {
		cmd->params.hop_lmt_rq_psn |= (attrs->rq_psn & 0x00ffffff);
		cmd->flags |= OCRDMA_QP_PARA_RQPSN_VALID;
	}
	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) {
		if (attrs->max_rd_atomic > qp->dev->attr.max_ord_per_qp) {
			status = -EINVAL;
			goto pmtu_err;
		}
		qp->max_ord = attrs->max_rd_atomic;
		cmd->flags |= OCRDMA_QP_PARA_MAX_ORD_VALID;
	}
	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) {
		if (attrs->max_dest_rd_atomic > qp->dev->attr.max_ird_per_qp) {
			status = -EINVAL;
			goto pmtu_err;
		}
		qp->max_ird = attrs->max_dest_rd_atomic;
		cmd->flags |= OCRDMA_QP_PARA_MAX_IRD_VALID;
	}
	/* always send the (possibly just updated) ORD/IRD pair */
	cmd->params.max_ord_ird = (qp->max_ord <<
				OCRDMA_QP_PARAMS_MAX_ORD_SHIFT) |
				(qp->max_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK);
pmtu_err:	/* shared error exit despite the MTU-specific name */
	return status;
}
2280
2281int ocrdma_mbx_modify_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
2282 struct ib_qp_attr *attrs, int attr_mask,
2283 enum ib_qp_state old_qps)
2284{
2285 int status = -ENOMEM;
2286 struct ocrdma_modify_qp *cmd;
2287
2288 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_MODIFY_QP, sizeof(*cmd));
2289 if (!cmd)
2290 return status;
2291
2292 cmd->params.id = qp->id;
2293 cmd->flags = 0;
2294 if (attr_mask & IB_QP_STATE) {
2295 cmd->params.max_sge_recv_flags |=
2296 (get_ocrdma_qp_state(attrs->qp_state) <<
2297 OCRDMA_QP_PARAMS_STATE_SHIFT) &
2298 OCRDMA_QP_PARAMS_STATE_MASK;
2299 cmd->flags |= OCRDMA_QP_PARA_QPS_VALID;
2300 } else
2301 cmd->params.max_sge_recv_flags |=
2302 (qp->state << OCRDMA_QP_PARAMS_STATE_SHIFT) &
2303 OCRDMA_QP_PARAMS_STATE_MASK;
2304 status = ocrdma_set_qp_params(qp, cmd, attrs, attr_mask, old_qps);
2305 if (status)
2306 goto mbx_err;
2307 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
2308 if (status)
2309 goto mbx_err;
2310
2311mbx_err:
2312 kfree(cmd);
2313 return status;
2314}
2315
2316int ocrdma_mbx_destroy_qp(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
2317{
2318 int status = -ENOMEM;
2319 struct ocrdma_destroy_qp *cmd;
2320 struct pci_dev *pdev = dev->nic_info.pdev;
2321
2322 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_QP, sizeof(*cmd));
2323 if (!cmd)
2324 return status;
2325 cmd->qp_id = qp->id;
2326 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
2327 if (status)
2328 goto mbx_err;
2329
2330mbx_err:
2331 kfree(cmd);
2332 if (qp->sq.va)
2333 dma_free_coherent(&pdev->dev, qp->sq.len, qp->sq.va, qp->sq.pa);
2334 if (!qp->srq && qp->rq.va)
2335 dma_free_coherent(&pdev->dev, qp->rq.len, qp->rq.va, qp->rq.pa);
2336 if (qp->dpp_enabled)
2337 qp->pd->num_dpp_qp++;
2338 return status;
2339}
2340
/* CREATE_SRQ mailbox command.
 *
 * Sizes the receive queue (rounding max_wr+1 up to a supported HW page
 * configuration), allocates the DMA ring, sends the command and decodes
 * the granted depth/SGE limits from the response.
 *
 * Returns 0 on success, -EINVAL when the requested depth cannot be
 * mapped to a queue configuration, -ENOMEM on allocation failure, or
 * the mailbox error status (in which case the DMA ring is freed).
 */
int ocrdma_mbx_create_srq(struct ocrdma_srq *srq,
			  struct ib_srq_init_attr *srq_attr,
			  struct ocrdma_pd *pd)
{
	int status = -ENOMEM;
	int hw_pages, hw_page_size;
	int len;
	struct ocrdma_create_srq_rsp *rsp;
	struct ocrdma_create_srq *cmd;
	dma_addr_t pa;
	struct ocrdma_dev *dev = srq->dev;
	struct pci_dev *pdev = dev->nic_info.pdev;
	u32 max_rqe_allocated;

	cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
	if (!cmd)
		return status;

	cmd->pgsz_pdid = pd->id & OCRDMA_CREATE_SRQ_PD_ID_MASK;
	/* +1: one extra RQE keeps the ring from ever looking full */
	max_rqe_allocated = srq_attr->attr.max_wr + 1;
	status = ocrdma_build_q_conf(&max_rqe_allocated,
				dev->attr.rqe_size,
				&hw_pages, &hw_page_size);
	if (status) {
		ocrdma_err("%s() req. max_wr=0x%x\n", __func__,
			   srq_attr->attr.max_wr);
		status = -EINVAL;
		goto ret;
	}
	len = hw_pages * hw_page_size;
	srq->rq.va = dma_alloc_coherent(&pdev->dev, len, &pa, GFP_KERNEL);
	if (!srq->rq.va) {
		status = -ENOMEM;
		goto ret;
	}
	ocrdma_build_q_pages(&cmd->rq_addr[0], hw_pages, pa, hw_page_size);

	srq->rq.entry_size = dev->attr.rqe_size;
	srq->rq.pa = pa;
	srq->rq.len = len;
	srq->rq.max_cnt = max_rqe_allocated;

	/* the command carries the depth as log2 */
	cmd->max_sge_rqe = ilog2(max_rqe_allocated);
	cmd->max_sge_rqe |= srq_attr->attr.max_sge <<
				OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT;

	cmd->pgsz_pdid |= (ilog2(hw_page_size / OCRDMA_MIN_Q_PAGE_SIZE)
		<< OCRDMA_CREATE_SRQ_PG_SZ_SHIFT);
	cmd->pages_rqe_sz |= (dev->attr.rqe_size
		<< OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT)
		& OCRDMA_CREATE_SRQ_RQE_SIZE_MASK;
	cmd->pages_rqe_sz |= hw_pages << OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT;

	status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
	if (status)
		goto mbx_err;
	/* decode what the firmware actually granted */
	rsp = (struct ocrdma_create_srq_rsp *)cmd;
	srq->id = rsp->id;
	srq->rq.dbid = rsp->id;
	max_rqe_allocated = ((rsp->max_sge_rqe_allocated &
			      OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK) >>
			     OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT);
	max_rqe_allocated = (1 << max_rqe_allocated);
	srq->rq.max_cnt = max_rqe_allocated;
	srq->rq.max_wqe_idx = max_rqe_allocated - 1;
	srq->rq.max_sges = (rsp->max_sge_rqe_allocated &
			    OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK) >>
			   OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT;
	goto ret;
mbx_err:
	dma_free_coherent(&pdev->dev, srq->rq.len, srq->rq.va, pa);
ret:
	kfree(cmd);
	return status;
}
2416
2417int ocrdma_mbx_modify_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
2418{
2419 int status = -ENOMEM;
2420 struct ocrdma_modify_srq *cmd;
2421 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
2422 if (!cmd)
2423 return status;
2424 cmd->id = srq->id;
2425 cmd->limit_max_rqe |= srq_attr->srq_limit <<
2426 OCRDMA_MODIFY_SRQ_LIMIT_SHIFT;
2427 status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
2428 kfree(cmd);
2429 return status;
2430}
2431
2432int ocrdma_mbx_query_srq(struct ocrdma_srq *srq, struct ib_srq_attr *srq_attr)
2433{
2434 int status = -ENOMEM;
2435 struct ocrdma_query_srq *cmd;
2436 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_CREATE_SRQ, sizeof(*cmd));
2437 if (!cmd)
2438 return status;
2439 cmd->id = srq->rq.dbid;
2440 status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
2441 if (status == 0) {
2442 struct ocrdma_query_srq_rsp *rsp =
2443 (struct ocrdma_query_srq_rsp *)cmd;
2444 srq_attr->max_sge =
2445 rsp->srq_lmt_max_sge &
2446 OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK;
2447 srq_attr->max_wr =
2448 rsp->max_rqe_pdid >> OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT;
2449 srq_attr->srq_limit = rsp->srq_lmt_max_sge >>
2450 OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT;
2451 }
2452 kfree(cmd);
2453 return status;
2454}
2455
2456int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
2457{
2458 int status = -ENOMEM;
2459 struct ocrdma_destroy_srq *cmd;
2460 struct pci_dev *pdev = dev->nic_info.pdev;
2461 cmd = ocrdma_init_emb_mqe(OCRDMA_CMD_DELETE_SRQ, sizeof(*cmd));
2462 if (!cmd)
2463 return status;
2464 cmd->id = srq->id;
2465 status = ocrdma_mbx_cmd(srq->dev, (struct ocrdma_mqe *)cmd);
2466 if (srq->rq.va)
2467 dma_free_coherent(&pdev->dev, srq->rq.len,
2468 srq->rq.va, srq->rq.pa);
2469 kfree(cmd);
2470 return status;
2471}
2472
2473int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
2474{
2475 int i;
2476 int status = -EINVAL;
2477 struct ocrdma_av *av;
2478 unsigned long flags;
2479
2480 av = dev->av_tbl.va;
2481 spin_lock_irqsave(&dev->av_tbl.lock, flags);
2482 for (i = 0; i < dev->av_tbl.num_ah; i++) {
2483 if (av->valid == 0) {
2484 av->valid = OCRDMA_AV_VALID;
2485 ah->av = av;
2486 ah->id = i;
2487 status = 0;
2488 break;
2489 }
2490 av++;
2491 }
2492 if (i == dev->av_tbl.num_ah)
2493 status = -EAGAIN;
2494 spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
2495 return status;
2496}
2497
/* Release the address-vector table slot held by @ah. Always succeeds. */
int ocrdma_free_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
{
	unsigned long flags;
	spin_lock_irqsave(&dev->av_tbl.lock, flags);
	ah->av->valid = 0;	/* slot becomes claimable by ocrdma_alloc_av() */
	spin_unlock_irqrestore(&dev->av_tbl.lock, flags);
	return 0;
}
2506
/* Create the control-path (mailbox) event queue and hook up its IRQ.
 *
 * In MSI-X mode at least two vectors must be available (one control,
 * one data path) or -EBUSY is returned; in INTX mode the single line
 * is shared.  On IRQ-request failure the EQ is torn down again.
 */
static int ocrdma_create_mq_eq(struct ocrdma_dev *dev)
{
	int status;
	int irq;
	unsigned long flags = 0;
	int num_eq = 0;

	if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX)
		flags = IRQF_SHARED;
	else {
		num_eq = dev->nic_info.msix.num_vectors -
				dev->nic_info.msix.start_vector;
		/* minimum two vectors/eq are required for rdma to work.
		 * one for control path and one for data path.
		 */
		if (num_eq < 2)
			return -EBUSY;
	}

	status = ocrdma_create_eq(dev, &dev->meq, OCRDMA_EQ_LEN);
	if (status)
		return status;
	sprintf(dev->meq.irq_name, "ocrdma_mq%d", dev->id);
	irq = ocrdma_get_irq(dev, &dev->meq);
	status = request_irq(irq, ocrdma_irq_handler, flags, dev->meq.irq_name,
			     &dev->meq);
	if (status)
		_ocrdma_destroy_eq(dev, &dev->meq);
	return status;
}
2537
2538static int ocrdma_create_qp_eqs(struct ocrdma_dev *dev)
2539{
2540 int num_eq, i, status = 0;
2541 int irq;
2542 unsigned long flags = 0;
2543
2544 num_eq = dev->nic_info.msix.num_vectors -
2545 dev->nic_info.msix.start_vector;
2546 if (dev->nic_info.intr_mode == BE_INTERRUPT_MODE_INTX) {
2547 num_eq = 1;
2548 flags = IRQF_SHARED;
2549 } else
2550 num_eq = min_t(u32, num_eq, num_online_cpus());
2551 dev->qp_eq_tbl = kzalloc(sizeof(struct ocrdma_eq) * num_eq, GFP_KERNEL);
2552 if (!dev->qp_eq_tbl)
2553 return -ENOMEM;
2554
2555 for (i = 0; i < num_eq; i++) {
2556 status = ocrdma_create_eq(dev, &dev->qp_eq_tbl[i],
2557 OCRDMA_EQ_LEN);
2558 if (status) {
2559 status = -EINVAL;
2560 break;
2561 }
2562 sprintf(dev->qp_eq_tbl[i].irq_name, "ocrdma_qp%d-%d",
2563 dev->id, i);
2564 irq = ocrdma_get_irq(dev, &dev->qp_eq_tbl[i]);
2565 status = request_irq(irq, ocrdma_irq_handler, flags,
2566 dev->qp_eq_tbl[i].irq_name,
2567 &dev->qp_eq_tbl[i]);
2568 if (status) {
2569 _ocrdma_destroy_eq(dev, &dev->qp_eq_tbl[i]);
2570 status = -EINVAL;
2571 break;
2572 }
2573 dev->eq_cnt += 1;
2574 }
2575 /* one eq is sufficient for data path to work */
2576 if (dev->eq_cnt >= 1)
2577 return 0;
2578 if (status)
2579 ocrdma_destroy_qp_eqs(dev);
2580 return status;
2581}
2582
/* Bring up the RDMA function: control-path EQ, data-path EQs, the
 * mailbox queue, firmware/device capability queries and the AH table.
 *
 * On any failure everything brought up so far is torn down in reverse
 * order and the error status is returned.
 */
int ocrdma_init_hw(struct ocrdma_dev *dev)
{
	int status;
	/* set up control path eq */
	status = ocrdma_create_mq_eq(dev);
	if (status)
		return status;
	/* set up data path eq */
	status = ocrdma_create_qp_eqs(dev);
	if (status)
		goto qpeq_err;
	status = ocrdma_create_mq(dev);
	if (status)
		goto mq_err;
	status = ocrdma_mbx_query_fw_config(dev);
	if (status)
		goto conf_err;
	status = ocrdma_mbx_query_dev(dev);
	if (status)
		goto conf_err;
	status = ocrdma_mbx_query_fw_ver(dev);
	if (status)
		goto conf_err;
	status = ocrdma_mbx_create_ah_tbl(dev);
	if (status)
		goto conf_err;
	return 0;

conf_err:
	ocrdma_destroy_mq(dev);
mq_err:
	ocrdma_destroy_qp_eqs(dev);
qpeq_err:
	ocrdma_destroy_eq(dev, &dev->meq);
	ocrdma_err("%s() status=%d\n", __func__, status);
	return status;
}
2620
/* Tear down everything ocrdma_init_hw() created, in reverse order. */
void ocrdma_cleanup_hw(struct ocrdma_dev *dev)
{
	ocrdma_mbx_delete_ah_tbl(dev);

	/* cleanup the data path eqs */
	ocrdma_destroy_qp_eqs(dev);

	/* cleanup the control path */
	ocrdma_destroy_mq(dev);
	ocrdma_destroy_eq(dev, &dev->meq);
}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
deleted file mode 100644
index be5db77404d..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ /dev/null
@@ -1,132 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) CNA Adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#ifndef __OCRDMA_HW_H__
29#define __OCRDMA_HW_H__
30
31#include "ocrdma_sli.h"
32
33static inline void ocrdma_cpu_to_le32(void *dst, u32 len)
34{
35#ifdef __BIG_ENDIAN
36 int i = 0;
37 u32 *src_ptr = dst;
38 u32 *dst_ptr = dst;
39 for (; i < (len / 4); i++)
40 *(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
41#endif
42}
43
44static inline void ocrdma_le32_to_cpu(void *dst, u32 len)
45{
46#ifdef __BIG_ENDIAN
47 int i = 0;
48 u32 *src_ptr = dst;
49 u32 *dst_ptr = dst;
50 for (; i < (len / sizeof(u32)); i++)
51 *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
52#endif
53}
54
55static inline void ocrdma_copy_cpu_to_le32(void *dst, void *src, u32 len)
56{
57#ifdef __BIG_ENDIAN
58 int i = 0;
59 u32 *src_ptr = src;
60 u32 *dst_ptr = dst;
61 for (; i < (len / sizeof(u32)); i++)
62 *(dst_ptr + i) = cpu_to_le32p(src_ptr + i);
63#else
64 memcpy(dst, src, len);
65#endif
66}
67
68static inline void ocrdma_copy_le32_to_cpu(void *dst, void *src, u32 len)
69{
70#ifdef __BIG_ENDIAN
71 int i = 0;
72 u32 *src_ptr = src;
73 u32 *dst_ptr = dst;
74 for (; i < len / sizeof(u32); i++)
75 *(dst_ptr + i) = le32_to_cpu(*(src_ptr + i));
76#else
77 memcpy(dst, src, len);
78#endif
79}
80
81int ocrdma_init_hw(struct ocrdma_dev *);
82void ocrdma_cleanup_hw(struct ocrdma_dev *);
83
84enum ib_qp_state get_ibqp_state(enum ocrdma_qp_state qps);
85void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
86 bool solicited, u16 cqe_popped);
87
88/* verbs specific mailbox commands */
89int ocrdma_query_config(struct ocrdma_dev *,
90 struct ocrdma_mbx_query_config *config);
91int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
92
93int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
94int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
95
96int ocrdma_mbx_alloc_lkey(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
97 u32 pd_id, int addr_check);
98int ocrdma_mbx_dealloc_lkey(struct ocrdma_dev *, int fmr, u32 lkey);
99
100int ocrdma_reg_mr(struct ocrdma_dev *, struct ocrdma_hw_mr *hwmr,
101 u32 pd_id, int acc);
102int ocrdma_mbx_create_cq(struct ocrdma_dev *, struct ocrdma_cq *,
103 int entries, int dpp_cq);
104int ocrdma_mbx_destroy_cq(struct ocrdma_dev *, struct ocrdma_cq *);
105
106int ocrdma_mbx_create_qp(struct ocrdma_qp *, struct ib_qp_init_attr *attrs,
107 u8 enable_dpp_cq, u16 dpp_cq_id, u16 *dpp_offset,
108 u16 *dpp_credit_lmt);
109int ocrdma_mbx_modify_qp(struct ocrdma_dev *, struct ocrdma_qp *,
110 struct ib_qp_attr *attrs, int attr_mask,
111 enum ib_qp_state old_qps);
112int ocrdma_mbx_query_qp(struct ocrdma_dev *, struct ocrdma_qp *,
113 struct ocrdma_qp_params *param);
114int ocrdma_mbx_destroy_qp(struct ocrdma_dev *, struct ocrdma_qp *);
115
116int ocrdma_mbx_create_srq(struct ocrdma_srq *,
117 struct ib_srq_init_attr *,
118 struct ocrdma_pd *);
119int ocrdma_mbx_modify_srq(struct ocrdma_srq *, struct ib_srq_attr *);
120int ocrdma_mbx_query_srq(struct ocrdma_srq *, struct ib_srq_attr *);
121int ocrdma_mbx_destroy_srq(struct ocrdma_dev *, struct ocrdma_srq *);
122
123int ocrdma_alloc_av(struct ocrdma_dev *, struct ocrdma_ah *);
124int ocrdma_free_av(struct ocrdma_dev *, struct ocrdma_ah *);
125
126int ocrdma_qp_state_machine(struct ocrdma_qp *, enum ib_qp_state new_state,
127 enum ib_qp_state *old_ib_state);
128bool ocrdma_is_qp_in_sq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
129bool ocrdma_is_qp_in_rq_flushlist(struct ocrdma_cq *, struct ocrdma_qp *);
130void ocrdma_flush_qp(struct ocrdma_qp *);
131
132#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
deleted file mode 100644
index c4e0131f1b5..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ /dev/null
@@ -1,580 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#include <linux/module.h>
29#include <linux/idr.h>
30#include <rdma/ib_verbs.h>
31#include <rdma/ib_user_verbs.h>
32#include <rdma/ib_addr.h>
33
34#include <linux/netdevice.h>
35#include <net/addrconf.h>
36
37#include "ocrdma.h"
38#include "ocrdma_verbs.h"
39#include "ocrdma_ah.h"
40#include "be_roce.h"
41#include "ocrdma_hw.h"
42
43MODULE_VERSION(OCRDMA_ROCE_DEV_VERSION);
44MODULE_DESCRIPTION("Emulex RoCE HCA Driver");
45MODULE_AUTHOR("Emulex Corporation");
46MODULE_LICENSE("GPL");
47
48static LIST_HEAD(ocrdma_dev_list);
49static DEFINE_SPINLOCK(ocrdma_devlist_lock);
50static DEFINE_IDR(ocrdma_dev_id);
51
52static union ib_gid ocrdma_zero_sgid;
53
/* Allocate a unique device instance number from the ocrdma IDR.
 * Returns the instance id, or -1 when the IDR cannot allocate.
 */
static int ocrdma_get_instance(void)
{
	int instance = 0;

	/* Assign an unused number */
	if (!idr_pre_get(&ocrdma_dev_id, GFP_KERNEL))
		return -1;
	if (idr_get_new(&ocrdma_dev_id, NULL, &instance))
		return -1;
	return instance;
}
65
66void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
67{
68 u8 mac_addr[6];
69
70 memcpy(&mac_addr[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
71 guid[0] = mac_addr[0] ^ 2;
72 guid[1] = mac_addr[1];
73 guid[2] = mac_addr[2];
74 guid[3] = 0xff;
75 guid[4] = 0xfe;
76 guid[5] = mac_addr[3];
77 guid[6] = mac_addr[4];
78 guid[7] = mac_addr[5];
79}
80
/* Build a link-local SGID from a MAC address.
 *
 * The subnet prefix is the IPv6 link-local prefix fe80::/64; the low
 * 64 bits embed the MAC.  For VLAN interfaces the middle two bytes
 * carry the VLAN id instead of the usual 0xff/0xfe filler.
 */
static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
				  bool is_vlan, u16 vlan_id)
{
	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	sgid->raw[8] = mac_addr[0] ^ 2;	/* flip locally-administered bit */
	sgid->raw[9] = mac_addr[1];
	sgid->raw[10] = mac_addr[2];
	if (is_vlan) {
		sgid->raw[11] = vlan_id >> 8;
		sgid->raw[12] = vlan_id & 0xff;
	} else {
		sgid->raw[11] = 0xff;
		sgid->raw[12] = 0xfe;
	}
	sgid->raw[13] = mac_addr[3];
	sgid->raw[14] = mac_addr[4];
	sgid->raw[15] = mac_addr[5];
}
99
100static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
101 bool is_vlan, u16 vlan_id)
102{
103 int i;
104 union ib_gid new_sgid;
105 unsigned long flags;
106
107 memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
108
109 ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
110
111 spin_lock_irqsave(&dev->sgid_lock, flags);
112 for (i = 0; i < OCRDMA_MAX_SGID; i++) {
113 if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
114 sizeof(union ib_gid))) {
115 /* found free entry */
116 memcpy(&dev->sgid_tbl[i], &new_sgid,
117 sizeof(union ib_gid));
118 spin_unlock_irqrestore(&dev->sgid_lock, flags);
119 return true;
120 } else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
121 sizeof(union ib_gid))) {
122 /* entry already present, no addition is required. */
123 spin_unlock_irqrestore(&dev->sgid_lock, flags);
124 return false;
125 }
126 }
127 spin_unlock_irqrestore(&dev->sgid_lock, flags);
128 return false;
129}
130
131static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
132 bool is_vlan, u16 vlan_id)
133{
134 int found = false;
135 int i;
136 union ib_gid sgid;
137 unsigned long flags;
138
139 ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
140
141 spin_lock_irqsave(&dev->sgid_lock, flags);
142 /* first is default sgid, which cannot be deleted. */
143 for (i = 1; i < OCRDMA_MAX_SGID; i++) {
144 if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
145 /* found matching entry */
146 memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
147 found = true;
148 break;
149 }
150 }
151 spin_unlock_irqrestore(&dev->sgid_lock, flags);
152 return found;
153}
154
/* Populate slot 0 of the GID table with the default link-local SGID
 * built from the device GUID.
 */
static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
{
	/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
	union ib_gid *sgid = &dev->sgid_tbl[0];

	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
	ocrdma_get_guid(dev, &sgid->raw[8]);
}
163
164#if IS_ENABLED(CONFIG_VLAN_8021Q)
165static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
166{
167 struct net_device *netdev, *tmp;
168 u16 vlan_id;
169 bool is_vlan;
170
171 netdev = dev->nic_info.netdev;
172
173 rcu_read_lock();
174 for_each_netdev_rcu(&init_net, tmp) {
175 if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
176 if (!netif_running(tmp) || !netif_oper_up(tmp))
177 continue;
178 if (netdev != tmp) {
179 vlan_id = vlan_dev_vlan_id(tmp);
180 is_vlan = true;
181 } else {
182 is_vlan = false;
183 vlan_id = 0;
184 tmp = netdev;
185 }
186 ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
187 }
188 }
189 rcu_read_unlock();
190}
191#else
192static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
193{
194
195}
196#endif /* VLAN */
197
/* Initialize the device GID table: default SGID plus VLAN-derived
 * SGIDs.  Always returns 0.
 */
static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
{
	ocrdma_add_default_sgid(dev);
	ocrdma_add_vlan_sgids(dev);
	return 0;
}
204
205#if IS_ENABLED(CONFIG_IPV6)
206
/* IPv6 address notifier callback.
 *
 * When a link-local IPv6 address comes or goes on a netdev (or a VLAN
 * on top of it) that backs one of our devices, add or remove the
 * corresponding SGID and, if the table changed, raise an
 * IB_EVENT_GID_CHANGE toward the consumers.
 *
 * Returns NOTIFY_DONE when the event is not for one of our ports or
 * not a link-local address, NOTIFY_OK otherwise.
 */
static int ocrdma_inet6addr_event(struct notifier_block *notifier,
				  unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
	struct net_device *netdev = ifa->idev->dev;
	struct ib_event gid_event;
	struct ocrdma_dev *dev;
	bool found = false;
	bool updated = false;
	bool is_vlan = false;
	u16 vid = 0;

	/* resolve a VLAN device to its real lower device */
	is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
	if (is_vlan) {
		vid = vlan_dev_vlan_id(netdev);
		netdev = vlan_dev_real_dev(netdev);
	}

	/* does this netdev back one of our registered devices? */
	rcu_read_lock();
	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
		if (dev->nic_info.netdev == netdev) {
			found = true;
			break;
		}
	}
	rcu_read_unlock();

	if (!found)
		return NOTIFY_DONE;
	if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
		return NOTIFY_DONE;

	mutex_lock(&dev->dev_lock);
	switch (event) {
	case NETDEV_UP:
		updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	case NETDEV_DOWN:
		updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
		break;
	default:
		break;
	}
	if (updated) {
		/* GID table updated, notify the consumers about it */
		gid_event.device = &dev->ibdev;
		gid_event.element.port_num = 1;
		gid_event.event = IB_EVENT_GID_CHANGE;
		ib_dispatch_event(&gid_event);
	}
	mutex_unlock(&dev->dev_lock);
	return NOTIFY_OK;
}
260
/* Registered in ocrdma_init_module() when IPv6 is enabled. */
static struct notifier_block ocrdma_inet6addr_notifier = {
	.notifier_call = ocrdma_inet6addr_event
};
264
265#endif /* IPV6 and VLAN */
266
/* RoCE device: the link layer is Ethernet on every port. */
static enum rdma_link_layer ocrdma_link_layer(struct ib_device *device,
					      u8 port_num)
{
	return IB_LINK_LAYER_ETHERNET;
}
272
/* Fill in the ib_device: name, node identity, advertised user-verbs
 * command masks and the verb callback table, then register with the
 * IB core. SRQ verbs are advertised only on GEN2 family hardware.
 */
static int ocrdma_register_device(struct ocrdma_dev *dev)
{
	strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX);
	ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid);
	memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC,
	       sizeof(OCRDMA_NODE_DESC));
	dev->ibdev.owner = THIS_MODULE;
	/* Base set of uverbs commands every ocrdma device supports. */
	dev->ibdev.uverbs_cmd_mask =
	    OCRDMA_UVERBS(GET_CONTEXT) |
	    OCRDMA_UVERBS(QUERY_DEVICE) |
	    OCRDMA_UVERBS(QUERY_PORT) |
	    OCRDMA_UVERBS(ALLOC_PD) |
	    OCRDMA_UVERBS(DEALLOC_PD) |
	    OCRDMA_UVERBS(REG_MR) |
	    OCRDMA_UVERBS(DEREG_MR) |
	    OCRDMA_UVERBS(CREATE_COMP_CHANNEL) |
	    OCRDMA_UVERBS(CREATE_CQ) |
	    OCRDMA_UVERBS(RESIZE_CQ) |
	    OCRDMA_UVERBS(DESTROY_CQ) |
	    OCRDMA_UVERBS(REQ_NOTIFY_CQ) |
	    OCRDMA_UVERBS(CREATE_QP) |
	    OCRDMA_UVERBS(MODIFY_QP) |
	    OCRDMA_UVERBS(QUERY_QP) |
	    OCRDMA_UVERBS(DESTROY_QP) |
	    OCRDMA_UVERBS(POLL_CQ) |
	    OCRDMA_UVERBS(POST_SEND) |
	    OCRDMA_UVERBS(POST_RECV);

	/* Address-handle commands. */
	dev->ibdev.uverbs_cmd_mask |=
	    OCRDMA_UVERBS(CREATE_AH) |
	    OCRDMA_UVERBS(MODIFY_AH) |
	    OCRDMA_UVERBS(QUERY_AH) |
	    OCRDMA_UVERBS(DESTROY_AH);

	dev->ibdev.node_type = RDMA_NODE_IB_CA;
	dev->ibdev.phys_port_cnt = 1;
	dev->ibdev.num_comp_vectors = 1;

	/* mandatory verbs. */
	dev->ibdev.query_device = ocrdma_query_device;
	dev->ibdev.query_port = ocrdma_query_port;
	dev->ibdev.modify_port = ocrdma_modify_port;
	dev->ibdev.query_gid = ocrdma_query_gid;
	dev->ibdev.get_link_layer = ocrdma_link_layer;
	dev->ibdev.alloc_pd = ocrdma_alloc_pd;
	dev->ibdev.dealloc_pd = ocrdma_dealloc_pd;

	dev->ibdev.create_cq = ocrdma_create_cq;
	dev->ibdev.destroy_cq = ocrdma_destroy_cq;
	dev->ibdev.resize_cq = ocrdma_resize_cq;

	dev->ibdev.create_qp = ocrdma_create_qp;
	dev->ibdev.modify_qp = ocrdma_modify_qp;
	dev->ibdev.query_qp = ocrdma_query_qp;
	dev->ibdev.destroy_qp = ocrdma_destroy_qp;

	dev->ibdev.query_pkey = ocrdma_query_pkey;
	dev->ibdev.create_ah = ocrdma_create_ah;
	dev->ibdev.destroy_ah = ocrdma_destroy_ah;
	dev->ibdev.query_ah = ocrdma_query_ah;
	dev->ibdev.modify_ah = ocrdma_modify_ah;

	dev->ibdev.poll_cq = ocrdma_poll_cq;
	dev->ibdev.post_send = ocrdma_post_send;
	dev->ibdev.post_recv = ocrdma_post_recv;
	dev->ibdev.req_notify_cq = ocrdma_arm_cq;

	dev->ibdev.get_dma_mr = ocrdma_get_dma_mr;
	dev->ibdev.dereg_mr = ocrdma_dereg_mr;
	dev->ibdev.reg_user_mr = ocrdma_reg_user_mr;

	/* mandatory to support user space verbs consumer. */
	dev->ibdev.alloc_ucontext = ocrdma_alloc_ucontext;
	dev->ibdev.dealloc_ucontext = ocrdma_dealloc_ucontext;
	dev->ibdev.mmap = ocrdma_mmap;
	dev->ibdev.dma_device = &dev->nic_info.pdev->dev;

	dev->ibdev.process_mad = ocrdma_process_mad;

	/* SRQ support is a GEN2-only capability. */
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		dev->ibdev.uverbs_cmd_mask |=
		    OCRDMA_UVERBS(CREATE_SRQ) |
		    OCRDMA_UVERBS(MODIFY_SRQ) |
		    OCRDMA_UVERBS(QUERY_SRQ) |
		    OCRDMA_UVERBS(DESTROY_SRQ) |
		    OCRDMA_UVERBS(POST_SRQ_RECV);

		dev->ibdev.create_srq = ocrdma_create_srq;
		dev->ibdev.modify_srq = ocrdma_modify_srq;
		dev->ibdev.query_srq = ocrdma_query_srq;
		dev->ibdev.destroy_srq = ocrdma_destroy_srq;
		dev->ibdev.post_srq_recv = ocrdma_post_srq_recv;
	}
	return ib_register_device(&dev->ibdev, NULL);
}
368
369static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
370{
371 mutex_init(&dev->dev_lock);
372 dev->sgid_tbl = kzalloc(sizeof(union ib_gid) *
373 OCRDMA_MAX_SGID, GFP_KERNEL);
374 if (!dev->sgid_tbl)
375 goto alloc_err;
376 spin_lock_init(&dev->sgid_lock);
377
378 dev->cq_tbl = kzalloc(sizeof(struct ocrdma_cq *) *
379 OCRDMA_MAX_CQ, GFP_KERNEL);
380 if (!dev->cq_tbl)
381 goto alloc_err;
382
383 if (dev->attr.max_qp) {
384 dev->qp_tbl = kzalloc(sizeof(struct ocrdma_qp *) *
385 OCRDMA_MAX_QP, GFP_KERNEL);
386 if (!dev->qp_tbl)
387 goto alloc_err;
388 }
389 spin_lock_init(&dev->av_tbl.lock);
390 spin_lock_init(&dev->flush_q_lock);
391 return 0;
392alloc_err:
393 ocrdma_err("%s(%d) error.\n", __func__, dev->id);
394 return -ENOMEM;
395}
396
/* Release the tables allocated by ocrdma_alloc_resources(); safe on
 * partially initialized devices because kfree(NULL) is a no-op.
 */
static void ocrdma_free_resources(struct ocrdma_dev *dev)
{
	kfree(dev->qp_tbl);
	kfree(dev->cq_tbl);
	kfree(dev->sgid_tbl);
}
403
/* be2net "add" callback: allocate an ib_device, copy the NIC info,
 * bring up hardware and software resources, register with the IB core
 * and publish the device on ocrdma_dev_list for the RCU readers.
 * Returns the new device, or NULL on failure (the error is logged,
 * not propagated).
 */
static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
{
	int status = 0;
	struct ocrdma_dev *dev;

	dev = (struct ocrdma_dev *)ib_alloc_device(sizeof(struct ocrdma_dev));
	if (!dev) {
		ocrdma_err("Unable to allocate ib device\n");
		return NULL;
	}
	dev->mbx_cmd = kzalloc(sizeof(struct ocrdma_mqe_emb_cmd), GFP_KERNEL);
	if (!dev->mbx_cmd)
		goto idr_err;

	memcpy(&dev->nic_info, dev_info, sizeof(*dev_info));
	dev->id = ocrdma_get_instance();
	/* A negative id means no idr slot was taken: skip idr_remove. */
	if (dev->id < 0)
		goto idr_err;

	status = ocrdma_init_hw(dev);
	if (status)
		goto init_err;

	status = ocrdma_alloc_resources(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_build_sgid_tbl(dev);
	if (status)
		goto alloc_err;

	status = ocrdma_register_device(dev);
	if (status)
		goto alloc_err;

	/* Publish for RCU readers (e.g. the inet6 notifier). */
	spin_lock(&ocrdma_devlist_lock);
	list_add_tail_rcu(&dev->entry, &ocrdma_dev_list);
	spin_unlock(&ocrdma_devlist_lock);
	return dev;

	/* Unwind in reverse order of setup; ocrdma_free_resources()
	 * tolerates partially allocated tables.
	 */
alloc_err:
	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);
init_err:
	idr_remove(&ocrdma_dev_id, dev->id);
idr_err:
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
	ocrdma_err("%s() leaving. ret=%d\n", __func__, status);
	return NULL;
}
455
/* RCU callback scheduled by ocrdma_remove(): tear the device down
 * only after every RCU reader that may still see it on
 * ocrdma_dev_list has finished.
 */
static void ocrdma_remove_free(struct rcu_head *rcu)
{
	struct ocrdma_dev *dev = container_of(rcu, struct ocrdma_dev, rcu);

	ocrdma_free_resources(dev);
	ocrdma_cleanup_hw(dev);

	idr_remove(&ocrdma_dev_id, dev->id);
	kfree(dev->mbx_cmd);
	ib_dealloc_device(&dev->ibdev);
}
467
/* be2net "remove" callback. */
static void ocrdma_remove(struct ocrdma_dev *dev)
{
	/* first unregister with stack to stop all the active traffic
	 * of the registered clients.
	 */
	ib_unregister_device(&dev->ibdev);

	/* Unpublish the device; actual teardown is deferred past the
	 * grace period via call_rcu().
	 */
	spin_lock(&ocrdma_devlist_lock);
	list_del_rcu(&dev->entry);
	spin_unlock(&ocrdma_devlist_lock);
	call_rcu(&dev->rcu, ocrdma_remove_free);
}
480
481static int ocrdma_open(struct ocrdma_dev *dev)
482{
483 struct ib_event port_event;
484
485 port_event.event = IB_EVENT_PORT_ACTIVE;
486 port_event.element.port_num = 1;
487 port_event.device = &dev->ibdev;
488 ib_dispatch_event(&port_event);
489 return 0;
490}
491
/* NIC link-down handling: force every active QP into the error state,
 * raise IB_EVENT_QP_FATAL for each, then report port 1 down via
 * IB_EVENT_PORT_ERR. Always returns 0.
 */
static int ocrdma_close(struct ocrdma_dev *dev)
{
	int i;
	struct ocrdma_qp *qp, **cur_qp;
	struct ib_event err_event;
	struct ib_qp_attr attrs;
	/* Only qp_state is initialized; attr_mask limits what is read. */
	int attr_mask = IB_QP_STATE;

	attrs.qp_state = IB_QPS_ERR;
	mutex_lock(&dev->dev_lock);
	if (dev->qp_tbl) {
		cur_qp = dev->qp_tbl;
		for (i = 0; i < OCRDMA_MAX_QP; i++) {
			qp = cur_qp[i];
			if (qp) {
				/* change the QP state to ERROR */
				_ocrdma_modify_qp(&qp->ibqp, &attrs, attr_mask);

				err_event.event = IB_EVENT_QP_FATAL;
				err_event.element.qp = &qp->ibqp;
				err_event.device = &dev->ibdev;
				ib_dispatch_event(&err_event);
			}
		}
	}
	mutex_unlock(&dev->dev_lock);

	err_event.event = IB_EVENT_PORT_ERR;
	err_event.element.port_num = 1;
	err_event.device = &dev->ibdev;
	ib_dispatch_event(&err_event);
	return 0;
}
525
526/* event handling via NIC driver ensures that all the NIC specific
527 * initialization done before RoCE driver notifies
528 * event to stack.
529 */
530static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
531{
532 switch (event) {
533 case BE_DEV_UP:
534 ocrdma_open(dev);
535 break;
536 case BE_DEV_DOWN:
537 ocrdma_close(dev);
538 break;
539 };
540}
541
/* Callback table handed to the be2net NIC driver: device add/remove
 * plus link state change notification.
 */
static struct ocrdma_driver ocrdma_drv = {
	.name = "ocrdma_driver",
	.add = ocrdma_add,
	.remove = ocrdma_remove,
	.state_change_handler = ocrdma_event_handler,
};
548
/* Undo the inet6 notifier registration; compiles to an empty function
 * when IPv6 support is disabled.
 */
static void ocrdma_unregister_inet6addr_notifier(void)
{
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&ocrdma_inet6addr_notifier);
#endif
}
555
/* Module entry: hook the IPv6 address notifier (when IPv6 is enabled)
 * and register with the be2net NIC driver. The notifier is unwound if
 * driver registration fails.
 */
static int __init ocrdma_init_module(void)
{
	int status;

#if IS_ENABLED(CONFIG_IPV6)
	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
	if (status)
		return status;
#endif

	status = be_roce_register_driver(&ocrdma_drv);
	if (status)
		ocrdma_unregister_inet6addr_notifier();

	return status;
}
572
/* Module exit: unregister from the NIC driver first so no more
 * add/remove/state events arrive, then drop the inet6 notifier.
 */
static void __exit ocrdma_exit_module(void)
{
	be_roce_unregister_driver(&ocrdma_drv);
	ocrdma_unregister_inet6addr_notifier();
}
578
579module_init(ocrdma_init_module);
580module_exit(ocrdma_exit_module);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
deleted file mode 100644
index c75cbdfa87e..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ /dev/null
@@ -1,1675 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#ifndef __OCRDMA_SLI_H__
29#define __OCRDMA_SLI_H__
30
/* Single-bit mask helper. NOTE(review): Bit(31) expands to a signed
 * 1 << 31, which overflows int (UB in ISO C); the kernel BIT() macro
 * uses 1UL — consider switching, but verify enum users first.
 */
#define Bit(_b) (1 << (_b))
32
33#define OCRDMA_GEN1_FAMILY 0xB
34#define OCRDMA_GEN2_FAMILY 0x2
35
36#define OCRDMA_SUBSYS_ROCE 10
37enum {
38 OCRDMA_CMD_QUERY_CONFIG = 1,
39 OCRDMA_CMD_ALLOC_PD,
40 OCRDMA_CMD_DEALLOC_PD,
41
42 OCRDMA_CMD_CREATE_AH_TBL,
43 OCRDMA_CMD_DELETE_AH_TBL,
44
45 OCRDMA_CMD_CREATE_QP,
46 OCRDMA_CMD_QUERY_QP,
47 OCRDMA_CMD_MODIFY_QP,
48 OCRDMA_CMD_DELETE_QP,
49
50 OCRDMA_CMD_RSVD1,
51 OCRDMA_CMD_ALLOC_LKEY,
52 OCRDMA_CMD_DEALLOC_LKEY,
53 OCRDMA_CMD_REGISTER_NSMR,
54 OCRDMA_CMD_REREGISTER_NSMR,
55 OCRDMA_CMD_REGISTER_NSMR_CONT,
56 OCRDMA_CMD_QUERY_NSMR,
57 OCRDMA_CMD_ALLOC_MW,
58 OCRDMA_CMD_QUERY_MW,
59
60 OCRDMA_CMD_CREATE_SRQ,
61 OCRDMA_CMD_QUERY_SRQ,
62 OCRDMA_CMD_MODIFY_SRQ,
63 OCRDMA_CMD_DELETE_SRQ,
64
65 OCRDMA_CMD_ATTACH_MCAST,
66 OCRDMA_CMD_DETACH_MCAST,
67
68 OCRDMA_CMD_MAX
69};
70
71#define OCRDMA_SUBSYS_COMMON 1
72enum {
73 OCRDMA_CMD_CREATE_CQ = 12,
74 OCRDMA_CMD_CREATE_EQ = 13,
75 OCRDMA_CMD_CREATE_MQ = 21,
76 OCRDMA_CMD_GET_FW_VER = 35,
77 OCRDMA_CMD_DELETE_MQ = 53,
78 OCRDMA_CMD_DELETE_CQ = 54,
79 OCRDMA_CMD_DELETE_EQ = 55,
80 OCRDMA_CMD_GET_FW_CONFIG = 58,
81 OCRDMA_CMD_CREATE_MQ_EXT = 90
82};
83
84enum {
85 QTYPE_EQ = 1,
86 QTYPE_CQ = 2,
87 QTYPE_MCCQ = 3
88};
89
90#define OCRDMA_MAX_SGID (8)
91
92#define OCRDMA_MAX_QP 2048
93#define OCRDMA_MAX_CQ 2048
94
95enum {
96 OCRDMA_DB_RQ_OFFSET = 0xE0,
97 OCRDMA_DB_GEN2_RQ1_OFFSET = 0x100,
98 OCRDMA_DB_GEN2_RQ2_OFFSET = 0xC0,
99 OCRDMA_DB_SQ_OFFSET = 0x60,
100 OCRDMA_DB_GEN2_SQ_OFFSET = 0x1C0,
101 OCRDMA_DB_SRQ_OFFSET = OCRDMA_DB_RQ_OFFSET,
102 OCRDMA_DB_GEN2_SRQ_OFFSET = OCRDMA_DB_GEN2_RQ1_OFFSET,
103 OCRDMA_DB_CQ_OFFSET = 0x120,
104 OCRDMA_DB_EQ_OFFSET = OCRDMA_DB_CQ_OFFSET,
105 OCRDMA_DB_MQ_OFFSET = 0x140
106};
107
108#define OCRDMA_DB_CQ_RING_ID_MASK 0x3FF /* bits 0 - 9 */
109#define OCRDMA_DB_CQ_RING_ID_EXT_MASK 0x0C00 /* bits 10-11 of qid at 12-11 */
110/* qid #2 msbits at 12-11 */
111#define OCRDMA_DB_CQ_RING_ID_EXT_MASK_SHIFT 0x1
112#define OCRDMA_DB_CQ_NUM_POPPED_SHIFT (16) /* bits 16 - 28 */
113/* Rearm bit */
114#define OCRDMA_DB_CQ_REARM_SHIFT (29) /* bit 29 */
115/* solicited bit */
116#define OCRDMA_DB_CQ_SOLICIT_SHIFT (31) /* bit 31 */
117
118#define OCRDMA_EQ_ID_MASK 0x1FF /* bits 0 - 8 */
119#define OCRDMA_EQ_ID_EXT_MASK 0x3e00 /* bits 9-13 */
120#define OCRDMA_EQ_ID_EXT_MASK_SHIFT (2) /* qid bits 9-13 at 11-15 */
121
122/* Clear the interrupt for this eq */
123#define OCRDMA_EQ_CLR_SHIFT (9) /* bit 9 */
124/* Must be 1 */
125#define OCRDMA_EQ_TYPE_SHIFT (10) /* bit 10 */
126/* Number of event entries processed */
127#define OCRDMA_NUM_EQE_SHIFT (16) /* bits 16 - 28 */
128/* Rearm bit */
129#define OCRDMA_REARM_SHIFT (29) /* bit 29 */
130
131#define OCRDMA_MQ_ID_MASK 0x7FF /* bits 0 - 10 */
132/* Number of entries posted */
133#define OCRDMA_MQ_NUM_MQE_SHIFT (16) /* bits 16 - 29 */
134
135#define OCRDMA_MIN_HPAGE_SIZE (4096)
136
137#define OCRDMA_MIN_Q_PAGE_SIZE (4096)
138#define OCRDMA_MAX_Q_PAGES (8)
139
140/*
141# 0: 4K Bytes
142# 1: 8K Bytes
143# 2: 16K Bytes
144# 3: 32K Bytes
145# 4: 64K Bytes
146*/
147#define OCRDMA_MAX_Q_PAGE_SIZE_CNT (5)
148#define OCRDMA_Q_PAGE_BASE_SIZE (OCRDMA_MIN_Q_PAGE_SIZE * OCRDMA_MAX_Q_PAGES)
149
150#define MAX_OCRDMA_QP_PAGES (8)
151#define OCRDMA_MAX_WQE_MEM_SIZE (MAX_OCRDMA_QP_PAGES * OCRDMA_MIN_HQ_PAGE_SIZE)
152
153#define OCRDMA_CREATE_CQ_MAX_PAGES (4)
154#define OCRDMA_DPP_CQE_SIZE (4)
155
156#define OCRDMA_GEN2_MAX_CQE 1024
157#define OCRDMA_GEN2_CQ_PAGE_SIZE 4096
158#define OCRDMA_GEN2_WQE_SIZE 256
159#define OCRDMA_MAX_CQE 4095
160#define OCRDMA_CQ_PAGE_SIZE 16384
161#define OCRDMA_WQE_SIZE 128
162#define OCRDMA_WQE_STRIDE 8
163#define OCRDMA_WQE_ALIGN_BYTES 16
164
165#define MAX_OCRDMA_SRQ_PAGES MAX_OCRDMA_QP_PAGES
166
167enum {
168 OCRDMA_MCH_OPCODE_SHIFT = 0,
169 OCRDMA_MCH_OPCODE_MASK = 0xFF,
170 OCRDMA_MCH_SUBSYS_SHIFT = 8,
171 OCRDMA_MCH_SUBSYS_MASK = 0xFF00
172};
173
/* mailbox cmd header */
struct ocrdma_mbx_hdr {
	u32 subsys_op;		/* subsystem + opcode; see OCRDMA_MCH_* masks */
	u32 timeout;		/* in seconds */
	u32 cmd_len;
	u32 rsvd_version;
} __packed;
181
182enum {
183 OCRDMA_MBX_RSP_OPCODE_SHIFT = 0,
184 OCRDMA_MBX_RSP_OPCODE_MASK = 0xFF,
185 OCRDMA_MBX_RSP_SUBSYS_SHIFT = 8,
186 OCRDMA_MBX_RSP_SUBSYS_MASK = 0xFF << OCRDMA_MBX_RSP_SUBSYS_SHIFT,
187
188 OCRDMA_MBX_RSP_STATUS_SHIFT = 0,
189 OCRDMA_MBX_RSP_STATUS_MASK = 0xFF,
190 OCRDMA_MBX_RSP_ASTATUS_SHIFT = 8,
191 OCRDMA_MBX_RSP_ASTATUS_MASK = 0xFF << OCRDMA_MBX_RSP_ASTATUS_SHIFT
192};
193
/* mailbox cmd response */
struct ocrdma_mbx_rsp {
	u32 subsys_op;		/* decode with OCRDMA_MBX_RSP_OPCODE/SUBSYS */
	u32 status;		/* decode with OCRDMA_MBX_RSP_STATUS/ASTATUS */
	u32 rsp_len;
	u32 add_rsp_len;
} __packed;
201
202enum {
203 OCRDMA_MQE_EMBEDDED = 1,
204 OCRDMA_MQE_NONEMBEDDED = 0
205};
206
/* Scatter/gather element for non-embedded mailbox commands: a 64-bit
 * physical address split into lo/hi halves, plus the region length.
 */
struct ocrdma_mqe_sge {
	u32 pa_lo;
	u32 pa_hi;
	u32 len;
} __packed;
212
213enum {
214 OCRDMA_MQE_HDR_EMB_SHIFT = 0,
215 OCRDMA_MQE_HDR_EMB_MASK = Bit(0),
216 OCRDMA_MQE_HDR_SGE_CNT_SHIFT = 3,
217 OCRDMA_MQE_HDR_SGE_CNT_MASK = 0x1F << OCRDMA_MQE_HDR_SGE_CNT_SHIFT,
218 OCRDMA_MQE_HDR_SPECIAL_SHIFT = 24,
219 OCRDMA_MQE_HDR_SPECIAL_MASK = 0xFF << OCRDMA_MQE_HDR_SPECIAL_SHIFT
220};
221
/* Header preceding every mailbox queue entry. */
struct ocrdma_mqe_hdr {
	u32 spcl_sge_cnt_emb;	/* special/SGE-count/embedded flags; see
				 * the OCRDMA_MQE_HDR_* shifts and masks */
	u32 pyld_len;
	u32 tag_lo;		/* command tag; presumably echoed back in the
				 * MCQE completion (tag_lo/tag_hi) — verify */
	u32 tag_hi;
	u32 rsvd3;
} __packed;
229
/* Embedded mailbox command: mailbox header plus up to 220 bytes of
 * command payload carried inline in the MQE.
 */
struct ocrdma_mqe_emb_cmd {
	struct ocrdma_mbx_hdr mch;
	u8 pyld[220];
} __packed;
234
/* Mailbox queue entry: common header followed by a 236-byte payload
 * union — an embedded command, up to 19 SGEs for a non-embedded
 * command, raw bytes, or the firmware response.
 */
struct ocrdma_mqe {
	struct ocrdma_mqe_hdr hdr;
	union {
		struct ocrdma_mqe_emb_cmd emb_req;
		struct {
			struct ocrdma_mqe_sge sge[19];
		} nonemb_req;
		u8 cmd[236];
		struct ocrdma_mbx_rsp rsp;
	} u;
} __packed;
246
247#define OCRDMA_EQ_LEN 4096
248#define OCRDMA_MQ_CQ_LEN 256
249#define OCRDMA_MQ_LEN 128
250
251#define PAGE_SHIFT_4K 12
252#define PAGE_SIZE_4K (1 << PAGE_SHIFT_4K)
253
/* Returns number of pages spanned by the data starting at the given addr */
/* (the offset of _address within its first 4K page is included, so an
 * unaligned buffer can span one more page than size/4K alone implies) */
#define PAGES_4K_SPANNED(_address, size) \
	((u32)((((size_t)(_address) & (PAGE_SIZE_4K - 1)) + \
			(size) + (PAGE_SIZE_4K - 1)) >> PAGE_SHIFT_4K))
258
259struct ocrdma_delete_q_req {
260 struct ocrdma_mbx_hdr req;
261 u32 id;
262} __packed;
263
264struct ocrdma_pa {
265 u32 lo;
266 u32 hi;
267} __packed;
268
269#define MAX_OCRDMA_EQ_PAGES (8)
270struct ocrdma_create_eq_req {
271 struct ocrdma_mbx_hdr req;
272 u32 num_pages;
273 u32 valid;
274 u32 cnt;
275 u32 delay;
276 u32 rsvd;
277 struct ocrdma_pa pa[MAX_OCRDMA_EQ_PAGES];
278} __packed;
279
280enum {
281 OCRDMA_CREATE_EQ_VALID = Bit(29),
282 OCRDMA_CREATE_EQ_CNT_SHIFT = 26,
283 OCRDMA_CREATE_CQ_DELAY_SHIFT = 13,
284};
285
286struct ocrdma_create_eq_rsp {
287 struct ocrdma_mbx_rsp rsp;
288 u32 vector_eqid;
289};
290
291#define OCRDMA_EQ_MINOR_OTHER (0x1)
292
293enum {
294 OCRDMA_MCQE_STATUS_SHIFT = 0,
295 OCRDMA_MCQE_STATUS_MASK = 0xFFFF,
296 OCRDMA_MCQE_ESTATUS_SHIFT = 16,
297 OCRDMA_MCQE_ESTATUS_MASK = 0xFFFF << OCRDMA_MCQE_ESTATUS_SHIFT,
298 OCRDMA_MCQE_CONS_SHIFT = 27,
299 OCRDMA_MCQE_CONS_MASK = Bit(27),
300 OCRDMA_MCQE_CMPL_SHIFT = 28,
301 OCRDMA_MCQE_CMPL_MASK = Bit(28),
302 OCRDMA_MCQE_AE_SHIFT = 30,
303 OCRDMA_MCQE_AE_MASK = Bit(30),
304 OCRDMA_MCQE_VALID_SHIFT = 31,
305 OCRDMA_MCQE_VALID_MASK = Bit(31)
306};
307
308struct ocrdma_mcqe {
309 u32 status;
310 u32 tag_lo;
311 u32 tag_hi;
312 u32 valid_ae_cmpl_cons;
313} __packed;
314
315enum {
316 OCRDMA_AE_MCQE_QPVALID = Bit(31),
317 OCRDMA_AE_MCQE_QPID_MASK = 0xFFFF,
318
319 OCRDMA_AE_MCQE_CQVALID = Bit(31),
320 OCRDMA_AE_MCQE_CQID_MASK = 0xFFFF,
321 OCRDMA_AE_MCQE_VALID = Bit(31),
322 OCRDMA_AE_MCQE_AE = Bit(30),
323 OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT = 16,
324 OCRDMA_AE_MCQE_EVENT_TYPE_MASK =
325 0xFF << OCRDMA_AE_MCQE_EVENT_TYPE_SHIFT,
326 OCRDMA_AE_MCQE_EVENT_CODE_SHIFT = 8,
327 OCRDMA_AE_MCQE_EVENT_CODE_MASK =
328 0xFF << OCRDMA_AE_MCQE_EVENT_CODE_SHIFT
329};
330struct ocrdma_ae_mcqe {
331 u32 qpvalid_qpid;
332 u32 cqvalid_cqid;
333 u32 evt_tag;
334 u32 valid_ae_event;
335} __packed;
336
337enum {
338 OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT = 16,
339 OCRDMA_AE_MPA_MCQE_REQ_ID_MASK = 0xFFFF <<
340 OCRDMA_AE_MPA_MCQE_REQ_ID_SHIFT,
341
342 OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT = 8,
343 OCRDMA_AE_MPA_MCQE_EVENT_CODE_MASK = 0xFF <<
344 OCRDMA_AE_MPA_MCQE_EVENT_CODE_SHIFT,
345 OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT = 16,
346 OCRDMA_AE_MPA_MCQE_EVENT_TYPE_MASK = 0xFF <<
347 OCRDMA_AE_MPA_MCQE_EVENT_TYPE_SHIFT,
348 OCRDMA_AE_MPA_MCQE_EVENT_AE_SHIFT = 30,
349 OCRDMA_AE_MPA_MCQE_EVENT_AE_MASK = Bit(30),
350 OCRDMA_AE_MPA_MCQE_EVENT_VALID_SHIFT = 31,
351 OCRDMA_AE_MPA_MCQE_EVENT_VALID_MASK = Bit(31)
352};
353
354struct ocrdma_ae_mpa_mcqe {
355 u32 req_id;
356 u32 w1;
357 u32 w2;
358 u32 valid_ae_event;
359} __packed;
360
361enum {
362 OCRDMA_AE_QP_MCQE_NEW_QP_STATE_SHIFT = 0,
363 OCRDMA_AE_QP_MCQE_NEW_QP_STATE_MASK = 0xFFFF,
364 OCRDMA_AE_QP_MCQE_QP_ID_SHIFT = 16,
365 OCRDMA_AE_QP_MCQE_QP_ID_MASK = 0xFFFF <<
366 OCRDMA_AE_QP_MCQE_QP_ID_SHIFT,
367
368 OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT = 8,
369 OCRDMA_AE_QP_MCQE_EVENT_CODE_MASK = 0xFF <<
370 OCRDMA_AE_QP_MCQE_EVENT_CODE_SHIFT,
371 OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT = 16,
372 OCRDMA_AE_QP_MCQE_EVENT_TYPE_MASK = 0xFF <<
373 OCRDMA_AE_QP_MCQE_EVENT_TYPE_SHIFT,
374 OCRDMA_AE_QP_MCQE_EVENT_AE_SHIFT = 30,
375 OCRDMA_AE_QP_MCQE_EVENT_AE_MASK = Bit(30),
376 OCRDMA_AE_QP_MCQE_EVENT_VALID_SHIFT = 31,
377 OCRDMA_AE_QP_MCQE_EVENT_VALID_MASK = Bit(31)
378};
379
380struct ocrdma_ae_qp_mcqe {
381 u32 qp_id_state;
382 u32 w1;
383 u32 w2;
384 u32 valid_ae_event;
385} __packed;
386
387#define OCRDMA_ASYNC_EVE_CODE 0x14
388
389enum OCRDMA_ASYNC_EVENT_TYPE {
390 OCRDMA_CQ_ERROR = 0x00,
391 OCRDMA_CQ_OVERRUN_ERROR = 0x01,
392 OCRDMA_CQ_QPCAT_ERROR = 0x02,
393 OCRDMA_QP_ACCESS_ERROR = 0x03,
394 OCRDMA_QP_COMM_EST_EVENT = 0x04,
395 OCRDMA_SQ_DRAINED_EVENT = 0x05,
396 OCRDMA_DEVICE_FATAL_EVENT = 0x08,
397 OCRDMA_SRQCAT_ERROR = 0x0E,
398 OCRDMA_SRQ_LIMIT_EVENT = 0x0F,
399 OCRDMA_QP_LAST_WQE_EVENT = 0x10
400};
401
402/* mailbox command request and responses */
403enum {
404 OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_SHIFT = 2,
405 OCRDMA_MBX_QUERY_CFG_CQ_OVERFLOW_MASK = Bit(2),
406 OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_SHIFT = 3,
407 OCRDMA_MBX_QUERY_CFG_SRQ_SUPPORTED_MASK = Bit(3),
408 OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT = 8,
409 OCRDMA_MBX_QUERY_CFG_MAX_QP_MASK = 0xFFFFFF <<
410 OCRDMA_MBX_QUERY_CFG_MAX_QP_SHIFT,
411
412 OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT = 16,
413 OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK = 0xFFFF <<
414 OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT,
415 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT = 8,
416 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK = 0xFF <<
417 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
418
419 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0,
420 OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF,
421 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16,
422 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF <<
423 OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT,
424
425 OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0,
426 OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF,
427 OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT = 16,
428 OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_MASK = 0xFFFF <<
429 OCRDMA_MBX_QUERY_CFG_MAX_IRD_PER_QP_SHIFT,
430
431 OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET = 24,
432 OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_MASK = 0xFF <<
433 OCRDMA_MBX_QUERY_CFG_MAX_WQE_SIZE_OFFSET,
434 OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET = 16,
435 OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_MASK = 0xFF <<
436 OCRDMA_MBX_QUERY_CFG_MAX_RQE_SIZE_OFFSET,
437 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET = 0,
438 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_MASK = 0xFFFF <<
439 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CQES_OFFSET,
440
441 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET = 16,
442 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK = 0xFFFF <<
443 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET,
444 OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET = 0,
445 OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_MASK = 0xFFFF <<
446 OCRDMA_MBX_QUERY_CFG_MAX_RPIR_QPS_OFFSET,
447
448 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET = 16,
449 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK = 0xFFFF <<
450 OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET,
451 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET = 0,
452 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_MASK = 0xFFFF <<
453 OCRDMA_MBX_QUERY_CFG_MAX_DPP_CREDITS_OFFSET,
454
455 OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET = 0,
456 OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_MASK = 0xFFFF <<
457 OCRDMA_MBX_QUERY_CFG_MAX_DPP_QPS_OFFSET,
458
459 OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET = 16,
460 OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_MASK = 0xFFFF <<
461 OCRDMA_MBX_QUERY_CFG_MAX_WQES_PER_WQ_OFFSET,
462 OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET = 0,
463 OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_MASK = 0xFFFF <<
464 OCRDMA_MBX_QUERY_CFG_MAX_RQES_PER_RQ_OFFSET,
465
466 OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET = 16,
467 OCRDMA_MBX_QUERY_CFG_MAX_CQ_MASK = 0xFFFF <<
468 OCRDMA_MBX_QUERY_CFG_MAX_CQ_OFFSET,
469 OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET = 0,
470 OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_MASK = 0xFFFF <<
471 OCRDMA_MBX_QUERY_CFG_MAX_CQES_PER_CQ_OFFSET,
472
473 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET = 16,
474 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_MASK = 0xFFFF <<
475 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_RQE_OFFSET,
476 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0,
477 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF <<
478 OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET,
479};
480
/* OCRDMA_CMD_QUERY_CONFIG response: device capability limits reported
 * by firmware. Several fields pack two sub-fields per u32; decode them
 * with the OCRDMA_MBX_QUERY_CFG_* shift/mask definitions above.
 */
struct ocrdma_mbx_query_config {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
	u32 qp_srq_cq_ird_ord;
	u32 max_pd_ca_ack_delay;
	u32 max_write_send_sge;
	u32 max_ird_ord_per_qp;
	u32 max_shared_ird_ord;
	u32 max_mr;
	u64 max_mr_size;
	u32 max_num_mr_pbl;
	u32 max_mw;
	u32 max_fmr;
	u32 max_pages_per_frmr;
	u32 max_mcast_group;
	u32 max_mcast_qp_attach;
	u32 max_total_mcast_qp_attach;
	u32 wqe_rqe_stride_max_dpp_cqs;
	u32 max_srq_rpir_qps;
	u32 max_dpp_pds_credits;
	u32 max_dpp_credits_pds_per_pd;
	u32 max_wqes_rqes_per_q;
	u32 max_cq_cqes_per_cq;
	u32 max_srq_rqe_sge;
} __packed;
506
507struct ocrdma_fw_ver_rsp {
508 struct ocrdma_mqe_hdr hdr;
509 struct ocrdma_mbx_rsp rsp;
510
511 u8 running_ver[32];
512} __packed;
513
514struct ocrdma_fw_conf_rsp {
515 struct ocrdma_mqe_hdr hdr;
516 struct ocrdma_mbx_rsp rsp;
517
518 u32 config_num;
519 u32 asic_revision;
520 u32 phy_port;
521 u32 fn_mode;
522 struct {
523 u32 mode;
524 u32 nic_wqid_base;
525 u32 nic_wq_tot;
526 u32 prot_wqid_base;
527 u32 prot_wq_tot;
528 u32 prot_rqid_base;
529 u32 prot_rqid_tot;
530 u32 rsvd[6];
531 } ulp[2];
532 u32 fn_capabilities;
533 u32 rsvd1;
534 u32 rsvd2;
535 u32 base_eqid;
536 u32 max_eq;
537
538} __packed;
539
540enum {
541 OCRDMA_FN_MODE_RDMA = 0x4
542};
543
544enum {
545 OCRDMA_CREATE_CQ_VER2 = 2,
546
547 OCRDMA_CREATE_CQ_PAGE_CNT_MASK = 0xFFFF,
548 OCRDMA_CREATE_CQ_PAGE_SIZE_SHIFT = 16,
549 OCRDMA_CREATE_CQ_PAGE_SIZE_MASK = 0xFF,
550
551 OCRDMA_CREATE_CQ_COALESCWM_SHIFT = 12,
552 OCRDMA_CREATE_CQ_COALESCWM_MASK = Bit(13) | Bit(12),
553 OCRDMA_CREATE_CQ_FLAGS_NODELAY = Bit(14),
554 OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID = Bit(15),
555
556 OCRDMA_CREATE_CQ_EQ_ID_MASK = 0xFFFF,
557 OCRDMA_CREATE_CQ_CQE_COUNT_MASK = 0xFFFF
558};
559
560enum {
561 OCRDMA_CREATE_CQ_VER0 = 0,
562 OCRDMA_CREATE_CQ_DPP = 1,
563 OCRDMA_CREATE_CQ_TYPE_SHIFT = 24,
564 OCRDMA_CREATE_CQ_EQID_SHIFT = 22,
565
566 OCRDMA_CREATE_CQ_CNT_SHIFT = 27,
567 OCRDMA_CREATE_CQ_FLAGS_VALID = Bit(29),
568 OCRDMA_CREATE_CQ_FLAGS_EVENTABLE = Bit(31),
569 OCRDMA_CREATE_CQ_DEF_FLAGS = OCRDMA_CREATE_CQ_FLAGS_VALID |
570 OCRDMA_CREATE_CQ_FLAGS_EVENTABLE |
571 OCRDMA_CREATE_CQ_FLAGS_NODELAY
572};
573
574struct ocrdma_create_cq_cmd {
575 struct ocrdma_mbx_hdr req;
576 u32 pgsz_pgcnt;
577 u32 ev_cnt_flags;
578 u32 eqn;
579 u32 cqe_count;
580 u32 rsvd6;
581 struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
582};
583
584struct ocrdma_create_cq {
585 struct ocrdma_mqe_hdr hdr;
586 struct ocrdma_create_cq_cmd cmd;
587} __packed;
588
589enum {
590 OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
591};
592
593struct ocrdma_create_cq_cmd_rsp {
594 struct ocrdma_mbx_rsp rsp;
595 u32 cq_id;
596} __packed;
597
598struct ocrdma_create_cq_rsp {
599 struct ocrdma_mqe_hdr hdr;
600 struct ocrdma_create_cq_cmd_rsp rsp;
601} __packed;
602
603enum {
604 OCRDMA_CREATE_MQ_V0_CQ_ID_SHIFT = 22,
605 OCRDMA_CREATE_MQ_CQ_ID_SHIFT = 16,
606 OCRDMA_CREATE_MQ_RING_SIZE_SHIFT = 16,
607 OCRDMA_CREATE_MQ_VALID = Bit(31),
608 OCRDMA_CREATE_MQ_ASYNC_CQ_VALID = Bit(0)
609};
610
611struct ocrdma_create_mq_v0 {
612 u32 pages;
613 u32 cqid_ringsize;
614 u32 valid;
615 u32 async_cqid_valid;
616 u32 rsvd;
617 struct ocrdma_pa pa[8];
618} __packed;
619
620struct ocrdma_create_mq_v1 {
621 u32 cqid_pages;
622 u32 async_event_bitmap;
623 u32 async_cqid_ringsize;
624 u32 valid;
625 u32 async_cqid_valid;
626 u32 rsvd;
627 struct ocrdma_pa pa[8];
628} __packed;
629
630struct ocrdma_create_mq_req {
631 struct ocrdma_mbx_hdr req;
632 union {
633 struct ocrdma_create_mq_v0 v0;
634 struct ocrdma_create_mq_v1 v1;
635 };
636} __packed;
637
638struct ocrdma_create_mq_rsp {
639 struct ocrdma_mbx_rsp rsp;
640 u32 id;
641} __packed;
642
643enum {
644 OCRDMA_DESTROY_CQ_QID_SHIFT = 0,
645 OCRDMA_DESTROY_CQ_QID_MASK = 0xFFFF,
646 OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT = 16,
647 OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_MASK = 0xFFFF <<
648 OCRDMA_DESTROY_CQ_QID_BYPASS_FLUSH_SHIFT
649};
650
651struct ocrdma_destroy_cq {
652 struct ocrdma_mqe_hdr hdr;
653 struct ocrdma_mbx_hdr req;
654
655 u32 bypass_flush_qid;
656} __packed;
657
658struct ocrdma_destroy_cq_rsp {
659 struct ocrdma_mqe_hdr hdr;
660 struct ocrdma_mbx_rsp rsp;
661} __packed;
662
663enum {
664 OCRDMA_QPT_GSI = 1,
665 OCRDMA_QPT_RC = 2,
666 OCRDMA_QPT_UD = 4,
667};
668
669enum {
670 OCRDMA_CREATE_QP_REQ_PD_ID_SHIFT = 0,
671 OCRDMA_CREATE_QP_REQ_PD_ID_MASK = 0xFFFF,
672 OCRDMA_CREATE_QP_REQ_SQ_PAGE_SIZE_SHIFT = 16,
673 OCRDMA_CREATE_QP_REQ_RQ_PAGE_SIZE_SHIFT = 19,
674 OCRDMA_CREATE_QP_REQ_QPT_SHIFT = 29,
675 OCRDMA_CREATE_QP_REQ_QPT_MASK = Bit(31) | Bit(30) | Bit(29),
676
677 OCRDMA_CREATE_QP_REQ_MAX_RQE_SHIFT = 0,
678 OCRDMA_CREATE_QP_REQ_MAX_RQE_MASK = 0xFFFF,
679 OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT = 16,
680 OCRDMA_CREATE_QP_REQ_MAX_WQE_MASK = 0xFFFF <<
681 OCRDMA_CREATE_QP_REQ_MAX_WQE_SHIFT,
682
683 OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_SHIFT = 0,
684 OCRDMA_CREATE_QP_REQ_MAX_SGE_WRITE_MASK = 0xFFFF,
685 OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT = 16,
686 OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_MASK = 0xFFFF <<
687 OCRDMA_CREATE_QP_REQ_MAX_SGE_SEND_SHIFT,
688
689 OCRDMA_CREATE_QP_REQ_FMR_EN_SHIFT = 0,
690 OCRDMA_CREATE_QP_REQ_FMR_EN_MASK = Bit(0),
691 OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_SHIFT = 1,
692 OCRDMA_CREATE_QP_REQ_ZERO_LKEYEN_MASK = Bit(1),
693 OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_SHIFT = 2,
694 OCRDMA_CREATE_QP_REQ_BIND_MEMWIN_MASK = Bit(2),
695 OCRDMA_CREATE_QP_REQ_INB_WREN_SHIFT = 3,
696 OCRDMA_CREATE_QP_REQ_INB_WREN_MASK = Bit(3),
697 OCRDMA_CREATE_QP_REQ_INB_RDEN_SHIFT = 4,
698 OCRDMA_CREATE_QP_REQ_INB_RDEN_MASK = Bit(4),
699 OCRDMA_CREATE_QP_REQ_USE_SRQ_SHIFT = 5,
700 OCRDMA_CREATE_QP_REQ_USE_SRQ_MASK = Bit(5),
701 OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_SHIFT = 6,
702 OCRDMA_CREATE_QP_REQ_ENABLE_RPIR_MASK = Bit(6),
703 OCRDMA_CREATE_QP_REQ_ENABLE_DPP_SHIFT = 7,
704 OCRDMA_CREATE_QP_REQ_ENABLE_DPP_MASK = Bit(7),
705 OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_SHIFT = 8,
706 OCRDMA_CREATE_QP_REQ_ENABLE_DPP_CQ_MASK = Bit(8),
707 OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT = 16,
708 OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_MASK = 0xFFFF <<
709 OCRDMA_CREATE_QP_REQ_MAX_SGE_RECV_SHIFT,
710
711 OCRDMA_CREATE_QP_REQ_MAX_IRD_SHIFT = 0,
712 OCRDMA_CREATE_QP_REQ_MAX_IRD_MASK = 0xFFFF,
713 OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT = 16,
714 OCRDMA_CREATE_QP_REQ_MAX_ORD_MASK = 0xFFFF <<
715 OCRDMA_CREATE_QP_REQ_MAX_ORD_SHIFT,
716
717 OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_SHIFT = 0,
718 OCRDMA_CREATE_QP_REQ_NUM_RQ_PAGES_MASK = 0xFFFF,
719 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT = 16,
720 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_MASK = 0xFFFF <<
721 OCRDMA_CREATE_QP_REQ_NUM_WQ_PAGES_SHIFT,
722
723 OCRDMA_CREATE_QP_REQ_RQE_SIZE_SHIFT = 0,
724 OCRDMA_CREATE_QP_REQ_RQE_SIZE_MASK = 0xFFFF,
725 OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT = 16,
726 OCRDMA_CREATE_QP_REQ_WQE_SIZE_MASK = 0xFFFF <<
727 OCRDMA_CREATE_QP_REQ_WQE_SIZE_SHIFT,
728
729 OCRDMA_CREATE_QP_REQ_RQ_CQID_SHIFT = 0,
730 OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK = 0xFFFF,
731 OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT = 16,
732 OCRDMA_CREATE_QP_REQ_WQ_CQID_MASK = 0xFFFF <<
733 OCRDMA_CREATE_QP_REQ_WQ_CQID_SHIFT,
734
735 OCRDMA_CREATE_QP_REQ_DPP_CQPID_SHIFT = 0,
736 OCRDMA_CREATE_QP_REQ_DPP_CQPID_MASK = 0xFFFF,
737 OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT = 16,
738 OCRDMA_CREATE_QP_REQ_DPP_CREDIT_MASK = 0xFFFF <<
739 OCRDMA_CREATE_QP_REQ_DPP_CREDIT_SHIFT
740};
741
742enum {
743 OCRDMA_CREATE_QP_REQ_DPP_CREDIT_LIMIT = 16,
744 OCRDMA_CREATE_QP_RSP_DPP_PAGE_SHIFT = 1
745};
746
747#define MAX_OCRDMA_IRD_PAGES 4
748
749enum ocrdma_qp_flags {
750 OCRDMA_QP_MW_BIND = 1,
751 OCRDMA_QP_LKEY0 = (1 << 1),
752 OCRDMA_QP_FAST_REG = (1 << 2),
753 OCRDMA_QP_INB_RD = (1 << 6),
754 OCRDMA_QP_INB_WR = (1 << 7),
755};
756
757enum ocrdma_qp_state {
758 OCRDMA_QPS_RST = 0,
759 OCRDMA_QPS_INIT = 1,
760 OCRDMA_QPS_RTR = 2,
761 OCRDMA_QPS_RTS = 3,
762 OCRDMA_QPS_SQE = 4,
763 OCRDMA_QPS_SQ_DRAINING = 5,
764 OCRDMA_QPS_ERR = 6,
765 OCRDMA_QPS_SQD = 7
766};
767
/* OCRDMA_CMD_CREATE_QP request. Multi-purpose fields pack several
 * values per u32 (decode with the OCRDMA_CREATE_QP_REQ_* shifts and
 * masks above); the pa arrays carry the physical page addresses of
 * the WQ, RQ and IRD queue memory.
 */
struct ocrdma_create_qp_req {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	u32 type_pgsz_pdn;
	u32 max_wqe_rqe;
	u32 max_sge_send_write;
	u32 max_sge_recv_flags;
	u32 max_ord_ird;
	u32 num_wq_rq_pages;
	u32 wqe_rqe_size;
	u32 wq_rq_cqid;
	struct ocrdma_pa wq_addr[MAX_OCRDMA_QP_PAGES];
	struct ocrdma_pa rq_addr[MAX_OCRDMA_QP_PAGES];
	u32 dpp_credits_cqid;
	u32 rpir_lkey;
	struct ocrdma_pa ird_addr[MAX_OCRDMA_IRD_PAGES];
} __packed;
786
/* Field layout of the create-QP mailbox response words. */
enum {
	OCRDMA_CREATE_QP_RSP_QP_ID_SHIFT = 0,
	OCRDMA_CREATE_QP_RSP_QP_ID_MASK = 0xFFFF,

	OCRDMA_CREATE_QP_RSP_MAX_RQE_SHIFT = 0,
	OCRDMA_CREATE_QP_RSP_MAX_RQE_MASK = 0xFFFF,
	OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_MAX_WQE_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_MAX_WQE_SHIFT,

	OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_SHIFT = 0,
	OCRDMA_CREATE_QP_RSP_MAX_SGE_WRITE_MASK = 0xFFFF,
	OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_MAX_SGE_SEND_SHIFT,

	OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_MAX_SGE_RECV_SHIFT,

	OCRDMA_CREATE_QP_RSP_MAX_IRD_SHIFT = 0,
	OCRDMA_CREATE_QP_RSP_MAX_IRD_MASK = 0xFFFF,
	OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_MAX_ORD_SHIFT,

	OCRDMA_CREATE_QP_RSP_RQ_ID_SHIFT = 0,
	OCRDMA_CREATE_QP_RSP_RQ_ID_MASK = 0xFFFF,
	OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_SQ_ID_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_SQ_ID_SHIFT,

	OCRDMA_CREATE_QP_RSP_DPP_ENABLED_MASK = Bit(0),
	OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT = 1,
	OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_MASK = 0x7FFF <<
		OCRDMA_CREATE_QP_RSP_DPP_PAGE_OFFSET_SHIFT,
	OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT = 16,
	OCRDMA_CREATE_QP_RSP_DPP_CREDITS_MASK = 0xFFFF <<
		OCRDMA_CREATE_QP_RSP_DPP_CREDITS_SHIFT,
};

/* Create-QP mailbox response payload. */
struct ocrdma_create_qp_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 qp_id;
	u32 max_wqe_rqe;	/* actual queue depths allocated by FW */
	u32 max_sge_send_write;
	u32 max_sge_recv;
	u32 max_ord_ird;
	u32 sq_rq_id;
	u32 dpp_response;
} __packed;

/* Destroy-QP mailbox command/response pair. */
struct ocrdma_destroy_qp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 qp_id;
} __packed;

struct ocrdma_destroy_qp_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;
851
/* Modify-QP command: QP id field plus the "attribute valid" bit for
 * each parameter that may be changed (mirrors IB_QP_* attr_mask). */
enum {
	OCRDMA_MODIFY_QP_ID_SHIFT = 0,
	OCRDMA_MODIFY_QP_ID_MASK = 0xFFFF,

	OCRDMA_QP_PARA_QPS_VALID = Bit(0),
	OCRDMA_QP_PARA_SQD_ASYNC_VALID = Bit(1),
	OCRDMA_QP_PARA_PKEY_VALID = Bit(2),
	OCRDMA_QP_PARA_QKEY_VALID = Bit(3),
	OCRDMA_QP_PARA_PMTU_VALID = Bit(4),
	OCRDMA_QP_PARA_ACK_TO_VALID = Bit(5),
	OCRDMA_QP_PARA_RETRY_CNT_VALID = Bit(6),
	OCRDMA_QP_PARA_RRC_VALID = Bit(7),
	OCRDMA_QP_PARA_RQPSN_VALID = Bit(8),
	OCRDMA_QP_PARA_MAX_IRD_VALID = Bit(9),
	OCRDMA_QP_PARA_MAX_ORD_VALID = Bit(10),
	OCRDMA_QP_PARA_RNT_VALID = Bit(11),
	OCRDMA_QP_PARA_SQPSN_VALID = Bit(12),
	OCRDMA_QP_PARA_DST_QPN_VALID = Bit(13),
	OCRDMA_QP_PARA_MAX_WQE_VALID = Bit(14),
	OCRDMA_QP_PARA_MAX_RQE_VALID = Bit(15),
	OCRDMA_QP_PARA_SGE_SEND_VALID = Bit(16),
	OCRDMA_QP_PARA_SGE_RECV_VALID = Bit(17),
	OCRDMA_QP_PARA_SGE_WR_VALID = Bit(18),
	OCRDMA_QP_PARA_INB_RDEN_VALID = Bit(19),
	OCRDMA_QP_PARA_INB_WREN_VALID = Bit(20),
	OCRDMA_QP_PARA_FLOW_LBL_VALID = Bit(21),
	OCRDMA_QP_PARA_BIND_EN_VALID = Bit(22),
	OCRDMA_QP_PARA_ZLKEY_EN_VALID = Bit(23),
	OCRDMA_QP_PARA_FMR_EN_VALID = Bit(24),
	OCRDMA_QP_PARA_INBAT_EN_VALID = Bit(25),
	OCRDMA_QP_PARA_VLAN_EN_VALID = Bit(26),

	/* RDMA operation enables (modify-QP flags word). */
	OCRDMA_MODIFY_QP_FLAGS_RD = Bit(0),
	OCRDMA_MODIFY_QP_FLAGS_WR = Bit(1),
	OCRDMA_MODIFY_QP_FLAGS_SEND = Bit(2),
	OCRDMA_MODIFY_QP_FLAGS_ATOMIC = Bit(3)
};
889
890enum {
891 OCRDMA_QP_PARAMS_SRQ_ID_SHIFT = 0,
892 OCRDMA_QP_PARAMS_SRQ_ID_MASK = 0xFFFF,
893
894 OCRDMA_QP_PARAMS_MAX_RQE_SHIFT = 0,
895 OCRDMA_QP_PARAMS_MAX_RQE_MASK = 0xFFFF,
896 OCRDMA_QP_PARAMS_MAX_WQE_SHIFT = 16,
897 OCRDMA_QP_PARAMS_MAX_WQE_MASK = 0xFFFF <<
898 OCRDMA_QP_PARAMS_MAX_WQE_SHIFT,
899
900 OCRDMA_QP_PARAMS_MAX_SGE_WRITE_SHIFT = 0,
901 OCRDMA_QP_PARAMS_MAX_SGE_WRITE_MASK = 0xFFFF,
902 OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT = 16,
903 OCRDMA_QP_PARAMS_MAX_SGE_SEND_MASK = 0xFFFF <<
904 OCRDMA_QP_PARAMS_MAX_SGE_SEND_SHIFT,
905
906 OCRDMA_QP_PARAMS_FLAGS_FMR_EN = Bit(0),
907 OCRDMA_QP_PARAMS_FLAGS_LKEY_0_EN = Bit(1),
908 OCRDMA_QP_PARAMS_FLAGS_BIND_MW_EN = Bit(2),
909 OCRDMA_QP_PARAMS_FLAGS_INBWR_EN = Bit(3),
910 OCRDMA_QP_PARAMS_FLAGS_INBRD_EN = Bit(4),
911 OCRDMA_QP_PARAMS_STATE_SHIFT = 5,
912 OCRDMA_QP_PARAMS_STATE_MASK = Bit(5) | Bit(6) | Bit(7),
913 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC = Bit(8),
914 OCRDMA_QP_PARAMS_FLAGS_INB_ATEN = Bit(9),
915 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT = 16,
916 OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK = 0xFFFF <<
917 OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
918
919 OCRDMA_QP_PARAMS_MAX_IRD_SHIFT = 0,
920 OCRDMA_QP_PARAMS_MAX_IRD_MASK = 0xFFFF,
921 OCRDMA_QP_PARAMS_MAX_ORD_SHIFT = 16,
922 OCRDMA_QP_PARAMS_MAX_ORD_MASK = 0xFFFF <<
923 OCRDMA_QP_PARAMS_MAX_ORD_SHIFT,
924
925 OCRDMA_QP_PARAMS_RQ_CQID_SHIFT = 0,
926 OCRDMA_QP_PARAMS_RQ_CQID_MASK = 0xFFFF,
927 OCRDMA_QP_PARAMS_WQ_CQID_SHIFT = 16,
928 OCRDMA_QP_PARAMS_WQ_CQID_MASK = 0xFFFF <<
929 OCRDMA_QP_PARAMS_WQ_CQID_SHIFT,
930
931 OCRDMA_QP_PARAMS_RQ_PSN_SHIFT = 0,
932 OCRDMA_QP_PARAMS_RQ_PSN_MASK = 0xFFFFFF,
933 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT = 24,
934 OCRDMA_QP_PARAMS_HOP_LMT_MASK = 0xFF <<
935 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
936
937 OCRDMA_QP_PARAMS_SQ_PSN_SHIFT = 0,
938 OCRDMA_QP_PARAMS_SQ_PSN_MASK = 0xFFFFFF,
939 OCRDMA_QP_PARAMS_TCLASS_SHIFT = 24,
940 OCRDMA_QP_PARAMS_TCLASS_MASK = 0xFF <<
941 OCRDMA_QP_PARAMS_TCLASS_SHIFT,
942
943 OCRDMA_QP_PARAMS_DEST_QPN_SHIFT = 0,
944 OCRDMA_QP_PARAMS_DEST_QPN_MASK = 0xFFFFFF,
945 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT = 24,
946 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK = 0x7 <<
947 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT,
948 OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT = 27,
949 OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK = 0x1F <<
950 OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT,
951
952 OCRDMA_QP_PARAMS_PKEY_IDNEX_SHIFT = 0,
953 OCRDMA_QP_PARAMS_PKEY_INDEX_MASK = 0xFFFF,
954 OCRDMA_QP_PARAMS_PATH_MTU_SHIFT = 18,
955 OCRDMA_QP_PARAMS_PATH_MTU_MASK = 0x3FFF <<
956 OCRDMA_QP_PARAMS_PATH_MTU_SHIFT,
957
958 OCRDMA_QP_PARAMS_FLOW_LABEL_SHIFT = 0,
959 OCRDMA_QP_PARAMS_FLOW_LABEL_MASK = 0xFFFFF,
960 OCRDMA_QP_PARAMS_SL_SHIFT = 20,
961 OCRDMA_QP_PARAMS_SL_MASK = 0xF <<
962 OCRDMA_QP_PARAMS_SL_SHIFT,
963 OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT = 24,
964 OCRDMA_QP_PARAMS_RETRY_CNT_MASK = 0x7 <<
965 OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT,
966 OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT = 27,
967 OCRDMA_QP_PARAMS_RNR_NAK_TIMER_MASK = 0x1F <<
968 OCRDMA_QP_PARAMS_RNR_NAK_TIMER_SHIFT,
969
970 OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_SHIFT = 0,
971 OCRDMA_QP_PARAMS_DMAC_B4_TO_B5_MASK = 0xFFFF,
972 OCRDMA_QP_PARAMS_VLAN_SHIFT = 16,
973 OCRDMA_QP_PARAMS_VLAN_MASK = 0xFFFF <<
974 OCRDMA_QP_PARAMS_VLAN_SHIFT
975};
976
/* QP attribute set shared by the modify-QP command and query-QP
 * response.  Packed sub-fields use the OCRDMA_QP_PARAMS_* shift/mask
 * constants above. */
struct ocrdma_qp_params {
	u32 id;
	u32 max_wqe_rqe;
	u32 max_sge_send_write;
	u32 max_sge_recv_flags;
	u32 max_ord_ird;
	u32 wq_rq_cqid;
	u32 hop_lmt_rq_psn;
	u32 tclass_sq_psn;
	u32 ack_to_rnr_rtc_dest_qpn;
	u32 path_mtu_pkey_indx;
	u32 rnt_rc_sl_fl;
	u8 sgid[16];
	u8 dgid[16];
	u32 dmac_b0_to_b3;
	u32 vlan_dmac_b4_to_b5;
	u32 qkey;
} __packed;


/* Modify-QP mailbox command payload. */
struct ocrdma_modify_qp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	struct ocrdma_qp_params params;
	u32 flags;		/* OCRDMA_QP_PARA_*_VALID bits */
	u32 rdma_flags;		/* OCRDMA_MODIFY_QP_FLAGS_* bits */
	u32 num_outstanding_atomic_rd;
} __packed;

/* Field layout of the modify-QP response words. */
enum {
	OCRDMA_MODIFY_QP_RSP_MAX_RQE_SHIFT = 0,
	OCRDMA_MODIFY_QP_RSP_MAX_RQE_MASK = 0xFFFF,
	OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT = 16,
	OCRDMA_MODIFY_QP_RSP_MAX_WQE_MASK = 0xFFFF <<
		OCRDMA_MODIFY_QP_RSP_MAX_WQE_SHIFT,

	OCRDMA_MODIFY_QP_RSP_MAX_IRD_SHIFT = 0,
	OCRDMA_MODIFY_QP_RSP_MAX_IRD_MASK = 0xFFFF,
	OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT = 16,
	OCRDMA_MODIFY_QP_RSP_MAX_ORD_MASK = 0xFFFF <<
		OCRDMA_MODIFY_QP_RSP_MAX_ORD_SHIFT
};
/* Modify-QP mailbox response payload. */
struct ocrdma_modify_qp_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 max_wqe_rqe;
	u32 max_ord_ird;
} __packed;

/* Query-QP mailbox command payload. */
struct ocrdma_query_qp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

#define OCRDMA_QUERY_UP_QP_ID_SHIFT 0
#define OCRDMA_QUERY_UP_QP_ID_MASK 0xFFFFFF
	u32 qp_id;
} __packed;

/* Query-QP mailbox response: returns the full attribute set. */
struct ocrdma_query_qp_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
	struct ocrdma_qp_params params;
} __packed;
1042
/* Field layout of the create-SRQ mailbox command words. */
enum {
	OCRDMA_CREATE_SRQ_PD_ID_SHIFT = 0,
	OCRDMA_CREATE_SRQ_PD_ID_MASK = 0xFFFF,
	OCRDMA_CREATE_SRQ_PG_SZ_SHIFT = 16,
	OCRDMA_CREATE_SRQ_PG_SZ_MASK = 0x3 <<
		OCRDMA_CREATE_SRQ_PG_SZ_SHIFT,

	OCRDMA_CREATE_SRQ_MAX_RQE_SHIFT = 0,
	OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT = 16,
	OCRDMA_CREATE_SRQ_MAX_SGE_RECV_MASK = 0xFFFF <<
		OCRDMA_CREATE_SRQ_MAX_SGE_RECV_SHIFT,

	OCRDMA_CREATE_SRQ_RQE_SIZE_SHIFT = 0,
	OCRDMA_CREATE_SRQ_RQE_SIZE_MASK = 0xFFFF,
	OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT = 16,
	OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_MASK = 0xFFFF <<
		OCRDMA_CREATE_SRQ_NUM_RQ_PAGES_SHIFT
};

/* Create-SRQ mailbox command payload. */
struct ocrdma_create_srq {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	u32 pgsz_pdid;
	u32 max_sge_rqe;
	u32 pages_rqe_sz;
	struct ocrdma_pa rq_addr[MAX_OCRDMA_SRQ_PAGES];
} __packed;

/* Field layout of the create-SRQ mailbox response words. */
enum {
	OCRDMA_CREATE_SRQ_RSP_SRQ_ID_SHIFT = 0,
	OCRDMA_CREATE_SRQ_RSP_SRQ_ID_MASK = 0xFFFFFF,

	OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_SHIFT = 0,
	OCRDMA_CREATE_SRQ_RSP_MAX_RQE_ALLOCATED_MASK = 0xFFFF,
	OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT = 16,
	OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_MASK = 0xFFFF <<
		OCRDMA_CREATE_SRQ_RSP_MAX_SGE_RECV_ALLOCATED_SHIFT
};

/* Create-SRQ mailbox response payload. */
struct ocrdma_create_srq_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 id;
	u32 max_sge_rqe_allocated;
} __packed;
1090
/* Field layout of the modify-SRQ mailbox command words. */
enum {
	OCRDMA_MODIFY_SRQ_ID_SHIFT = 0,
	OCRDMA_MODIFY_SRQ_ID_MASK = 0xFFFFFF,

	OCRDMA_MODIFY_SRQ_MAX_RQE_SHIFT = 0,
	OCRDMA_MODIFY_SRQ_MAX_RQE_MASK = 0xFFFF,
	OCRDMA_MODIFY_SRQ_LIMIT_SHIFT = 16,
	/* Double-underscore name kept for source compatibility; prefer the
	 * conventionally-named alias below. */
	OCRDMA_MODIFY_SRQ__LIMIT_MASK = 0xFFFF <<
		OCRDMA_MODIFY_SRQ_LIMIT_SHIFT,
	OCRDMA_MODIFY_SRQ_LIMIT_MASK = 0xFFFF <<
		OCRDMA_MODIFY_SRQ_LIMIT_SHIFT
};
1101
/* Modify-SRQ mailbox command payload.
 * NOTE(review): "rep" is declared as ocrdma_mbx_rsp inside a request
 * struct — likely should be ocrdma_mbx_hdr; harmless only if the two
 * headers have identical size/layout.  Confirm against firmware spec. */
struct ocrdma_modify_srq {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rep;

	u32 id;
	u32 limit_max_rqe;
} __packed;

enum {
	OCRDMA_QUERY_SRQ_ID_SHIFT = 0,
	OCRDMA_QUERY_SRQ_ID_MASK = 0xFFFFFF
};

/* Query-SRQ mailbox command payload.
 * NOTE(review): "req" is declared as ocrdma_mbx_rsp — see note above. */
struct ocrdma_query_srq {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp req;

	u32 id;
} __packed;

/* Field layout of the query-SRQ mailbox response words. */
enum {
	OCRDMA_QUERY_SRQ_RSP_PD_ID_SHIFT = 0,
	OCRDMA_QUERY_SRQ_RSP_PD_ID_MASK = 0xFFFF,
	OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT = 16,
	OCRDMA_QUERY_SRQ_RSP_MAX_RQE_MASK = 0xFFFF <<
		OCRDMA_QUERY_SRQ_RSP_MAX_RQE_SHIFT,

	OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_SHIFT = 0,
	OCRDMA_QUERY_SRQ_RSP_MAX_SGE_RECV_MASK = 0xFFFF,
	OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT = 16,
	OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_MASK = 0xFFFF <<
		OCRDMA_QUERY_SRQ_RSP_SRQ_LIMIT_SHIFT
};

/* Query-SRQ mailbox response payload. */
struct ocrdma_query_srq_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp req;

	u32 max_rqe_pdid;
	u32 srq_lmt_max_sge;
} __packed;

enum {
	OCRDMA_DESTROY_SRQ_ID_SHIFT = 0,
	OCRDMA_DESTROY_SRQ_ID_MASK = 0xFFFFFF
};

/* Destroy-SRQ mailbox command payload.
 * NOTE(review): "req" declared as ocrdma_mbx_rsp — see note above. */
struct ocrdma_destroy_srq {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp req;

	u32 id;
} __packed;
1155
/* Alloc-PD command: DPP enable flag and limits.
 * NOTE(review): BIT(16) here is the kernel-wide macro, while the rest of
 * this file uses the local Bit() macro — same value, inconsistent style;
 * consider normalizing in a follow-up. */
enum {
	OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
	OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
	OCRDMA_DPP_PAGE_SIZE = 4096
};
1161
/* Alloc-PD mailbox command payload. */
struct ocrdma_alloc_pd {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 enable_dpp_rsvd;	/* OCRDMA_ALLOC_PD_ENABLE_DPP + reserved */
} __packed;

/* Field layout of the alloc-PD response word. */
enum {
	OCRDMA_ALLOC_PD_RSP_DPP = Bit(16),
	OCRDMA_ALLOC_PD_RSP_DPP_PAGE_SHIFT = 20,
	OCRDMA_ALLOC_PD_RSP_PDID_MASK = 0xFFFF,
};

/* Alloc-PD mailbox response payload. */
struct ocrdma_alloc_pd_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
	u32 dpp_page_pdid;	/* DPP page number + PD id, packed */
} __packed;

/* Dealloc-PD mailbox command/response pair. */
struct ocrdma_dealloc_pd {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 id;
} __packed;

struct ocrdma_dealloc_pd_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;

/* Address-range checking enable values for lkey allocation. */
enum {
	OCRDMA_ADDR_CHECK_ENABLE = 1,
	OCRDMA_ADDR_CHECK_DISABLE = 0
};

/* Field layout of the alloc-lkey mailbox command words. */
enum {
	OCRDMA_ALLOC_LKEY_PD_ID_SHIFT = 0,
	OCRDMA_ALLOC_LKEY_PD_ID_MASK = 0xFFFF,

	OCRDMA_ALLOC_LKEY_ADDR_CHECK_SHIFT = 0,
	OCRDMA_ALLOC_LKEY_ADDR_CHECK_MASK = Bit(0),
	OCRDMA_ALLOC_LKEY_FMR_SHIFT = 1,
	OCRDMA_ALLOC_LKEY_FMR_MASK = Bit(1),
	OCRDMA_ALLOC_LKEY_REMOTE_INV_SHIFT = 2,
	OCRDMA_ALLOC_LKEY_REMOTE_INV_MASK = Bit(2),
	OCRDMA_ALLOC_LKEY_REMOTE_WR_SHIFT = 3,
	OCRDMA_ALLOC_LKEY_REMOTE_WR_MASK = Bit(3),
	OCRDMA_ALLOC_LKEY_REMOTE_RD_SHIFT = 4,
	OCRDMA_ALLOC_LKEY_REMOTE_RD_MASK = Bit(4),
	OCRDMA_ALLOC_LKEY_LOCAL_WR_SHIFT = 5,
	OCRDMA_ALLOC_LKEY_LOCAL_WR_MASK = Bit(5),
	OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_MASK = Bit(6),
	OCRDMA_ALLOC_LKEY_REMOTE_ATOMIC_SHIFT = 6,
	OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT = 16,
	OCRDMA_ALLOC_LKEY_PBL_SIZE_MASK = 0xFFFF <<
		OCRDMA_ALLOC_LKEY_PBL_SIZE_SHIFT
};

/* Alloc-lkey mailbox command payload. */
struct ocrdma_alloc_lkey {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	u32 pdid;
	u32 pbl_sz_flags;
} __packed;

/* Alloc-lkey mailbox response payload. */
struct ocrdma_alloc_lkey_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 lrkey;
	u32 num_pbl_rsvd;
} __packed;

/* Dealloc-lkey mailbox command/response pair. */
struct ocrdma_dealloc_lkey {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	u32 lkey;
	u32 rsvd_frmr;
} __packed;

struct ocrdma_dealloc_lkey_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;
1247
/* Non-shared MR registration limits. */
#define MAX_OCRDMA_NSMR_PBL (u32)22
#define MAX_OCRDMA_PBL_SIZE 65536
#define MAX_OCRDMA_PBL_PER_LKEY 32767

/* Field layout of the register-NSMR mailbox command words. */
enum {
	OCRDMA_REG_NSMR_LRKEY_INDEX_SHIFT = 0,
	OCRDMA_REG_NSMR_LRKEY_INDEX_MASK = 0xFFFFFF,
	OCRDMA_REG_NSMR_LRKEY_SHIFT = 24,
	OCRDMA_REG_NSMR_LRKEY_MASK = 0xFF <<
		OCRDMA_REG_NSMR_LRKEY_SHIFT,

	OCRDMA_REG_NSMR_PD_ID_SHIFT = 0,
	OCRDMA_REG_NSMR_PD_ID_MASK = 0xFFFF,
	OCRDMA_REG_NSMR_NUM_PBL_SHIFT = 16,
	OCRDMA_REG_NSMR_NUM_PBL_MASK = 0xFFFF <<
		OCRDMA_REG_NSMR_NUM_PBL_SHIFT,

	OCRDMA_REG_NSMR_PBE_SIZE_SHIFT = 0,
	OCRDMA_REG_NSMR_PBE_SIZE_MASK = 0xFFFF,
	OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT = 16,
	OCRDMA_REG_NSMR_HPAGE_SIZE_MASK = 0xFF <<
		OCRDMA_REG_NSMR_HPAGE_SIZE_SHIFT,
	OCRDMA_REG_NSMR_BIND_MEMWIN_SHIFT = 24,
	OCRDMA_REG_NSMR_BIND_MEMWIN_MASK = Bit(24),
	OCRDMA_REG_NSMR_ZB_SHIFT = 25,
	/* NOTE(review): name deviates from the *_MASK convention used by
	 * its siblings (would normally be OCRDMA_REG_NSMR_ZB_MASK). */
	OCRDMA_REG_NSMR_ZB_SHIFT_MASK = Bit(25),
	OCRDMA_REG_NSMR_REMOTE_INV_SHIFT = 26,
	OCRDMA_REG_NSMR_REMOTE_INV_MASK = Bit(26),
	OCRDMA_REG_NSMR_REMOTE_WR_SHIFT = 27,
	OCRDMA_REG_NSMR_REMOTE_WR_MASK = Bit(27),
	OCRDMA_REG_NSMR_REMOTE_RD_SHIFT = 28,
	OCRDMA_REG_NSMR_REMOTE_RD_MASK = Bit(28),
	OCRDMA_REG_NSMR_LOCAL_WR_SHIFT = 29,
	OCRDMA_REG_NSMR_LOCAL_WR_MASK = Bit(29),
	OCRDMA_REG_NSMR_REMOTE_ATOMIC_SHIFT = 30,
	OCRDMA_REG_NSMR_REMOTE_ATOMIC_MASK = Bit(30),
	OCRDMA_REG_NSMR_LAST_SHIFT = 31,
	OCRDMA_REG_NSMR_LAST_MASK = Bit(31)
};

/* Register-NSMR mailbox command payload. */
struct ocrdma_reg_nsmr {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr cmd;

	u32 lrkey_key_index;
	u32 num_pbl_pdid;
	u32 flags_hpage_pbe_sz;
	u32 totlen_low;
	u32 totlen_high;
	u32 fbo_low;		/* first byte offset into first page */
	u32 fbo_high;
	u32 va_loaddr;
	u32 va_hiaddr;
	struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
} __packed;
1303
/* Field layout of the register-NSMR continuation command (used when a
 * registration needs more PBL entries than fit in one mailbox). */
enum {
	OCRDMA_REG_NSMR_CONT_PBL_SHIFT = 0,
	/* NOTE(review): name deviates from the *_MASK convention. */
	OCRDMA_REG_NSMR_CONT_PBL_SHIFT_MASK = 0xFFFF,
	OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT = 16,
	OCRDMA_REG_NSMR_CONT_NUM_PBL_MASK = 0xFFFF <<
		OCRDMA_REG_NSMR_CONT_NUM_PBL_SHIFT,

	OCRDMA_REG_NSMR_CONT_LAST_SHIFT = 31,
	OCRDMA_REG_NSMR_CONT_LAST_MASK = Bit(31)
};

/* Register-NSMR continuation mailbox command payload. */
struct ocrdma_reg_nsmr_cont {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr cmd;

	u32 lrkey;
	u32 num_pbl_offset;
	u32 last;

	struct ocrdma_pa pbl[MAX_OCRDMA_NSMR_PBL];
} __packed;

/* Page-base entry: 64-bit physical address split into two words. */
struct ocrdma_pbe {
	u32 pa_hi;
	u32 pa_lo;
} __packed;

enum {
	OCRDMA_REG_NSMR_RSP_NUM_PBL_SHIFT = 16,
	OCRDMA_REG_NSMR_RSP_NUM_PBL_MASK = 0xFFFF0000
};
/* Register-NSMR mailbox response payload. */
struct ocrdma_reg_nsmr_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 lrkey;
	u32 num_pbl;
} __packed;

/* Field layout of the continuation-command response words. */
enum {
	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_SHIFT = 0,
	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_INDEX_MASK = 0xFFFFFF,
	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT = 24,
	OCRDMA_REG_NSMR_CONT_RSP_LRKEY_MASK = 0xFF <<
		OCRDMA_REG_NSMR_CONT_RSP_LRKEY_SHIFT,

	OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT = 16,
	OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_MASK = 0xFFFF <<
		OCRDMA_REG_NSMR_CONT_RSP_NUM_PBL_SHIFT
};

/* Register-NSMR continuation mailbox response payload. */
struct ocrdma_reg_nsmr_cont_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 lrkey_key_index;
	u32 num_pbl;
} __packed;
1362
/* Alloc memory-window mailbox command/response. */
enum {
	OCRDMA_ALLOC_MW_PD_ID_SHIFT = 0,
	OCRDMA_ALLOC_MW_PD_ID_MASK = 0xFFFF
};

struct ocrdma_alloc_mw {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;

	u32 pdid;
} __packed;

enum {
	OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_SHIFT = 0,
	OCRDMA_ALLOC_MW_RSP_LRKEY_INDEX_MASK = 0xFFFFFF
};

struct ocrdma_alloc_mw_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;

	u32 lrkey_index;
} __packed;

/* Attach QP to multicast group (identical layout to detach below). */
struct ocrdma_attach_mcast {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 qp_id;
	u8 mgid[16];
	u32 mac_b0_to_b3;
	u32 vlan_mac_b4_to_b5;
} __packed;

struct ocrdma_attach_mcast_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;

/* Detach QP from multicast group. */
struct ocrdma_detach_mcast {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 qp_id;
	u8 mgid[16];
	u32 mac_b0_to_b3;
	u32 vlan_mac_b4_to_b5;
} __packed;

struct ocrdma_detach_mcast_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;

/* Field layout of the create-AH-table configuration word. */
enum {
	OCRDMA_CREATE_AH_NUM_PAGES_SHIFT = 19,
	OCRDMA_CREATE_AH_NUM_PAGES_MASK = 0xF <<
		OCRDMA_CREATE_AH_NUM_PAGES_SHIFT,

	OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT = 16,
	OCRDMA_CREATE_AH_PAGE_SIZE_MASK = 0x7 <<
		OCRDMA_CREATE_AH_PAGE_SIZE_SHIFT,

	OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT = 23,
	OCRDMA_CREATE_AH_ENTRY_SIZE_MASK = 0x1FF <<
		OCRDMA_CREATE_AH_ENTRY_SIZE_SHIFT,
};
1428
1429#define OCRDMA_AH_TBL_PAGES 8
1430
1431struct ocrdma_create_ah_tbl {
1432 struct ocrdma_mqe_hdr hdr;
1433 struct ocrdma_mbx_hdr req;
1434
1435 u32 ah_conf;
1436 struct ocrdma_pa tbl_addr[8];
1437} __packed;
1438
/* Create-AH-table mailbox response: returns the table id. */
struct ocrdma_create_ah_tbl_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
	u32 ahid;
} __packed;

/* Delete-AH-table mailbox command/response pair. */
struct ocrdma_delete_ah_tbl {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_hdr req;
	u32 ahid;
} __packed;

struct ocrdma_delete_ah_tbl_rsp {
	struct ocrdma_mqe_hdr hdr;
	struct ocrdma_mbx_rsp rsp;
} __packed;

/* Event-queue entry layout: valid bit, CQE indicator, resource id. */
enum {
	OCRDMA_EQE_VALID_SHIFT = 0,
	OCRDMA_EQE_VALID_MASK = Bit(0),
	OCRDMA_EQE_FOR_CQE_MASK = 0xFFFE,
	OCRDMA_EQE_RESOURCE_ID_SHIFT = 16,
	OCRDMA_EQE_RESOURCE_ID_MASK = 0xFFFF <<
		OCRDMA_EQE_RESOURCE_ID_SHIFT,
};

/* Single event-queue entry: one packed word. */
struct ocrdma_eqe {
	u32 id_valid;
} __packed;
1468
/* Hardware completion status codes reported in the CQE status field. */
enum OCRDMA_CQE_STATUS {
	OCRDMA_CQE_SUCCESS = 0,
	OCRDMA_CQE_LOC_LEN_ERR,
	OCRDMA_CQE_LOC_QP_OP_ERR,
	OCRDMA_CQE_LOC_EEC_OP_ERR,
	OCRDMA_CQE_LOC_PROT_ERR,
	OCRDMA_CQE_WR_FLUSH_ERR,
	OCRDMA_CQE_MW_BIND_ERR,
	OCRDMA_CQE_BAD_RESP_ERR,
	OCRDMA_CQE_LOC_ACCESS_ERR,
	OCRDMA_CQE_REM_INV_REQ_ERR,
	OCRDMA_CQE_REM_ACCESS_ERR,
	OCRDMA_CQE_REM_OP_ERR,
	OCRDMA_CQE_RETRY_EXC_ERR,
	OCRDMA_CQE_RNR_RETRY_EXC_ERR,
	OCRDMA_CQE_LOC_RDD_VIOL_ERR,
	OCRDMA_CQE_REM_INV_RD_REQ_ERR,
	OCRDMA_CQE_REM_ABORT_ERR,
	OCRDMA_CQE_INV_EECN_ERR,
	OCRDMA_CQE_INV_EEC_STATE_ERR,
	OCRDMA_CQE_FATAL_ERR,
	OCRDMA_CQE_RESP_TIMEOUT_ERR,
	OCRDMA_CQE_GENERAL_ERR
};

/* Field layout of the four CQE words (w0..w3). */
enum {
	/* w0 */
	OCRDMA_CQE_WQEIDX_SHIFT = 0,
	OCRDMA_CQE_WQEIDX_MASK = 0xFFFF,

	/* w1 */
	OCRDMA_CQE_UD_XFER_LEN_SHIFT = 16,
	OCRDMA_CQE_PKEY_SHIFT = 0,
	OCRDMA_CQE_PKEY_MASK = 0xFFFF,

	/* w2 */
	OCRDMA_CQE_QPN_SHIFT = 0,
	OCRDMA_CQE_QPN_MASK = 0x0000FFFF,

	OCRDMA_CQE_BUFTAG_SHIFT = 16,
	OCRDMA_CQE_BUFTAG_MASK = 0xFFFF << OCRDMA_CQE_BUFTAG_SHIFT,

	/* w3 */
	OCRDMA_CQE_UD_STATUS_SHIFT = 24,
	OCRDMA_CQE_UD_STATUS_MASK = 0x7 << OCRDMA_CQE_UD_STATUS_SHIFT,
	OCRDMA_CQE_STATUS_SHIFT = 16,
	OCRDMA_CQE_STATUS_MASK = 0xFF << OCRDMA_CQE_STATUS_SHIFT,
	OCRDMA_CQE_VALID = Bit(31),
	OCRDMA_CQE_INVALIDATE = Bit(30),
	OCRDMA_CQE_QTYPE = Bit(29),	/* 0 == SQ, 1 == RQ (values below) */
	OCRDMA_CQE_IMM = Bit(28),
	OCRDMA_CQE_WRITE_IMM = Bit(27),
	OCRDMA_CQE_QTYPE_SQ = 0,
	OCRDMA_CQE_QTYPE_RQ = 1,
	OCRDMA_CQE_SRCQP_MASK = 0xFFFFFF
};
1525
/* Completion-queue entry: words w0..w2 are interpreted per work-queue
 * type (send, recv, UD recv); w3 carries flags/status/source-QPN. */
struct ocrdma_cqe {
	union {
		/* w0 to w2 */
		struct {
			u32 wqeidx;
			u32 bytes_xfered;
			u32 qpn;
		} wq;
		struct {
			u32 lkey_immdt;
			u32 rxlen;
			u32 buftag_qpn;
		} rq;
		struct {
			u32 lkey_immdt;
			u32 rxlen_pkey;
			u32 buftag_qpn;
		} ud;
		struct {
			u32 word_0;
			u32 word_1;
			u32 qpn;
		} cmn;	/* type-agnostic view of the same words */
	};
	u32 flags_status_srcqpn;	/* w3 */
} __packed;
1552
/* CQE flag accessors.  All read the little-endian w3 flags word.
 * Macro arguments are parenthesized to avoid precedence surprises if a
 * caller passes a non-trivial expression. */
#define is_cqe_valid(cq, cqe) \
	(((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_VALID)\
	== (cq)->phase) ? 1 : 0)
#define is_cqe_for_sq(cqe) \
	((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 0 : 1)
#define is_cqe_for_rq(cqe) \
	((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_QTYPE) ? 1 : 0)
#define is_cqe_invalidated(cqe) \
	((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_INVALIDATE) ? \
	1 : 0)
#define is_cqe_imm(cqe) \
	((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_IMM) ? 1 : 0)
#define is_cqe_wr_imm(cqe) \
	((le32_to_cpu((cqe)->flags_status_srcqpn) & OCRDMA_CQE_WRITE_IMM) ? 1 : 0)
1567
/* Scatter/gather element as consumed by the hardware work queues. */
struct ocrdma_sge {
	u32 addr_hi;
	u32 addr_lo;
	u32 lrkey;
	u32 len;
} __packed;

/* WQE control flags and stag (lkey) access flags. */
enum {
	OCRDMA_FLAG_SIG = 0x1,
	OCRDMA_FLAG_INV = 0x2,
	OCRDMA_FLAG_FENCE_L = 0x4,
	OCRDMA_FLAG_FENCE_R = 0x8,
	OCRDMA_FLAG_SOLICIT = 0x10,
	OCRDMA_FLAG_IMM = 0x20,

	/* Stag flags */
	OCRDMA_LKEY_FLAG_LOCAL_WR = 0x1,
	OCRDMA_LKEY_FLAG_REMOTE_RD = 0x2,
	OCRDMA_LKEY_FLAG_REMOTE_WR = 0x4,
	OCRDMA_LKEY_FLAG_VATO = 0x8,
};

/* Hardware opcodes placed in the WQE control word. */
enum OCRDMA_WQE_OPCODE {
	OCRDMA_WRITE = 0x06,
	OCRDMA_READ = 0x0C,
	OCRDMA_RESV0 = 0x02,
	OCRDMA_SEND = 0x00,
	OCRDMA_CMP_SWP = 0x14,
	OCRDMA_BIND_MW = 0x10,
	OCRDMA_RESV1 = 0x0A,
	OCRDMA_LKEY_INV = 0x15,
	OCRDMA_FETCH_ADD = 0x13,
	OCRDMA_POST_RQ = 0x12
};

/* Payload addressing type: data inline in the WQE vs via lkey SGEs. */
enum {
	OCRDMA_TYPE_INLINE = 0x0,
	OCRDMA_TYPE_LKEY = 0x1,
};

/* Field layout of the WQE control word (cw). */
enum {
	OCRDMA_WQE_OPCODE_SHIFT = 0,
	OCRDMA_WQE_OPCODE_MASK = 0x0000001F,
	OCRDMA_WQE_FLAGS_SHIFT = 5,
	OCRDMA_WQE_TYPE_SHIFT = 16,
	OCRDMA_WQE_TYPE_MASK = 0x00030000,
	OCRDMA_WQE_SIZE_SHIFT = 18,
	OCRDMA_WQE_SIZE_MASK = 0xFF,	/* mask applied before shifting */
	OCRDMA_WQE_NXT_WQE_SIZE_SHIFT = 25,

	OCRDMA_WQE_LKEY_FLAGS_SHIFT = 0,
	OCRDMA_WQE_LKEY_FLAGS_MASK = 0xF
};

/* header WQE for all the SQ and RQ operations */
struct ocrdma_hdr_wqe {
	u32 cw;		/* control word, see OCRDMA_WQE_* above */
	union {
		u32 rsvd_tag;
		u32 rsvd_lkey_flags;
	};
	union {
		u32 immdt;
		u32 lkey;
	};
	u32 total_len;
} __packed;
1635
/* Extended WQE header for UD sends: destination QPN, Q_Key, AH id. */
struct ocrdma_ewqe_ud_hdr {
	u32 rsvd_dest_qpn;
	u32 qkey;
	u32 rsvd_ahid;
	u32 rsvd;
} __packed;

/* Untagged Ethernet header. */
struct ocrdma_eth_basic {
	u8 dmac[6];
	u8 smac[6];
	__be16 eth_type;
} __packed;

/* VLAN-tagged Ethernet header followed by the RoCE ethertype. */
struct ocrdma_eth_vlan {
	u8 dmac[6];
	u8 smac[6];
	__be16 eth_type;
	__be16 vlan_tag;
#define OCRDMA_ROCE_ETH_TYPE 0x8915
	__be16 roce_eth_type;
} __packed;

/* IB Global Routing Header as carried in the address vector. */
struct ocrdma_grh {
	__be32 tclass_flow;
	__be32 pdid_hoplimit;
	u8 sgid[16];
	u8 dgid[16];
	u16 rsvd;
} __packed;

/* Address-vector validity flags. */
#define OCRDMA_AV_VALID Bit(0)
#define OCRDMA_AV_VLAN_VALID Bit(1)

/* Address vector: L2 header + GRH + validity word. */
struct ocrdma_av {
	struct ocrdma_eth_vlan eth_hdr;
	struct ocrdma_grh grh;
	u32 valid;	/* OCRDMA_AV_* flags */
} __packed;
1674
1675#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
deleted file mode 100644
index b29a4246ef4..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ /dev/null
@@ -1,2536 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#include <linux/dma-mapping.h>
29#include <rdma/ib_verbs.h>
30#include <rdma/ib_user_verbs.h>
31#include <rdma/iw_cm.h>
32#include <rdma/ib_umem.h>
33#include <rdma/ib_addr.h>
34
35#include "ocrdma.h"
36#include "ocrdma_hw.h"
37#include "ocrdma_verbs.h"
38#include "ocrdma_abi.h"
39
40int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
41{
42 if (index > 1)
43 return -EINVAL;
44
45 *pkey = 0xffff;
46 return 0;
47}
48
/* Copy the SGID at @index from the device's cached SGID table into
 * *sgid.  *sgid is zeroed first; returns -EINVAL (leaving it zeroed)
 * when @index is out of range.
 */
int ocrdma_query_gid(struct ib_device *ibdev, u8 port,
		     int index, union ib_gid *sgid)
{
	struct ocrdma_dev *dev;

	dev = get_ocrdma_dev(ibdev);
	memset(sgid, 0, sizeof(*sgid));
	if (index >= OCRDMA_MAX_SGID)
		return -EINVAL;

	memcpy(sgid, &dev->sgid_tbl[index], sizeof(*sgid));

	return 0;
}
63
/* Fill *attr with this device's capabilities for ib_query_device().
 * Values come from the firmware-reported limits cached in dev->attr;
 * features the hardware lacks (MW, FMR, fast-reg, atomics) report 0.
 */
int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
{
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);

	memset(attr, 0, sizeof *attr);
	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
	attr->max_mr_size = ~0ull;
	attr->page_size_cap = 0xffff000;
	attr->vendor_id = dev->nic_info.pdev->vendor;
	attr->vendor_part_id = dev->nic_info.pdev->device;
	attr->hw_ver = 0;
	attr->max_qp = dev->attr.max_qp;
	/* AHs are drawn from the same pool as QPs. */
	attr->max_ah = dev->attr.max_qp;
	attr->max_qp_wr = dev->attr.max_wqe;

	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
					IB_DEVICE_RC_RNR_NAK_GEN |
					IB_DEVICE_SHUTDOWN_PORT |
					IB_DEVICE_SYS_IMAGE_GUID |
					IB_DEVICE_LOCAL_DMA_LKEY;
	attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_srq_sge);
	attr->max_sge_rd = 0;
	attr->max_cq = dev->attr.max_cq;
	attr->max_cqe = dev->attr.max_cqe;
	attr->max_mr = dev->attr.max_mr;
	attr->max_mw = 0;
	attr->max_pd = dev->attr.max_pd;
	attr->atomic_cap = 0;
	attr->max_fmr = 0;
	attr->max_map_per_fmr = 0;
	attr->max_qp_rd_atom =
	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
	/* One QP is reserved, hence max_qp - 1 SRQs. */
	attr->max_srq = (dev->attr.max_qp - 1);
	attr->max_srq_sge = dev->attr.max_srq_sge;
	attr->max_srq_wr = dev->attr.max_rqe;
	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
	attr->max_fast_reg_page_list_len = 0;
	attr->max_pkeys = 1;
	return 0;
}
107
/* Fill *props for ib_query_port().  Link state is derived from the
 * underlying netdev's operational state; LID/SM fields are zero because
 * RoCE has no subnet manager.
 */
int ocrdma_query_port(struct ib_device *ibdev,
		      u8 port, struct ib_port_attr *props)
{
	enum ib_port_state port_state;
	struct ocrdma_dev *dev;
	struct net_device *netdev;

	dev = get_ocrdma_dev(ibdev);
	if (port > 1) {
		ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
			   dev->id, port);
		return -EINVAL;
	}
	netdev = dev->nic_info.netdev;
	if (netif_running(netdev) && netif_oper_up(netdev)) {
		port_state = IB_PORT_ACTIVE;
		/* 5 == LinkUp per IBTA PortPhysicalState encoding */
		props->phys_state = 5;
	} else {
		port_state = IB_PORT_DOWN;
		/* 3 == Disabled per IBTA PortPhysicalState encoding */
		props->phys_state = 3;
	}
	props->max_mtu = IB_MTU_4096;
	props->active_mtu = iboe_get_mtu(netdev->mtu);
	props->lid = 0;
	props->lmc = 0;
	props->sm_lid = 0;
	props->sm_sl = 0;
	props->state = port_state;
	props->port_cap_flags =
	    IB_PORT_CM_SUP |
	    IB_PORT_REINIT_SUP |
	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP;
	props->gid_tbl_len = OCRDMA_MAX_SGID;
	props->pkey_tbl_len = 1;
	props->bad_pkey_cntr = 0;
	props->qkey_viol_cntr = 0;
	props->active_width = IB_WIDTH_1X;
	props->active_speed = 4;
	props->max_msg_sz = 0x80000000;
	props->max_vl_num = 4;
	return 0;
}
150
151int ocrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
152 struct ib_port_modify *props)
153{
154 struct ocrdma_dev *dev;
155
156 dev = get_ocrdma_dev(ibdev);
157 if (port > 1) {
158 ocrdma_err("%s(%d) invalid_port=0x%x\n", __func__,
159 dev->id, port);
160 return -EINVAL;
161 }
162 return 0;
163}
164
165static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
166 unsigned long len)
167{
168 struct ocrdma_mm *mm;
169
170 mm = kzalloc(sizeof(*mm), GFP_KERNEL);
171 if (mm == NULL)
172 return -ENOMEM;
173 mm->key.phy_addr = phy_addr;
174 mm->key.len = len;
175 INIT_LIST_HEAD(&mm->entry);
176
177 mutex_lock(&uctx->mm_list_lock);
178 list_add_tail(&mm->entry, &uctx->mm_head);
179 mutex_unlock(&uctx->mm_list_lock);
180 return 0;
181}
182
183static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
184 unsigned long len)
185{
186 struct ocrdma_mm *mm, *tmp;
187
188 mutex_lock(&uctx->mm_list_lock);
189 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
190 if (len != mm->key.len || phy_addr != mm->key.phy_addr)
191 continue;
192
193 list_del(&mm->entry);
194 kfree(mm);
195 break;
196 }
197 mutex_unlock(&uctx->mm_list_lock);
198}
199
200static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
201 unsigned long len)
202{
203 bool found = false;
204 struct ocrdma_mm *mm;
205
206 mutex_lock(&uctx->mm_list_lock);
207 list_for_each_entry(mm, &uctx->mm_head, entry) {
208 if (len != mm->key.len || phy_addr != mm->key.phy_addr)
209 continue;
210
211 found = true;
212 break;
213 }
214 mutex_unlock(&uctx->mm_list_lock);
215 return found;
216}
217
/* Allocate a user context for a process opening the device: allocates a
 * DMA-coherent address-handle table, registers its physical range in
 * the context's mmap list, and copies device parameters back to
 * userspace via @udata.  Returns the new context or an ERR_PTR; all
 * partially-acquired resources are released on failure.
 */
struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *ibdev,
					  struct ib_udata *udata)
{
	int status;
	struct ocrdma_ucontext *ctx;
	struct ocrdma_alloc_ucontext_resp resp;
	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
	struct pci_dev *pdev = dev->nic_info.pdev;
	/* AH table: 2048 u32 entries, rounded up to whole pages. */
	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);

	if (!udata)
		return ERR_PTR(-EFAULT);
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	ctx->dev = dev;
	INIT_LIST_HEAD(&ctx->mm_head);
	mutex_init(&ctx->mm_list_lock);

	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
					    &ctx->ah_tbl.pa, GFP_KERNEL);
	if (!ctx->ah_tbl.va) {
		kfree(ctx);
		return ERR_PTR(-ENOMEM);
	}
	memset(ctx->ah_tbl.va, 0, map_len);
	ctx->ah_tbl.len = map_len;

	resp.ah_tbl_len = ctx->ah_tbl.len;
	resp.ah_tbl_page = ctx->ah_tbl.pa;

	/* Allow userspace to mmap the AH table. */
	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
	if (status)
		goto map_err;
	resp.dev_id = dev->id;
	resp.max_inline_data = dev->attr.max_inline_data;
	resp.wqe_size = dev->attr.wqe_size;
	resp.rqe_size = dev->attr.rqe_size;
	resp.dpp_wqe_size = dev->attr.wqe_size;
	resp.rsvd = 0;

	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
	if (status)
		goto cpy_err;
	return &ctx->ibucontext;

cpy_err:
	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
map_err:
	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
			  ctx->ah_tbl.pa);
	kfree(ctx);
	return ERR_PTR(status);
}
273
274int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
275{
276 struct ocrdma_mm *mm, *tmp;
277 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
278 struct pci_dev *pdev = uctx->dev->nic_info.pdev;
279
280 ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
281 dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
282 uctx->ah_tbl.pa);
283
284 list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
285 list_del(&mm->entry);
286 kfree(mm);
287 }
288 kfree(uctx);
289 return 0;
290}
291
/*
 * mmap() handler for user contexts. The requested (offset, length) must
 * have been registered earlier via ocrdma_add_mmap(); the physical range
 * then selects one of three mapping types: doorbell page, DPP region
 * (write-combined), or ordinary queue memory.
 */
int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
{
	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
	struct ocrdma_dev *dev = ucontext->dev;
	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
	unsigned long len = (vma->vm_end - vma->vm_start);
	int status = 0;
	bool found;

	/* reject mappings that do not start on a page boundary */
	if (vma->vm_start & (PAGE_SIZE - 1))
		return -EINVAL;
	/* only ranges the driver registered for this context may be mapped */
	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
	if (!found)
		return -EINVAL;

	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
		dev->nic_info.db_total_size)) &&
		(len <= dev->nic_info.db_page_size)) {
		/* doorbell mapping */
		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					len, vma->vm_page_prot);
	} else if (dev->nic_info.dpp_unmapped_len &&
		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
			dev->nic_info.dpp_unmapped_len)) &&
		(len <= dev->nic_info.dpp_unmapped_len)) {
		/* dpp area mapping; write-combining is requested for the
		 * direct-posting region before remapping
		 */
		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
					len, vma->vm_page_prot);
	} else {
		/* queue memory mapping */
		status = remap_pfn_range(vma, vma->vm_start,
				vma->vm_pgoff, len, vma->vm_page_prot);
	}
	return status;
}
330
331static int ocrdma_copy_pd_uresp(struct ocrdma_pd *pd,
332 struct ib_ucontext *ib_ctx,
333 struct ib_udata *udata)
334{
335 int status;
336 u64 db_page_addr;
337 u64 dpp_page_addr = 0;
338 u32 db_page_size;
339 struct ocrdma_alloc_pd_uresp rsp;
340 struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ib_ctx);
341
342 rsp.id = pd->id;
343 rsp.dpp_enabled = pd->dpp_enabled;
344 db_page_addr = pd->dev->nic_info.unmapped_db +
345 (pd->id * pd->dev->nic_info.db_page_size);
346 db_page_size = pd->dev->nic_info.db_page_size;
347
348 status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
349 if (status)
350 return status;
351
352 if (pd->dpp_enabled) {
353 dpp_page_addr = pd->dev->nic_info.dpp_unmapped_addr +
354 (pd->id * OCRDMA_DPP_PAGE_SIZE);
355 status = ocrdma_add_mmap(uctx, dpp_page_addr,
356 OCRDMA_DPP_PAGE_SIZE);
357 if (status)
358 goto dpp_map_err;
359 rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
360 rsp.dpp_page_addr_lo = dpp_page_addr;
361 }
362
363 status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
364 if (status)
365 goto ucopy_err;
366
367 pd->uctx = uctx;
368 return 0;
369
370ucopy_err:
371 if (pd->dpp_enabled)
372 ocrdma_del_mmap(pd->uctx, dpp_page_addr, OCRDMA_DPP_PAGE_SIZE);
373dpp_map_err:
374 ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
375 return status;
376}
377
378struct ib_pd *ocrdma_alloc_pd(struct ib_device *ibdev,
379 struct ib_ucontext *context,
380 struct ib_udata *udata)
381{
382 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
383 struct ocrdma_pd *pd;
384 int status;
385
386 pd = kzalloc(sizeof(*pd), GFP_KERNEL);
387 if (!pd)
388 return ERR_PTR(-ENOMEM);
389 pd->dev = dev;
390 if (udata && context) {
391 pd->dpp_enabled = (dev->nic_info.dev_family ==
392 OCRDMA_GEN2_FAMILY) ? true : false;
393 pd->num_dpp_qp =
394 pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0;
395 }
396 status = ocrdma_mbx_alloc_pd(dev, pd);
397 if (status) {
398 kfree(pd);
399 return ERR_PTR(status);
400 }
401 atomic_set(&pd->use_cnt, 0);
402
403 if (udata && context) {
404 status = ocrdma_copy_pd_uresp(pd, context, udata);
405 if (status)
406 goto err;
407 }
408 return &pd->ibpd;
409
410err:
411 ocrdma_dealloc_pd(&pd->ibpd);
412 return ERR_PTR(status);
413}
414
415int ocrdma_dealloc_pd(struct ib_pd *ibpd)
416{
417 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
418 struct ocrdma_dev *dev = pd->dev;
419 int status;
420 u64 usr_db;
421
422 if (atomic_read(&pd->use_cnt)) {
423 ocrdma_err("%s(%d) pd=0x%x is in use.\n",
424 __func__, dev->id, pd->id);
425 status = -EFAULT;
426 goto dealloc_err;
427 }
428 status = ocrdma_mbx_dealloc_pd(dev, pd);
429 if (pd->uctx) {
430 u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
431 (pd->id * OCRDMA_DPP_PAGE_SIZE);
432 if (pd->dpp_enabled)
433 ocrdma_del_mmap(pd->uctx, dpp_db, OCRDMA_DPP_PAGE_SIZE);
434 usr_db = dev->nic_info.unmapped_db +
435 (pd->id * dev->nic_info.db_page_size);
436 ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
437 }
438 kfree(pd);
439dealloc_err:
440 return status;
441}
442
443static struct ocrdma_mr *ocrdma_alloc_lkey(struct ib_pd *ibpd,
444 int acc, u32 num_pbls,
445 u32 addr_check)
446{
447 int status;
448 struct ocrdma_mr *mr;
449 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
450 struct ocrdma_dev *dev = pd->dev;
451
452 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
453 ocrdma_err("%s(%d) leaving err, invalid access rights\n",
454 __func__, dev->id);
455 return ERR_PTR(-EINVAL);
456 }
457
458 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
459 if (!mr)
460 return ERR_PTR(-ENOMEM);
461 mr->hwmr.dev = dev;
462 mr->hwmr.fr_mr = 0;
463 mr->hwmr.local_rd = 1;
464 mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
465 mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
466 mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
467 mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
468 mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
469 mr->hwmr.num_pbls = num_pbls;
470
471 status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pd->id, addr_check);
472 if (status) {
473 kfree(mr);
474 return ERR_PTR(-ENOMEM);
475 }
476 mr->pd = pd;
477 atomic_inc(&pd->use_cnt);
478 mr->ibmr.lkey = mr->hwmr.lkey;
479 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
480 mr->ibmr.rkey = mr->hwmr.lkey;
481 return mr;
482}
483
484struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
485{
486 struct ocrdma_mr *mr;
487
488 mr = ocrdma_alloc_lkey(ibpd, acc, 0, OCRDMA_ADDR_CHECK_DISABLE);
489 if (IS_ERR(mr))
490 return ERR_CAST(mr);
491
492 return &mr->ibmr;
493}
494
495static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
496 struct ocrdma_hw_mr *mr)
497{
498 struct pci_dev *pdev = dev->nic_info.pdev;
499 int i = 0;
500
501 if (mr->pbl_table) {
502 for (i = 0; i < mr->num_pbls; i++) {
503 if (!mr->pbl_table[i].va)
504 continue;
505 dma_free_coherent(&pdev->dev, mr->pbl_size,
506 mr->pbl_table[i].va,
507 mr->pbl_table[i].pa);
508 }
509 kfree(mr->pbl_table);
510 mr->pbl_table = NULL;
511 }
512}
513
514static int ocrdma_get_pbl_info(struct ocrdma_mr *mr, u32 num_pbes)
515{
516 u32 num_pbls = 0;
517 u32 idx = 0;
518 int status = 0;
519 u32 pbl_size;
520
521 do {
522 pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
523 if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
524 status = -EFAULT;
525 break;
526 }
527 num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
528 num_pbls = num_pbls / (pbl_size / sizeof(u64));
529 idx++;
530 } while (num_pbls >= mr->hwmr.dev->attr.max_num_mr_pbl);
531
532 mr->hwmr.num_pbes = num_pbes;
533 mr->hwmr.num_pbls = num_pbls;
534 mr->hwmr.pbl_size = pbl_size;
535 return status;
536}
537
538static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
539{
540 int status = 0;
541 int i;
542 u32 dma_len = mr->pbl_size;
543 struct pci_dev *pdev = dev->nic_info.pdev;
544 void *va;
545 dma_addr_t pa;
546
547 mr->pbl_table = kzalloc(sizeof(struct ocrdma_pbl) *
548 mr->num_pbls, GFP_KERNEL);
549
550 if (!mr->pbl_table)
551 return -ENOMEM;
552
553 for (i = 0; i < mr->num_pbls; i++) {
554 va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
555 if (!va) {
556 ocrdma_free_mr_pbl_tbl(dev, mr);
557 status = -ENOMEM;
558 break;
559 }
560 memset(va, 0, dma_len);
561 mr->pbl_table[i].va = va;
562 mr->pbl_table[i].pa = pa;
563 }
564 return status;
565}
566
/*
 * Walk the umem's DMA-mapped chunks and write one little-endian PBE
 * (page address, split into lo/hi 32-bit halves) per hardware page into
 * the MR's PBL pages, advancing to the next PBL when the current one is
 * full. Stops once num_pbes entries have been written.
 */
static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
			    u32 num_pbes)
{
	struct ocrdma_pbe *pbe;
	struct ib_umem_chunk *chunk;
	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
	struct ib_umem *umem = mr->umem;
	int i, shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;

	if (!mr->hwmr.num_pbes)
		return;

	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
	pbe_cnt = 0;

	/* page_size is a power of two; shift converts bytes to pages */
	shift = ilog2(umem->page_size);

	list_for_each_entry(chunk, &umem->chunk_list, list) {
		/* get all the dma regions from the chunk. */
		for (i = 0; i < chunk->nmap; i++) {
			pages = sg_dma_len(&chunk->page_list[i]) >> shift;
			for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
				/* store the page address in pbe */
				pbe->pa_lo =
				    cpu_to_le32(sg_dma_address
						(&chunk->page_list[i]) +
						(umem->page_size * pg_cnt));
				pbe->pa_hi =
				    cpu_to_le32(upper_32_bits
						((sg_dma_address
						  (&chunk->page_list[i]) +
						  umem->page_size * pg_cnt)));
				pbe_cnt += 1;
				total_num_pbes += 1;
				pbe++;

				/* if done building pbes, issue the mbx cmd. */
				if (total_num_pbes == num_pbes)
					return;

				/* if the given pbl is full storing the pbes,
				 * move to next pbl.
				 */
				if (pbe_cnt ==
					(mr->hwmr.pbl_size / sizeof(u64))) {
					pbl_tbl++;
					pbe = (struct ocrdma_pbe *)pbl_tbl->va;
					pbe_cnt = 0;
				}
			}
		}
	}
}
620
621struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
622 u64 usr_addr, int acc, struct ib_udata *udata)
623{
624 int status = -ENOMEM;
625 struct ocrdma_dev *dev;
626 struct ocrdma_mr *mr;
627 struct ocrdma_pd *pd;
628 u32 num_pbes;
629
630 pd = get_ocrdma_pd(ibpd);
631 dev = pd->dev;
632
633 if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
634 return ERR_PTR(-EINVAL);
635
636 mr = kzalloc(sizeof(*mr), GFP_KERNEL);
637 if (!mr)
638 return ERR_PTR(status);
639 mr->hwmr.dev = dev;
640 mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
641 if (IS_ERR(mr->umem)) {
642 status = -EFAULT;
643 goto umem_err;
644 }
645 num_pbes = ib_umem_page_count(mr->umem);
646 status = ocrdma_get_pbl_info(mr, num_pbes);
647 if (status)
648 goto umem_err;
649
650 mr->hwmr.pbe_size = mr->umem->page_size;
651 mr->hwmr.fbo = mr->umem->offset;
652 mr->hwmr.va = usr_addr;
653 mr->hwmr.len = len;
654 mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
655 mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
656 mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
657 mr->hwmr.local_rd = 1;
658 mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
659 status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
660 if (status)
661 goto umem_err;
662 build_user_pbes(dev, mr, num_pbes);
663 status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
664 if (status)
665 goto mbx_err;
666 mr->pd = pd;
667 atomic_inc(&pd->use_cnt);
668 mr->ibmr.lkey = mr->hwmr.lkey;
669 if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
670 mr->ibmr.rkey = mr->hwmr.lkey;
671
672 return &mr->ibmr;
673
674mbx_err:
675 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
676umem_err:
677 kfree(mr);
678 return ERR_PTR(status);
679}
680
681int ocrdma_dereg_mr(struct ib_mr *ib_mr)
682{
683 struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
684 struct ocrdma_dev *dev = mr->hwmr.dev;
685 int status;
686
687 status = ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
688
689 if (mr->hwmr.fr_mr == 0)
690 ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
691
692 atomic_dec(&mr->pd->use_cnt);
693 /* it could be user registered memory. */
694 if (mr->umem)
695 ib_umem_release(mr->umem);
696 kfree(mr);
697 return status;
698}
699
700static int ocrdma_copy_cq_uresp(struct ocrdma_cq *cq, struct ib_udata *udata,
701 struct ib_ucontext *ib_ctx)
702{
703 int status;
704 struct ocrdma_ucontext *uctx;
705 struct ocrdma_create_cq_uresp uresp;
706
707 uresp.cq_id = cq->id;
708 uresp.page_size = cq->len;
709 uresp.num_pages = 1;
710 uresp.max_hw_cqe = cq->max_hw_cqe;
711 uresp.page_addr[0] = cq->pa;
712 uresp.db_page_addr = cq->dev->nic_info.unmapped_db;
713 uresp.db_page_size = cq->dev->nic_info.db_page_size;
714 uresp.phase_change = cq->phase_change ? 1 : 0;
715 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
716 if (status) {
717 ocrdma_err("%s(%d) copy error cqid=0x%x.\n",
718 __func__, cq->dev->id, cq->id);
719 goto err;
720 }
721 uctx = get_ocrdma_ucontext(ib_ctx);
722 status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
723 if (status)
724 goto err;
725 status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
726 if (status) {
727 ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
728 goto err;
729 }
730 cq->ucontext = uctx;
731err:
732 return status;
733}
734
735struct ib_cq *ocrdma_create_cq(struct ib_device *ibdev, int entries, int vector,
736 struct ib_ucontext *ib_ctx,
737 struct ib_udata *udata)
738{
739 struct ocrdma_cq *cq;
740 struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
741 int status;
742 struct ocrdma_create_cq_ureq ureq;
743
744 if (udata) {
745 if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
746 return ERR_PTR(-EFAULT);
747 } else
748 ureq.dpp_cq = 0;
749 cq = kzalloc(sizeof(*cq), GFP_KERNEL);
750 if (!cq)
751 return ERR_PTR(-ENOMEM);
752
753 spin_lock_init(&cq->cq_lock);
754 spin_lock_init(&cq->comp_handler_lock);
755 atomic_set(&cq->use_cnt, 0);
756 INIT_LIST_HEAD(&cq->sq_head);
757 INIT_LIST_HEAD(&cq->rq_head);
758 cq->dev = dev;
759
760 status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq);
761 if (status) {
762 kfree(cq);
763 return ERR_PTR(status);
764 }
765 if (ib_ctx) {
766 status = ocrdma_copy_cq_uresp(cq, udata, ib_ctx);
767 if (status)
768 goto ctx_err;
769 }
770 cq->phase = OCRDMA_CQE_VALID;
771 cq->arm_needed = true;
772 dev->cq_tbl[cq->id] = cq;
773
774 return &cq->ibcq;
775
776ctx_err:
777 ocrdma_mbx_destroy_cq(dev, cq);
778 kfree(cq);
779 return ERR_PTR(status);
780}
781
782int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
783 struct ib_udata *udata)
784{
785 int status = 0;
786 struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
787
788 if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
789 status = -EINVAL;
790 return status;
791 }
792 ibcq->cqe = new_cnt;
793 return status;
794}
795
/*
 * Destroy a CQ. Refused while any QP still references it (use_cnt != 0).
 * For user CQs the mmap keys for the CQ memory and doorbell page are
 * removed before the object is freed.
 */
int ocrdma_destroy_cq(struct ib_cq *ibcq)
{
	int status;
	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
	struct ocrdma_dev *dev = cq->dev;

	if (atomic_read(&cq->use_cnt))
		return -EINVAL;

	status = ocrdma_mbx_destroy_cq(dev, cq);

	if (cq->ucontext) {
		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa, cq->len);
		ocrdma_del_mmap(cq->ucontext, dev->nic_info.unmapped_db,
				dev->nic_info.db_page_size);
	}
	/* drop the id -> cq lookup entry used by event dispatch */
	dev->cq_tbl[cq->id] = NULL;

	kfree(cq);
	return status;
}
817
818static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
819{
820 int status = -EINVAL;
821
822 if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
823 dev->qp_tbl[qp->id] = qp;
824 status = 0;
825 }
826 return status;
827}
828
/* Remove a QP from the device's qpn lookup table. */
static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
{
	dev->qp_tbl[qp->id] = NULL;
}
833
/*
 * Validate QP creation attributes against device capabilities.
 * Returns 0 when acceptable, -EINVAL otherwise (after logging which
 * attribute was rejected). Also enforces GSI policy: user space may not
 * create GSI QPs, only one GSI QP may exist, and ordinary QPs may not
 * share the GSI QP's CQs.
 */
static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
				  struct ib_qp_init_attr *attrs)
{
	/* only GSI, RC and UD transport types are supported */
	if (attrs->qp_type != IB_QPT_GSI &&
	    attrs->qp_type != IB_QPT_RC &&
	    attrs->qp_type != IB_QPT_UD) {
		ocrdma_err("%s(%d) unsupported qp type=0x%x requested\n",
			   __func__, dev->id, attrs->qp_type);
		return -EINVAL;
	}
	if (attrs->cap.max_send_wr > dev->attr.max_wqe) {
		ocrdma_err("%s(%d) unsupported send_wr=0x%x requested\n",
			   __func__, dev->id, attrs->cap.max_send_wr);
		ocrdma_err("%s(%d) supported send_wr=0x%x\n",
			   __func__, dev->id, dev->attr.max_wqe);
		return -EINVAL;
	}
	/* RQ depth only matters when the QP has its own RQ (no SRQ) */
	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
		ocrdma_err("%s(%d) unsupported recv_wr=0x%x requested\n",
			   __func__, dev->id, attrs->cap.max_recv_wr);
		ocrdma_err("%s(%d) supported recv_wr=0x%x\n",
			   __func__, dev->id, dev->attr.max_rqe);
		return -EINVAL;
	}
	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
		ocrdma_err("%s(%d) unsupported inline data size=0x%x"
			   " requested\n", __func__, dev->id,
			   attrs->cap.max_inline_data);
		ocrdma_err("%s(%d) supported inline data size=0x%x\n",
			   __func__, dev->id, dev->attr.max_inline_data);
		return -EINVAL;
	}
	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
		ocrdma_err("%s(%d) unsupported send_sge=0x%x requested\n",
			   __func__, dev->id, attrs->cap.max_send_sge);
		ocrdma_err("%s(%d) supported send_sge=0x%x\n",
			   __func__, dev->id, dev->attr.max_send_sge);
		return -EINVAL;
	}
	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
		ocrdma_err("%s(%d) unsupported recv_sge=0x%x requested\n",
			   __func__, dev->id, attrs->cap.max_recv_sge);
		ocrdma_err("%s(%d) supported recv_sge=0x%x\n",
			   __func__, dev->id, dev->attr.max_recv_sge);
		return -EINVAL;
	}
	/* unprivileged user space cannot create special QP */
	if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) {
		ocrdma_err
		    ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
		     __func__, dev->id, attrs->qp_type);
		return -EINVAL;
	}
	/* allow creating only one GSI type of QP */
	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
		ocrdma_err("%s(%d) GSI special QPs already created.\n",
			   __func__, dev->id);
		return -EINVAL;
	}
	/* verify consumer QPs are not trying to use GSI QP's CQ */
	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
		    (dev->gsi_sqcq == get_ocrdma_cq(attrs->recv_cq)) ||
		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->send_cq)) ||
		    (dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
			ocrdma_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
				   __func__, dev->id);
			return -EINVAL;
		}
	}
	return 0;
}
906
/*
 * Report a newly created QP to user space (queue pages, doorbell layout,
 * optional DPP credits) and register the SQ/RQ pages for later mmap().
 * The response is fully zeroed first, so nothing uninitialized reaches
 * user space. On failure, mmap keys added here are removed again.
 */
static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
				struct ib_udata *udata, int dpp_offset,
				int dpp_credit_lmt, int srq)
{
	int status = 0;
	u64 usr_db;
	struct ocrdma_create_qp_uresp uresp;
	struct ocrdma_dev *dev = qp->dev;
	struct ocrdma_pd *pd = qp->pd;

	memset(&uresp, 0, sizeof(uresp));
	usr_db = dev->nic_info.unmapped_db +
			(pd->id * dev->nic_info.db_page_size);
	uresp.qp_id = qp->id;
	uresp.sq_dbid = qp->sq.dbid;
	uresp.num_sq_pages = 1;
	uresp.sq_page_size = qp->sq.len;
	uresp.sq_page_addr[0] = qp->sq.pa;
	uresp.num_wqe_allocated = qp->sq.max_cnt;
	/* RQ details only apply when the QP has its own RQ (no SRQ) */
	if (!srq) {
		uresp.rq_dbid = qp->rq.dbid;
		uresp.num_rq_pages = 1;
		uresp.rq_page_size = qp->rq.len;
		uresp.rq_page_addr[0] = qp->rq.pa;
		uresp.num_rqe_allocated = qp->rq.max_cnt;
	}
	uresp.db_page_addr = usr_db;
	uresp.db_page_size = dev->nic_info.db_page_size;
	/* GEN2 hardware uses different doorbell offsets and splits RQ
	 * doorbells across two regions depending on the QP id
	 */
	if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
		uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
		uresp.db_rq_offset = ((qp->id & 0xFFFF) < 128) ?
			OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET;
		uresp.db_shift = (qp->id < 128) ? 24 : 16;
	} else {
		uresp.db_sq_offset = OCRDMA_DB_SQ_OFFSET;
		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
		uresp.db_shift = 16;
	}

	if (qp->dpp_enabled) {
		uresp.dpp_credit = dpp_credit_lmt;
		uresp.dpp_offset = dpp_offset;
	}
	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
	if (status) {
		ocrdma_err("%s(%d) user copy error.\n", __func__, dev->id);
		goto err;
	}
	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
				 uresp.sq_page_size);
	if (status)
		goto err;

	if (!srq) {
		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
					 uresp.rq_page_size);
		if (status)
			goto rq_map_err;
	}
	return status;
rq_map_err:
	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
err:
	return status;
}
972
973static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
974 struct ocrdma_pd *pd)
975{
976 if (dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
977 qp->sq_db = dev->nic_info.db +
978 (pd->id * dev->nic_info.db_page_size) +
979 OCRDMA_DB_GEN2_SQ_OFFSET;
980 qp->rq_db = dev->nic_info.db +
981 (pd->id * dev->nic_info.db_page_size) +
982 ((qp->id < 128) ?
983 OCRDMA_DB_GEN2_RQ1_OFFSET : OCRDMA_DB_GEN2_RQ2_OFFSET);
984 } else {
985 qp->sq_db = dev->nic_info.db +
986 (pd->id * dev->nic_info.db_page_size) +
987 OCRDMA_DB_SQ_OFFSET;
988 qp->rq_db = dev->nic_info.db +
989 (pd->id * dev->nic_info.db_page_size) +
990 OCRDMA_DB_RQ_OFFSET;
991 }
992}
993
994static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
995{
996 qp->wqe_wr_id_tbl =
997 kzalloc(sizeof(*(qp->wqe_wr_id_tbl)) * qp->sq.max_cnt,
998 GFP_KERNEL);
999 if (qp->wqe_wr_id_tbl == NULL)
1000 return -ENOMEM;
1001 qp->rqe_wr_id_tbl =
1002 kzalloc(sizeof(u64) * qp->rq.max_cnt, GFP_KERNEL);
1003 if (qp->rqe_wr_id_tbl == NULL)
1004 return -ENOMEM;
1005
1006 return 0;
1007}
1008
1009static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1010 struct ocrdma_pd *pd,
1011 struct ib_qp_init_attr *attrs)
1012{
1013 qp->pd = pd;
1014 spin_lock_init(&qp->q_lock);
1015 INIT_LIST_HEAD(&qp->sq_entry);
1016 INIT_LIST_HEAD(&qp->rq_entry);
1017
1018 qp->qp_type = attrs->qp_type;
1019 qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1020 qp->max_inline_data = attrs->cap.max_inline_data;
1021 qp->sq.max_sges = attrs->cap.max_send_sge;
1022 qp->rq.max_sges = attrs->cap.max_recv_sge;
1023 qp->state = OCRDMA_QPS_RST;
1024}
1025
/*
 * Take a reference on every object this QP uses (PD, both CQs, and the
 * SRQ when present) and publish the hardware id as the IB QP number.
 */
static void ocrdma_set_qp_use_cnt(struct ocrdma_qp *qp, struct ocrdma_pd *pd)
{
	atomic_inc(&pd->use_cnt);
	atomic_inc(&qp->sq_cq->use_cnt);
	atomic_inc(&qp->rq_cq->use_cnt);
	if (qp->srq)
		atomic_inc(&qp->srq->use_cnt);
	qp->ibqp.qp_num = qp->id;
}
1035
1036static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1037 struct ib_qp_init_attr *attrs)
1038{
1039 if (attrs->qp_type == IB_QPT_GSI) {
1040 dev->gsi_qp_created = 1;
1041 dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1042 dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1043 }
1044}
1045
/*
 * Create a QP: validate attributes, create it through the mailbox,
 * register it in the qpn map, set up doorbells, and (for user QPs)
 * publish the queue pages to the process. Kernel QPs additionally get
 * wr_id tables; user-space libraries manage their own.
 * Errors unwind through the label ladder in reverse order of setup.
 */
struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
			       struct ib_qp_init_attr *attrs,
			       struct ib_udata *udata)
{
	int status;
	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev = pd->dev;
	struct ocrdma_create_qp_ureq ureq;
	u16 dpp_credit_lmt, dpp_offset;

	status = ocrdma_check_qp_params(ibpd, dev, attrs);
	if (status)
		goto gen_err;

	memset(&ureq, 0, sizeof(ureq));
	if (udata) {
		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
			return ERR_PTR(-EFAULT);
	}
	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
	if (!qp) {
		status = -ENOMEM;
		goto gen_err;
	}
	qp->dev = dev;
	ocrdma_set_qp_init_params(qp, pd, attrs);

	/* dev_lock serializes QP creation against other QP/device ops */
	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
				      ureq.dpp_cq_id,
				      &dpp_offset, &dpp_credit_lmt);
	if (status)
		goto mbx_err;

	/* user space QP's wr_id table are managed in library */
	if (udata == NULL) {
		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
				  OCRDMA_QP_FAST_REG);
		status = ocrdma_alloc_wr_id_tbl(qp);
		if (status)
			goto map_err;
	}

	status = ocrdma_add_qpn_map(dev, qp);
	if (status)
		goto map_err;
	ocrdma_set_qp_db(dev, qp, pd);
	if (udata) {
		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
					      dpp_credit_lmt,
					      (attrs->srq != NULL));
		if (status)
			goto cpy_err;
	}
	ocrdma_store_gsi_qp_cq(dev, attrs);
	ocrdma_set_qp_use_cnt(qp, pd);
	mutex_unlock(&dev->dev_lock);
	return &qp->ibqp;

cpy_err:
	ocrdma_del_qpn_map(dev, qp);
map_err:
	ocrdma_mbx_destroy_qp(dev, qp);
mbx_err:
	mutex_unlock(&dev->dev_lock);
	/* kfree(NULL) is a no-op for whichever table was not allocated */
	kfree(qp->wqe_wr_id_tbl);
	kfree(qp->rqe_wr_id_tbl);
	kfree(qp);
	ocrdma_err("%s(%d) error=%d\n", __func__, dev->id, status);
gen_err:
	return ERR_PTR(status);
}
1119
1120int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1121 int attr_mask)
1122{
1123 int status = 0;
1124 struct ocrdma_qp *qp;
1125 struct ocrdma_dev *dev;
1126 enum ib_qp_state old_qps;
1127
1128 qp = get_ocrdma_qp(ibqp);
1129 dev = qp->dev;
1130 if (attr_mask & IB_QP_STATE)
1131 status = ocrdma_qp_state_machine(qp, attr->qp_state, &old_qps);
1132 /* if new and previous states are same hw doesn't need to
1133 * know about it.
1134 */
1135 if (status < 0)
1136 return status;
1137 status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask, old_qps);
1138 return status;
1139}
1140
/*
 * Verbs modify-QP entry point: validates the requested state transition
 * with ib_modify_qp_is_ok() under the device lock, then delegates to
 * _ocrdma_modify_qp(). Positive mailbox returns are normalized to 0.
 */
int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
		     int attr_mask, struct ib_udata *udata)
{
	unsigned long flags;
	int status = -EINVAL;
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev;
	enum ib_qp_state old_qps, new_qps;

	qp = get_ocrdma_qp(ibqp);
	dev = qp->dev;

	/* synchronize with multiple contexts trying to change/retrieve qps */
	mutex_lock(&dev->dev_lock);
	/* synchronize with wqe, rqe posting and cqe processing contexts */
	spin_lock_irqsave(&qp->q_lock, flags);
	old_qps = get_ibqp_state(qp->state);
	if (attr_mask & IB_QP_STATE)
		new_qps = attr->qp_state;
	else
		new_qps = old_qps;
	spin_unlock_irqrestore(&qp->q_lock, flags);

	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
		ocrdma_err("%s(%d) invalid attribute mask=0x%x specified for "
			   "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
			   __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
			   old_qps, new_qps);
		goto param_err;
	}

	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
	if (status > 0)
		status = 0;
param_err:
	mutex_unlock(&dev->dev_lock);
	return status;
}
1179
1180static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1181{
1182 switch (mtu) {
1183 case 256:
1184 return IB_MTU_256;
1185 case 512:
1186 return IB_MTU_512;
1187 case 1024:
1188 return IB_MTU_1024;
1189 case 2048:
1190 return IB_MTU_2048;
1191 case 4096:
1192 return IB_MTU_4096;
1193 default:
1194 return IB_MTU_1024;
1195 }
1196}
1197
1198static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1199{
1200 int ib_qp_acc_flags = 0;
1201
1202 if (qp_cap_flags & OCRDMA_QP_INB_WR)
1203 ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1204 if (qp_cap_flags & OCRDMA_QP_INB_RD)
1205 ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1206 return ib_qp_acc_flags;
1207}
1208
/*
 * Query QP attributes: fetch the hardware parameter block via the
 * mailbox (under the device lock) and unpack its packed fields into the
 * generic ib_qp_attr / ib_qp_init_attr structures.
 */
int ocrdma_query_qp(struct ib_qp *ibqp,
		    struct ib_qp_attr *qp_attr,
		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
{
	int status;
	u32 qp_state;
	struct ocrdma_qp_params params;
	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
	struct ocrdma_dev *dev = qp->dev;

	memset(&params, 0, sizeof(params));
	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_query_qp(dev, qp, &params);
	mutex_unlock(&dev->dev_lock);
	if (status)
		goto mbx_err;
	/* NOTE(review): get_ibqp_state() is fed IB_QPS_INIT (an IB enum)
	 * rather than the state extracted from params below — this looks
	 * like it always reports INIT; confirm intended behavior.
	 */
	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
	qp_attr->path_mtu =
		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
				       OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
				       OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
	qp_attr->path_mig_state = IB_MIG_MIGRATED;
	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
	qp_attr->dest_qp_num =
	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;

	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
	/* capacities are reported as one less than the allocated counts */
	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
	qp_attr->cap.max_send_sge = qp->sq.max_sges;
	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
	qp_attr->cap.max_inline_data = dev->attr.max_inline_data;
	qp_init_attr->cap = qp_attr->cap;
	memcpy(&qp_attr->ah_attr.grh.dgid, &params.dgid[0],
	       sizeof(params.dgid));
	qp_attr->ah_attr.grh.flow_label = params.rnt_rc_sl_fl &
	    OCRDMA_QP_PARAMS_FLOW_LABEL_MASK;
	qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx;
	qp_attr->ah_attr.grh.hop_limit = (params.hop_lmt_rq_psn &
					  OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
					  OCRDMA_QP_PARAMS_HOP_LMT_SHIFT;
	qp_attr->ah_attr.grh.traffic_class = (params.tclass_sq_psn &
					      OCRDMA_QP_PARAMS_SQ_PSN_MASK) >>
					      OCRDMA_QP_PARAMS_TCLASS_SHIFT;

	qp_attr->ah_attr.ah_flags = IB_AH_GRH;
	qp_attr->ah_attr.port_num = 1;
	qp_attr->ah_attr.sl = (params.rnt_rc_sl_fl &
			       OCRDMA_QP_PARAMS_SL_MASK) >>
			       OCRDMA_QP_PARAMS_SL_SHIFT;
	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
	qp_attr->retry_cnt =
	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
	    OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
	qp_attr->min_rnr_timer = 0;
	qp_attr->pkey_index = 0;
	qp_attr->port_num = 1;
	qp_attr->ah_attr.src_path_bits = 0;
	qp_attr->ah_attr.static_rate = 0;
	qp_attr->alt_pkey_index = 0;
	qp_attr->alt_port_num = 0;
	qp_attr->alt_timeout = 0;
	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
		    OCRDMA_QP_PARAMS_STATE_SHIFT;
	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
	qp_attr->max_dest_rd_atomic =
	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
	qp_attr->max_rd_atomic =
	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
mbx_err:
	return status;
}
1291
1292static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, int idx)
1293{
1294 int i = idx / 32;
1295 unsigned int mask = (1 << (idx % 32));
1296
1297 if (srq->idx_bit_fields[i] & mask)
1298 srq->idx_bit_fields[i] &= ~mask;
1299 else
1300 srq->idx_bit_fields[i] |= mask;
1301}
1302
1303static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1304{
1305 int free_cnt;
1306 if (q->head >= q->tail)
1307 free_cnt = (q->max_cnt - q->head) + q->tail;
1308 else
1309 free_cnt = q->tail - q->head;
1310 return free_cnt;
1311}
1312
1313static int is_hw_sq_empty(struct ocrdma_qp *qp)
1314{
1315 return (qp->sq.tail == qp->sq.head &&
1316 ocrdma_hwq_free_cnt(&qp->sq) ? 1 : 0);
1317}
1318
1319static int is_hw_rq_empty(struct ocrdma_qp *qp)
1320{
1321 return (qp->rq.tail == qp->rq.head) ? 1 : 0;
1322}
1323
1324static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1325{
1326 return q->va + (q->head * q->entry_size);
1327}
1328
1329static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1330 u32 idx)
1331{
1332 return q->va + (idx * q->entry_size);
1333}
1334
1335static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1336{
1337 q->head = (q->head + 1) & q->max_wqe_idx;
1338}
1339
1340static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1341{
1342 q->tail = (q->tail + 1) & q->max_wqe_idx;
1343}
1344
/* Discard (zero the qpn of) every CQE belonging to @qp in hardware CQ @cq.
 *
 * Called during QP destroy so that a new QP reusing the same qpn does not
 * pick up stale completions.  The CQ doorbell is deliberately NOT rung
 * here; poll_cq() rings it, because CQEs must not be acknowledged out of
 * order.  Runs with cq->cq_lock held for the whole scan.
 */
static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
{
	unsigned long cq_flags;
	unsigned long flags;
	int discard_cnt = 0;
	u32 cur_getp, stop_getp;
	struct ocrdma_cqe *cqe;
	u32 qpn = 0;

	spin_lock_irqsave(&cq->cq_lock, cq_flags);

	/* traverse through the CQEs in the hw CQ,
	 * find the matching CQE for a given qp,
	 * mark the matching one discarded by clearing qpn.
	 * ring the doorbell in the poll_cq() as
	 * we don't complete out of order cqe.
	 */

	cur_getp = cq->getp;
	/* find upto when do we reap the cq. */
	stop_getp = cur_getp;
	do {
		/* nothing left to discard once both work queues are drained */
		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
			break;

		cqe = cq->va + cur_getp;
		/* if (a) done reaping whole hw cq, or
		 * (b) qp_xq becomes empty.
		 * then exit
		 */
		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
		/* if previously discarded cqe found, skip that too. */
		/* check for matching qp */
		if (qpn == 0 || qpn != qp->id)
			goto skip_cqe;

		/* mark cqe discarded so that it is not picked up later
		 * in the poll_cq().
		 */
		discard_cnt += 1;
		cqe->cmn.qpn = 0;
		if (is_cqe_for_sq(cqe))
			ocrdma_hwq_inc_tail(&qp->sq);
		else {
			if (qp->srq) {
				/* SRQ consumption needs its own lock, and the
				 * shadow-table slot must be returned as well.
				 */
				spin_lock_irqsave(&qp->srq->q_lock, flags);
				ocrdma_hwq_inc_tail(&qp->srq->rq);
				ocrdma_srq_toggle_bit(qp->srq, cur_getp);
				spin_unlock_irqrestore(&qp->srq->q_lock, flags);

			} else
				ocrdma_hwq_inc_tail(&qp->rq);
		}
skip_cqe:
		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
	} while (cur_getp != stop_getp);
	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
}
1404
1405static void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1406{
1407 int found = false;
1408 unsigned long flags;
1409 struct ocrdma_dev *dev = qp->dev;
1410 /* sync with any active CQ poll */
1411
1412 spin_lock_irqsave(&dev->flush_q_lock, flags);
1413 found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1414 if (found)
1415 list_del(&qp->sq_entry);
1416 if (!qp->srq) {
1417 found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1418 if (found)
1419 list_del(&qp->rq_entry);
1420 }
1421 spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1422}
1423
/* Destroy a QP: force it into ERROR, tear down the hardware QP, drop it
 * from the qpn map under both CQ locks, scrub its stale CQEs (kernel QPs),
 * release user mmap registrations (user QPs), and free all references.
 *
 * Returns the mailbox status from ocrdma_mbx_destroy_qp().
 */
int ocrdma_destroy_qp(struct ib_qp *ibqp)
{
	int status;
	struct ocrdma_pd *pd;
	struct ocrdma_qp *qp;
	struct ocrdma_dev *dev;
	struct ib_qp_attr attrs;
	int attr_mask = IB_QP_STATE;
	unsigned long flags;

	qp = get_ocrdma_qp(ibqp);
	dev = qp->dev;

	attrs.qp_state = IB_QPS_ERR;
	pd = qp->pd;

	/* change the QP state to ERROR */
	_ocrdma_modify_qp(ibqp, &attrs, attr_mask);

	/* ensure that CQEs for newly created QP (whose id may be same with
	 * one which just getting destroyed are same), don't get
	 * discarded until the old CQEs are discarded.
	 */
	mutex_lock(&dev->dev_lock);
	status = ocrdma_mbx_destroy_qp(dev, qp);

	/*
	 * acquire CQ lock while destroy is in progress, in order to
	 * protect against processing in-flight CQEs for this QP.
	 */
	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
		spin_lock(&qp->rq_cq->cq_lock);

	ocrdma_del_qpn_map(dev, qp);

	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
		spin_unlock(&qp->rq_cq->cq_lock);
	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);

	if (!pd->uctx) {
		/* kernel QP: scrub its CQEs so a future QP that reuses this
		 * qpn does not see stale completions.
		 */
		ocrdma_discard_cqes(qp, qp->sq_cq);
		ocrdma_discard_cqes(qp, qp->rq_cq);
	}
	mutex_unlock(&dev->dev_lock);

	if (pd->uctx) {
		/* user QP: unregister queue buffers from the mmap list */
		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa, qp->sq.len);
		if (!qp->srq)
			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa, qp->rq.len);
	}

	ocrdma_del_flush_qp(qp);

	atomic_dec(&qp->pd->use_cnt);
	atomic_dec(&qp->sq_cq->use_cnt);
	atomic_dec(&qp->rq_cq->use_cnt);
	if (qp->srq)
		atomic_dec(&qp->srq->use_cnt);
	kfree(qp->wqe_wr_id_tbl);
	kfree(qp->rqe_wr_id_tbl);
	kfree(qp);
	return status;
}
1488
1489static int ocrdma_copy_srq_uresp(struct ocrdma_srq *srq, struct ib_udata *udata)
1490{
1491 int status;
1492 struct ocrdma_create_srq_uresp uresp;
1493
1494 uresp.rq_dbid = srq->rq.dbid;
1495 uresp.num_rq_pages = 1;
1496 uresp.rq_page_addr[0] = srq->rq.pa;
1497 uresp.rq_page_size = srq->rq.len;
1498 uresp.db_page_addr = srq->dev->nic_info.unmapped_db +
1499 (srq->pd->id * srq->dev->nic_info.db_page_size);
1500 uresp.db_page_size = srq->dev->nic_info.db_page_size;
1501 uresp.num_rqe_allocated = srq->rq.max_cnt;
1502 if (srq->dev->nic_info.dev_family == OCRDMA_GEN2_FAMILY) {
1503 uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ1_OFFSET;
1504 uresp.db_shift = 24;
1505 } else {
1506 uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1507 uresp.db_shift = 16;
1508 }
1509
1510 status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1511 if (status)
1512 return status;
1513 status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1514 uresp.rq_page_size);
1515 if (status)
1516 return status;
1517 return status;
1518}
1519
1520struct ib_srq *ocrdma_create_srq(struct ib_pd *ibpd,
1521 struct ib_srq_init_attr *init_attr,
1522 struct ib_udata *udata)
1523{
1524 int status = -ENOMEM;
1525 struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1526 struct ocrdma_dev *dev = pd->dev;
1527 struct ocrdma_srq *srq;
1528
1529 if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1530 return ERR_PTR(-EINVAL);
1531 if (init_attr->attr.max_wr > dev->attr.max_rqe)
1532 return ERR_PTR(-EINVAL);
1533
1534 srq = kzalloc(sizeof(*srq), GFP_KERNEL);
1535 if (!srq)
1536 return ERR_PTR(status);
1537
1538 spin_lock_init(&srq->q_lock);
1539 srq->dev = dev;
1540 srq->pd = pd;
1541 srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1542 status = ocrdma_mbx_create_srq(srq, init_attr, pd);
1543 if (status)
1544 goto err;
1545
1546 if (udata == NULL) {
1547 srq->rqe_wr_id_tbl = kzalloc(sizeof(u64) * srq->rq.max_cnt,
1548 GFP_KERNEL);
1549 if (srq->rqe_wr_id_tbl == NULL)
1550 goto arm_err;
1551
1552 srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1553 (srq->rq.max_cnt % 32 ? 1 : 0);
1554 srq->idx_bit_fields =
1555 kmalloc(srq->bit_fields_len * sizeof(u32), GFP_KERNEL);
1556 if (srq->idx_bit_fields == NULL)
1557 goto arm_err;
1558 memset(srq->idx_bit_fields, 0xff,
1559 srq->bit_fields_len * sizeof(u32));
1560 }
1561
1562 if (init_attr->attr.srq_limit) {
1563 status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1564 if (status)
1565 goto arm_err;
1566 }
1567
1568 atomic_set(&srq->use_cnt, 0);
1569 if (udata) {
1570 status = ocrdma_copy_srq_uresp(srq, udata);
1571 if (status)
1572 goto arm_err;
1573 }
1574
1575 atomic_inc(&pd->use_cnt);
1576 return &srq->ibsrq;
1577
1578arm_err:
1579 ocrdma_mbx_destroy_srq(dev, srq);
1580err:
1581 kfree(srq->rqe_wr_id_tbl);
1582 kfree(srq->idx_bit_fields);
1583 kfree(srq);
1584 return ERR_PTR(status);
1585}
1586
1587int ocrdma_modify_srq(struct ib_srq *ibsrq,
1588 struct ib_srq_attr *srq_attr,
1589 enum ib_srq_attr_mask srq_attr_mask,
1590 struct ib_udata *udata)
1591{
1592 int status = 0;
1593 struct ocrdma_srq *srq;
1594
1595 srq = get_ocrdma_srq(ibsrq);
1596 if (srq_attr_mask & IB_SRQ_MAX_WR)
1597 status = -EINVAL;
1598 else
1599 status = ocrdma_mbx_modify_srq(srq, srq_attr);
1600 return status;
1601}
1602
/* Query current SRQ attributes via the device mailbox. */
int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
{
	return ocrdma_mbx_query_srq(get_ocrdma_srq(ibsrq), srq_attr);
}
1612
1613int ocrdma_destroy_srq(struct ib_srq *ibsrq)
1614{
1615 int status;
1616 struct ocrdma_srq *srq;
1617 struct ocrdma_dev *dev;
1618
1619 srq = get_ocrdma_srq(ibsrq);
1620 dev = srq->dev;
1621 if (atomic_read(&srq->use_cnt)) {
1622 ocrdma_err("%s(%d) err, srq=0x%x in use\n",
1623 __func__, dev->id, srq->id);
1624 return -EAGAIN;
1625 }
1626
1627 status = ocrdma_mbx_destroy_srq(dev, srq);
1628
1629 if (srq->pd->uctx)
1630 ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa, srq->rq.len);
1631
1632 atomic_dec(&srq->pd->use_cnt);
1633 kfree(srq->idx_bit_fields);
1634 kfree(srq->rqe_wr_id_tbl);
1635 kfree(srq);
1636 return status;
1637}
1638
1639/* unprivileged verbs and their support functions. */
1640static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1641 struct ocrdma_hdr_wqe *hdr,
1642 struct ib_send_wr *wr)
1643{
1644 struct ocrdma_ewqe_ud_hdr *ud_hdr =
1645 (struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1646 struct ocrdma_ah *ah = get_ocrdma_ah(wr->wr.ud.ah);
1647
1648 ud_hdr->rsvd_dest_qpn = wr->wr.ud.remote_qpn;
1649 if (qp->qp_type == IB_QPT_GSI)
1650 ud_hdr->qkey = qp->qkey;
1651 else
1652 ud_hdr->qkey = wr->wr.ud.remote_qkey;
1653 ud_hdr->rsvd_ahid = ah->id;
1654}
1655
1656static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1657 struct ocrdma_sge *sge, int num_sge,
1658 struct ib_sge *sg_list)
1659{
1660 int i;
1661
1662 for (i = 0; i < num_sge; i++) {
1663 sge[i].lrkey = sg_list[i].lkey;
1664 sge[i].addr_lo = sg_list[i].addr;
1665 sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1666 sge[i].len = sg_list[i].length;
1667 hdr->total_len += sg_list[i].length;
1668 }
1669 if (num_sge == 0)
1670 memset(sge, 0, sizeof(*sge));
1671}
1672
1673static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1674 struct ocrdma_hdr_wqe *hdr,
1675 struct ocrdma_sge *sge,
1676 struct ib_send_wr *wr, u32 wqe_size)
1677{
1678 if (wr->send_flags & IB_SEND_INLINE) {
1679 if (wr->sg_list[0].length > qp->max_inline_data) {
1680 ocrdma_err("%s() supported_len=0x%x,"
1681 " unspported len req=0x%x\n", __func__,
1682 qp->max_inline_data, wr->sg_list[0].length);
1683 return -EINVAL;
1684 }
1685 memcpy(sge,
1686 (void *)(unsigned long)wr->sg_list[0].addr,
1687 wr->sg_list[0].length);
1688 hdr->total_len = wr->sg_list[0].length;
1689 wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
1690 hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1691 } else {
1692 ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1693 if (wr->num_sge)
1694 wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1695 else
1696 wqe_size += sizeof(struct ocrdma_sge);
1697 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1698 }
1699 hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1700 return 0;
1701}
1702
1703static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1704 struct ib_send_wr *wr)
1705{
1706 int status;
1707 struct ocrdma_sge *sge;
1708 u32 wqe_size = sizeof(*hdr);
1709
1710 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1711 ocrdma_build_ud_hdr(qp, hdr, wr);
1712 sge = (struct ocrdma_sge *)(hdr + 2);
1713 wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1714 } else
1715 sge = (struct ocrdma_sge *)(hdr + 1);
1716
1717 status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1718 return status;
1719}
1720
1721static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1722 struct ib_send_wr *wr)
1723{
1724 int status;
1725 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1726 struct ocrdma_sge *sge = ext_rw + 1;
1727 u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1728
1729 status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1730 if (status)
1731 return status;
1732 ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1733 ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1734 ext_rw->lrkey = wr->wr.rdma.rkey;
1735 ext_rw->len = hdr->total_len;
1736 return 0;
1737}
1738
1739static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1740 struct ib_send_wr *wr)
1741{
1742 struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1743 struct ocrdma_sge *sge = ext_rw + 1;
1744 u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
1745 sizeof(struct ocrdma_hdr_wqe);
1746
1747 ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1748 hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1749 hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
1750 hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1751
1752 ext_rw->addr_lo = wr->wr.rdma.remote_addr;
1753 ext_rw->addr_hi = upper_32_bits(wr->wr.rdma.remote_addr);
1754 ext_rw->lrkey = wr->wr.rdma.rkey;
1755 ext_rw->len = hdr->total_len;
1756}
1757
1758static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
1759{
1760 u32 val = qp->sq.dbid | (1 << 16);
1761
1762 iowrite32(val, qp->sq_db);
1763}
1764
1765int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1766 struct ib_send_wr **bad_wr)
1767{
1768 int status = 0;
1769 struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1770 struct ocrdma_hdr_wqe *hdr;
1771 unsigned long flags;
1772
1773 spin_lock_irqsave(&qp->q_lock, flags);
1774 if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
1775 spin_unlock_irqrestore(&qp->q_lock, flags);
1776 return -EINVAL;
1777 }
1778
1779 while (wr) {
1780 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
1781 wr->num_sge > qp->sq.max_sges) {
1782 status = -ENOMEM;
1783 break;
1784 }
1785 hdr = ocrdma_hwq_head(&qp->sq);
1786 hdr->cw = 0;
1787 if (wr->send_flags & IB_SEND_SIGNALED)
1788 hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1789 if (wr->send_flags & IB_SEND_FENCE)
1790 hdr->cw |=
1791 (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
1792 if (wr->send_flags & IB_SEND_SOLICITED)
1793 hdr->cw |=
1794 (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
1795 hdr->total_len = 0;
1796 switch (wr->opcode) {
1797 case IB_WR_SEND_WITH_IMM:
1798 hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1799 hdr->immdt = ntohl(wr->ex.imm_data);
1800 case IB_WR_SEND:
1801 hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1802 ocrdma_build_send(qp, hdr, wr);
1803 break;
1804 case IB_WR_SEND_WITH_INV:
1805 hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
1806 hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
1807 hdr->lkey = wr->ex.invalidate_rkey;
1808 status = ocrdma_build_send(qp, hdr, wr);
1809 break;
1810 case IB_WR_RDMA_WRITE_WITH_IMM:
1811 hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
1812 hdr->immdt = ntohl(wr->ex.imm_data);
1813 case IB_WR_RDMA_WRITE:
1814 hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
1815 status = ocrdma_build_write(qp, hdr, wr);
1816 break;
1817 case IB_WR_RDMA_READ_WITH_INV:
1818 hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
1819 case IB_WR_RDMA_READ:
1820 ocrdma_build_read(qp, hdr, wr);
1821 break;
1822 case IB_WR_LOCAL_INV:
1823 hdr->cw |=
1824 (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
1825 hdr->cw |= (sizeof(struct ocrdma_hdr_wqe) /
1826 OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
1827 hdr->lkey = wr->ex.invalidate_rkey;
1828 break;
1829 default:
1830 status = -EINVAL;
1831 break;
1832 }
1833 if (status) {
1834 *bad_wr = wr;
1835 break;
1836 }
1837 if (wr->send_flags & IB_SEND_SIGNALED)
1838 qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
1839 else
1840 qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
1841 qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
1842 ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
1843 OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
1844 /* make sure wqe is written before adapter can access it */
1845 wmb();
1846 /* inform hw to start processing it */
1847 ocrdma_ring_sq_db(qp);
1848
1849 /* update pointer, counter for next wr */
1850 ocrdma_hwq_inc_head(&qp->sq);
1851 wr = wr->next;
1852 }
1853 spin_unlock_irqrestore(&qp->q_lock, flags);
1854 return status;
1855}
1856
1857static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
1858{
1859 u32 val = qp->rq.dbid | (1 << OCRDMA_GET_NUM_POSTED_SHIFT_VAL(qp));
1860
1861 iowrite32(val, qp->rq_db);
1862}
1863
1864static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe, struct ib_recv_wr *wr,
1865 u16 tag)
1866{
1867 u32 wqe_size = 0;
1868 struct ocrdma_sge *sge;
1869 if (wr->num_sge)
1870 wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
1871 else
1872 wqe_size = sizeof(*sge) + sizeof(*rqe);
1873
1874 rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
1875 OCRDMA_WQE_SIZE_SHIFT);
1876 rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
1877 rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1878 rqe->total_len = 0;
1879 rqe->rsvd_tag = tag;
1880 sge = (struct ocrdma_sge *)(rqe + 1);
1881 ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
1882 ocrdma_cpu_to_le32(rqe, wqe_size);
1883}
1884
/* Post a chain of receive work requests to the RQ.
 *
 * Posting is rejected while the QP is in RESET or ERROR.  On failure,
 * *bad_wr points at the first WR that could not be posted; earlier WRs
 * remain posted.  Returns 0 or a negative errno.
 */
int ocrdma_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
		     struct ib_recv_wr **bad_wr)
{
	int status = 0;
	unsigned long flags;
	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
	struct ocrdma_hdr_wqe *rqe;

	spin_lock_irqsave(&qp->q_lock, flags);
	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
		spin_unlock_irqrestore(&qp->q_lock, flags);
		*bad_wr = wr;
		return -EINVAL;
	}
	while (wr) {
		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
		    wr->num_sge > qp->rq.max_sges) {
			*bad_wr = wr;
			status = -ENOMEM;
			break;
		}
		rqe = ocrdma_hwq_head(&qp->rq);
		/* tag 0: ordinary RQs complete in order, no shadow index */
		ocrdma_build_rqe(rqe, wr, 0);

		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
		/* make sure rqe is written before adapter can access it */
		wmb();

		/* inform hw to start processing it */
		ocrdma_ring_rq_db(qp);

		/* update pointer, counter for next wr */
		ocrdma_hwq_inc_head(&qp->rq);
		wr = wr->next;
	}
	spin_unlock_irqrestore(&qp->q_lock, flags);
	return status;
}
1923
1924/* cqe for srq's rqe can potentially arrive out of order.
1925 * index gives the entry in the shadow table where to store
1926 * the wr_id. tag/index is returned in cqe to reference back
1927 * for a given rqe.
1928 */
1929static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
1930{
1931 int row = 0;
1932 int indx = 0;
1933
1934 for (row = 0; row < srq->bit_fields_len; row++) {
1935 if (srq->idx_bit_fields[row]) {
1936 indx = ffs(srq->idx_bit_fields[row]);
1937 indx = (row * 32) + (indx - 1);
1938 if (indx >= srq->rq.max_cnt)
1939 BUG();
1940 ocrdma_srq_toggle_bit(srq, indx);
1941 break;
1942 }
1943 }
1944
1945 if (row == srq->bit_fields_len)
1946 BUG();
1947 return indx;
1948}
1949
1950static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
1951{
1952 u32 val = srq->rq.dbid | (1 << 16);
1953
1954 iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
1955}
1956
/* Post a chain of receive work requests to a shared receive queue.
 *
 * Each RQE is tagged with a shadow-table index so the (possibly
 * out-of-order) CQE can be mapped back to its wr_id.  On failure,
 * *bad_wr points at the first WR not posted.  Returns 0 or -ENOMEM.
 */
int ocrdma_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int status = 0;
	unsigned long flags;
	struct ocrdma_srq *srq;
	struct ocrdma_hdr_wqe *rqe;
	u16 tag;

	srq = get_ocrdma_srq(ibsrq);

	spin_lock_irqsave(&srq->q_lock, flags);
	while (wr) {
		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
		    wr->num_sge > srq->rq.max_sges) {
			status = -ENOMEM;
			*bad_wr = wr;
			break;
		}
		/* claim a free shadow-table slot; returned in the CQE */
		tag = ocrdma_srq_get_idx(srq);
		rqe = ocrdma_hwq_head(&srq->rq);
		ocrdma_build_rqe(rqe, wr, tag);

		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
		/* make sure rqe is written before adapter can perform DMA */
		wmb();
		/* inform hw to start processing it */
		ocrdma_ring_srq_db(srq);
		/* update pointer, counter for next wr */
		ocrdma_hwq_inc_head(&srq->rq);
		wr = wr->next;
	}
	spin_unlock_irqrestore(&srq->q_lock, flags);
	return status;
}
1992
1993static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
1994{
1995 enum ib_wc_status ibwc_status = IB_WC_GENERAL_ERR;
1996
1997 switch (status) {
1998 case OCRDMA_CQE_GENERAL_ERR:
1999 ibwc_status = IB_WC_GENERAL_ERR;
2000 break;
2001 case OCRDMA_CQE_LOC_LEN_ERR:
2002 ibwc_status = IB_WC_LOC_LEN_ERR;
2003 break;
2004 case OCRDMA_CQE_LOC_QP_OP_ERR:
2005 ibwc_status = IB_WC_LOC_QP_OP_ERR;
2006 break;
2007 case OCRDMA_CQE_LOC_EEC_OP_ERR:
2008 ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2009 break;
2010 case OCRDMA_CQE_LOC_PROT_ERR:
2011 ibwc_status = IB_WC_LOC_PROT_ERR;
2012 break;
2013 case OCRDMA_CQE_WR_FLUSH_ERR:
2014 ibwc_status = IB_WC_WR_FLUSH_ERR;
2015 break;
2016 case OCRDMA_CQE_MW_BIND_ERR:
2017 ibwc_status = IB_WC_MW_BIND_ERR;
2018 break;
2019 case OCRDMA_CQE_BAD_RESP_ERR:
2020 ibwc_status = IB_WC_BAD_RESP_ERR;
2021 break;
2022 case OCRDMA_CQE_LOC_ACCESS_ERR:
2023 ibwc_status = IB_WC_LOC_ACCESS_ERR;
2024 break;
2025 case OCRDMA_CQE_REM_INV_REQ_ERR:
2026 ibwc_status = IB_WC_REM_INV_REQ_ERR;
2027 break;
2028 case OCRDMA_CQE_REM_ACCESS_ERR:
2029 ibwc_status = IB_WC_REM_ACCESS_ERR;
2030 break;
2031 case OCRDMA_CQE_REM_OP_ERR:
2032 ibwc_status = IB_WC_REM_OP_ERR;
2033 break;
2034 case OCRDMA_CQE_RETRY_EXC_ERR:
2035 ibwc_status = IB_WC_RETRY_EXC_ERR;
2036 break;
2037 case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2038 ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2039 break;
2040 case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2041 ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2042 break;
2043 case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2044 ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2045 break;
2046 case OCRDMA_CQE_REM_ABORT_ERR:
2047 ibwc_status = IB_WC_REM_ABORT_ERR;
2048 break;
2049 case OCRDMA_CQE_INV_EECN_ERR:
2050 ibwc_status = IB_WC_INV_EECN_ERR;
2051 break;
2052 case OCRDMA_CQE_INV_EEC_STATE_ERR:
2053 ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2054 break;
2055 case OCRDMA_CQE_FATAL_ERR:
2056 ibwc_status = IB_WC_FATAL_ERR;
2057 break;
2058 case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2059 ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2060 break;
2061 default:
2062 ibwc_status = IB_WC_GENERAL_ERR;
2063 break;
2064 };
2065 return ibwc_status;
2066}
2067
2068static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2069 u32 wqe_idx)
2070{
2071 struct ocrdma_hdr_wqe *hdr;
2072 struct ocrdma_sge *rw;
2073 int opcode;
2074
2075 hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2076
2077 ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2078 /* Undo the hdr->cw swap */
2079 opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2080 switch (opcode) {
2081 case OCRDMA_WRITE:
2082 ibwc->opcode = IB_WC_RDMA_WRITE;
2083 break;
2084 case OCRDMA_READ:
2085 rw = (struct ocrdma_sge *)(hdr + 1);
2086 ibwc->opcode = IB_WC_RDMA_READ;
2087 ibwc->byte_len = rw->len;
2088 break;
2089 case OCRDMA_SEND:
2090 ibwc->opcode = IB_WC_SEND;
2091 break;
2092 case OCRDMA_LKEY_INV:
2093 ibwc->opcode = IB_WC_LOCAL_INV;
2094 break;
2095 default:
2096 ibwc->status = IB_WC_GENERAL_ERR;
2097 ocrdma_err("%s() invalid opcode received = 0x%x\n",
2098 __func__, hdr->cw & OCRDMA_WQE_OPCODE_MASK);
2099 break;
2100 };
2101}
2102
2103static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2104 struct ocrdma_cqe *cqe)
2105{
2106 if (is_cqe_for_sq(cqe)) {
2107 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2108 cqe->flags_status_srcqpn) &
2109 ~OCRDMA_CQE_STATUS_MASK);
2110 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2111 cqe->flags_status_srcqpn) |
2112 (OCRDMA_CQE_WR_FLUSH_ERR <<
2113 OCRDMA_CQE_STATUS_SHIFT));
2114 } else {
2115 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2116 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2117 cqe->flags_status_srcqpn) &
2118 ~OCRDMA_CQE_UD_STATUS_MASK);
2119 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2120 cqe->flags_status_srcqpn) |
2121 (OCRDMA_CQE_WR_FLUSH_ERR <<
2122 OCRDMA_CQE_UD_STATUS_SHIFT));
2123 } else {
2124 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2125 cqe->flags_status_srcqpn) &
2126 ~OCRDMA_CQE_STATUS_MASK);
2127 cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2128 cqe->flags_status_srcqpn) |
2129 (OCRDMA_CQE_WR_FLUSH_ERR <<
2130 OCRDMA_CQE_STATUS_SHIFT));
2131 }
2132 }
2133}
2134
/* Build an error work completion for @qp and push the QP onto the
 * software flush path (ERROR state).
 *
 * Returns true ("expand") when WQEs/RQEs are still outstanding and the
 * current hardware CQE must be reused to emit further flush completions;
 * in that case the CQE's status is rewritten to WR_FLUSH_ERR in place.
 */
static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
				  struct ocrdma_qp *qp, int status)
{
	bool expand = false;

	ibwc->byte_len = 0;
	ibwc->qp = &qp->ibqp;
	ibwc->status = ocrdma_to_ibwc_err(status);

	ocrdma_flush_qp(qp);
	ocrdma_qp_state_machine(qp, IB_QPS_ERR, NULL);

	/* if wqe/rqe pending for which cqe needs to be returned,
	 * trigger inflating it.
	 */
	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
		expand = true;
		ocrdma_set_cqe_status_flushed(qp, cqe);
	}
	return expand;
}
2156
2157static int ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2158 struct ocrdma_qp *qp, int status)
2159{
2160 ibwc->opcode = IB_WC_RECV;
2161 ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2162 ocrdma_hwq_inc_tail(&qp->rq);
2163
2164 return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2165}
2166
2167static int ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2168 struct ocrdma_qp *qp, int status)
2169{
2170 ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2171 ocrdma_hwq_inc_tail(&qp->sq);
2172
2173 return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2174}
2175
2176
/* Handle a send CQE carrying an error status.
 *
 * Output flags: *polled — a WC was produced for the caller;
 * *stop — abandon polling this CQ (the CQE is kept to re-trigger the
 * buddy CQ's event); return value ("expand") — reuse this CQE for the
 * next outstanding WQE/RQE flush completion.
 */
static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
				 bool *polled, bool *stop)
{
	bool expand;
	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;

	/* when hw sq is empty, but rq is not empty, so we continue
	 * to keep the cqe in order to get the cq event again.
	 */
	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* when cq for rq and sq is same, it is safe to return
		 * flush cqe for RQEs.
		 */
		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
			*polled = true;
			status = OCRDMA_CQE_WR_FLUSH_ERR;
			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
		} else {
			/* stop processing further cqe as this cqe is used for
			 * triggering cq event on buddy cq of RQ.
			 * When QP is destroyed, this cqe will be removed
			 * from the cq's hardware q.
			 */
			*polled = false;
			*stop = true;
			expand = false;
		}
	} else {
		*polled = true;
		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
	}
	return expand;
}
2212
/* Consume one successful send CQE.
 *
 * An unsignaled WQE produces no user-visible completion (*polled stays
 * false) but its tail slot is still consumed.  When the hardware
 * coalesced several WQEs into one CQE (wqe_idx ahead of our tail), true
 * is returned so the caller re-processes this CQE for the next WQE.
 */
static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
				     struct ocrdma_cqe *cqe,
				     struct ib_wc *ibwc, bool *polled)
{
	bool expand = false;
	int tail = qp->sq.tail;
	u32 wqe_idx;

	if (!qp->wqe_wr_id_tbl[tail].signaled) {
		*polled = false;    /* WC cannot be consumed yet */
	} else {
		ibwc->status = IB_WC_SUCCESS;
		ibwc->wc_flags = 0;
		ibwc->qp = &qp->ibqp;
		ocrdma_update_wc(qp, ibwc, tail);
		*polled = true;
	}
	wqe_idx = le32_to_cpu(cqe->wq.wqeidx) &	OCRDMA_CQE_WQEIDX_MASK;
	if (tail != wqe_idx)
		expand = true; /* Coalesced CQE can't be consumed yet */

	ocrdma_hwq_inc_tail(&qp->sq);
	return expand;
}
2237
2238static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2239 struct ib_wc *ibwc, bool *polled, bool *stop)
2240{
2241 int status;
2242 bool expand;
2243
2244 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2245 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2246
2247 if (status == OCRDMA_CQE_SUCCESS)
2248 expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2249 else
2250 expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2251 return expand;
2252}
2253
2254static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
2255{
2256 int status;
2257
2258 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2259 OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2260 ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2261 OCRDMA_CQE_SRCQP_MASK;
2262 ibwc->pkey_index = le32_to_cpu(cqe->ud.rxlen_pkey) &
2263 OCRDMA_CQE_PKEY_MASK;
2264 ibwc->wc_flags = IB_WC_GRH;
2265 ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2266 OCRDMA_CQE_UD_XFER_LEN_SHIFT);
2267 return status;
2268}
2269
2270static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2271 struct ocrdma_cqe *cqe,
2272 struct ocrdma_qp *qp)
2273{
2274 unsigned long flags;
2275 struct ocrdma_srq *srq;
2276 u32 wqe_idx;
2277
2278 srq = get_ocrdma_srq(qp->ibqp.srq);
2279 wqe_idx = le32_to_cpu(cqe->rq.buftag_qpn) >> OCRDMA_CQE_BUFTAG_SHIFT;
2280 ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2281 spin_lock_irqsave(&srq->q_lock, flags);
2282 ocrdma_srq_toggle_bit(srq, wqe_idx);
2283 spin_unlock_irqrestore(&srq->q_lock, flags);
2284 ocrdma_hwq_inc_tail(&srq->rq);
2285}
2286
/* Handle a receive CQE carrying an error status — mirror image of
 * ocrdma_poll_err_scqe(): *polled/*stop/return("expand") have the same
 * meanings, with SQ and RQ roles swapped.
 */
static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
				 struct ib_wc *ibwc, bool *polled, bool *stop,
				 int status)
{
	bool expand;

	/* when hw_rq is empty, but wq is not empty, so continue
	 * to keep the cqe to get the cq event again.
	 */
	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
		/* shared CQ: safe to emit flush completions for the SQ side */
		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
			*polled = true;
			status = OCRDMA_CQE_WR_FLUSH_ERR;
			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
		} else {
			/* keep this CQE to re-trigger the buddy CQ's event */
			*polled = false;
			*stop = true;
			expand = false;
		}
	} else {
		*polled = true;
		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
	}
	return expand;
}
2312
/* Fill a work completion for a successful receive CQE: UD metadata or
 * byte count, immediate/invalidate data flags, and the wr_id — taken
 * from the SRQ shadow table or the QP's in-order RQ table.
 */
static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
{
	ibwc->opcode = IB_WC_RECV;
	ibwc->qp = &qp->ibqp;
	ibwc->status = IB_WC_SUCCESS;

	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
		ocrdma_update_ud_rcqe(ibwc, cqe);
	else
		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);

	/* immediate data / invalidated rkey share the lkey_immdt field */
	if (is_cqe_imm(cqe)) {
		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
		ibwc->wc_flags |= IB_WC_WITH_IMM;
	} else if (is_cqe_wr_imm(cqe)) {
		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
		ibwc->wc_flags |= IB_WC_WITH_IMM;
	} else if (is_cqe_invalidated(cqe)) {
		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
	}
	if (qp->ibqp.srq)
		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
	else {
		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
		ocrdma_hwq_inc_tail(&qp->rq);
	}
}
2343
2344static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2345 struct ib_wc *ibwc, bool *polled, bool *stop)
2346{
2347 int status;
2348 bool expand = false;
2349
2350 ibwc->wc_flags = 0;
2351 if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2352 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2353 OCRDMA_CQE_UD_STATUS_MASK) >>
2354 OCRDMA_CQE_UD_STATUS_SHIFT;
2355 else
2356 status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2357 OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2358
2359 if (status == OCRDMA_CQE_SUCCESS) {
2360 *polled = true;
2361 ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2362 } else {
2363 expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2364 status);
2365 }
2366 return expand;
2367}
2368
2369static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2370 u16 cur_getp)
2371{
2372 if (cq->phase_change) {
2373 if (cur_getp == 0)
2374 cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2375 } else
2376 /* clear valid bit */
2377 cqe->flags_status_srcqpn = 0;
2378}
2379
/* Walk the hardware CQ ring and convert up to @num_entries valid CQEs
 * into ib_wc entries in @ibwc. Returns the number of work completions
 * written. Must be called with cq->cq_lock held (callers in this file
 * take it — TODO confirm for any external callers).
 *
 * Control flow notes:
 *  - "expand" means the same hardware CQE yields another work completion,
 *    so the get-pointer must NOT advance (jump past skip_cqe).
 *  - "stop" aborts the walk entirely (e.g. flush in progress).
 *  - "polled" marks that ibwc was filled and the output cursor advances.
 */
static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
			    struct ib_wc *ibwc)
{
	u16 qpn = 0;
	int i = 0;                      /* number of wc entries produced */
	bool expand = false;
	int polled_hw_cqes = 0;         /* hw CQEs consumed, for doorbell */
	struct ocrdma_qp *qp = NULL;
	struct ocrdma_dev *dev = cq->dev;
	struct ocrdma_cqe *cqe;
	u16 cur_getp; bool polled = false; bool stop = false;

	cur_getp = cq->getp;
	while (num_entries) {
		cqe = cq->va + cur_getp;
		/* check whether valid cqe or not */
		if (!is_cqe_valid(cq, cqe))
			break;
		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
		/* ignore discarded cqe */
		if (qpn == 0)
			goto skip_cqe;
		qp = dev->qp_tbl[qpn];
		BUG_ON(qp == NULL);

		if (is_cqe_for_sq(cqe)) {
			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
						  &stop);
		} else {
			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
						  &stop);
		}
		/* same CQE yields more completions: keep get-pointer put */
		if (expand)
			goto expand_cqe;
		if (stop)
			goto stop_cqe;
		/* clear qpn to avoid duplicate processing by discard_cqe() */
		cqe->cmn.qpn = 0;
skip_cqe:
		/* consume the hw CQE and maintain the ring phase/valid bit */
		polled_hw_cqes += 1;
		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
		ocrdma_change_cq_phase(cq, cqe, cur_getp);
expand_cqe:
		/* advance the output cursor only when a wc was produced */
		if (polled) {
			num_entries -= 1;
			i += 1;
			ibwc = ibwc + 1;
			polled = false;
		}
	}
stop_cqe:
	cq->getp = cur_getp;
	/* ring the doorbell so the hw can reuse the consumed CQE slots */
	if (polled_hw_cqes || expand || stop) {
		ocrdma_ring_cq_db(dev, cq->id, cq->armed, cq->solicited,
				  polled_hw_cqes);
	}
	return i;
}
2438
2439/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
2440static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2441 struct ocrdma_qp *qp, struct ib_wc *ibwc)
2442{
2443 int err_cqes = 0;
2444
2445 while (num_entries) {
2446 if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2447 break;
2448 if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2449 ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2450 ocrdma_hwq_inc_tail(&qp->sq);
2451 } else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2452 ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2453 ocrdma_hwq_inc_tail(&qp->rq);
2454 } else
2455 return err_cqes;
2456 ibwc->byte_len = 0;
2457 ibwc->status = IB_WC_WR_FLUSH_ERR;
2458 ibwc = ibwc + 1;
2459 err_cqes += 1;
2460 num_entries -= 1;
2461 }
2462 return err_cqes;
2463}
2464
2465int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2466{
2467 int cqes_to_poll = num_entries;
2468 struct ocrdma_cq *cq = NULL;
2469 unsigned long flags;
2470 struct ocrdma_dev *dev;
2471 int num_os_cqe = 0, err_cqes = 0;
2472 struct ocrdma_qp *qp;
2473
2474 cq = get_ocrdma_cq(ibcq);
2475 dev = cq->dev;
2476
2477 /* poll cqes from adapter CQ */
2478 spin_lock_irqsave(&cq->cq_lock, flags);
2479 num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2480 spin_unlock_irqrestore(&cq->cq_lock, flags);
2481 cqes_to_poll -= num_os_cqe;
2482
2483 if (cqes_to_poll) {
2484 wc = wc + num_os_cqe;
2485 /* adapter returns single error cqe when qp moves to
2486 * error state. So insert error cqes with wc_status as
2487 * FLUSHED for pending WQEs and RQEs of QP's SQ and RQ
2488 * respectively which uses this CQ.
2489 */
2490 spin_lock_irqsave(&dev->flush_q_lock, flags);
2491 list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2492 if (cqes_to_poll == 0)
2493 break;
2494 err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2495 cqes_to_poll -= err_cqes;
2496 num_os_cqe += err_cqes;
2497 wc = wc + err_cqes;
2498 }
2499 spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2500 }
2501 return num_os_cqe;
2502}
2503
2504int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2505{
2506 struct ocrdma_cq *cq;
2507 unsigned long flags;
2508 struct ocrdma_dev *dev;
2509 u16 cq_id;
2510 u16 cur_getp;
2511 struct ocrdma_cqe *cqe;
2512
2513 cq = get_ocrdma_cq(ibcq);
2514 cq_id = cq->id;
2515 dev = cq->dev;
2516
2517 spin_lock_irqsave(&cq->cq_lock, flags);
2518 if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2519 cq->armed = true;
2520 if (cq_flags & IB_CQ_SOLICITED)
2521 cq->solicited = true;
2522
2523 cur_getp = cq->getp;
2524 cqe = cq->va + cur_getp;
2525
2526 /* check whether any valid cqe exist or not, if not then safe to
2527 * arm. If cqe is not yet consumed, then let it get consumed and then
2528 * we arm it to avoid false interrupts.
2529 */
2530 if (!is_cqe_valid(cq, cqe) || cq->arm_needed) {
2531 cq->arm_needed = false;
2532 ocrdma_ring_cq_db(dev, cq_id, cq->armed, cq->solicited, 0);
2533 }
2534 spin_unlock_irqrestore(&cq->cq_lock, flags);
2535 return 0;
2536}
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
deleted file mode 100644
index 633f03d8027..00000000000
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.h
+++ /dev/null
@@ -1,93 +0,0 @@
1/*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
6 * www.emulex.com *
7 * *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
19 *
20 * Contact Information:
21 * linux-drivers@emulex.com
22 *
23 * Emulex
24 * 3333 Susan Street
25 * Costa Mesa, CA 92626
26 *******************************************************************/
27
28#ifndef __OCRDMA_VERBS_H__
29#define __OCRDMA_VERBS_H__
30
31int ocrdma_post_send(struct ib_qp *, struct ib_send_wr *,
32 struct ib_send_wr **bad_wr);
33int ocrdma_post_recv(struct ib_qp *, struct ib_recv_wr *,
34 struct ib_recv_wr **bad_wr);
35
36int ocrdma_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc);
37int ocrdma_arm_cq(struct ib_cq *, enum ib_cq_notify_flags flags);
38
39int ocrdma_query_device(struct ib_device *, struct ib_device_attr *props);
40int ocrdma_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
41int ocrdma_modify_port(struct ib_device *, u8 port, int mask,
42 struct ib_port_modify *props);
43
44void ocrdma_get_guid(struct ocrdma_dev *, u8 *guid);
45int ocrdma_query_gid(struct ib_device *, u8 port,
46 int index, union ib_gid *gid);
47int ocrdma_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
48
49struct ib_ucontext *ocrdma_alloc_ucontext(struct ib_device *,
50 struct ib_udata *);
51int ocrdma_dealloc_ucontext(struct ib_ucontext *);
52
53int ocrdma_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
54
55struct ib_pd *ocrdma_alloc_pd(struct ib_device *,
56 struct ib_ucontext *, struct ib_udata *);
57int ocrdma_dealloc_pd(struct ib_pd *pd);
58
59struct ib_cq *ocrdma_create_cq(struct ib_device *, int entries, int vector,
60 struct ib_ucontext *, struct ib_udata *);
61int ocrdma_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
62int ocrdma_destroy_cq(struct ib_cq *);
63
64struct ib_qp *ocrdma_create_qp(struct ib_pd *,
65 struct ib_qp_init_attr *attrs,
66 struct ib_udata *);
67int _ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
68 int attr_mask);
69int ocrdma_modify_qp(struct ib_qp *, struct ib_qp_attr *attr,
70 int attr_mask, struct ib_udata *udata);
71int ocrdma_query_qp(struct ib_qp *,
72 struct ib_qp_attr *qp_attr,
73 int qp_attr_mask, struct ib_qp_init_attr *);
74int ocrdma_destroy_qp(struct ib_qp *);
75
76struct ib_srq *ocrdma_create_srq(struct ib_pd *, struct ib_srq_init_attr *,
77 struct ib_udata *);
78int ocrdma_modify_srq(struct ib_srq *, struct ib_srq_attr *,
79 enum ib_srq_attr_mask, struct ib_udata *);
80int ocrdma_query_srq(struct ib_srq *, struct ib_srq_attr *);
81int ocrdma_destroy_srq(struct ib_srq *);
82int ocrdma_post_srq_recv(struct ib_srq *, struct ib_recv_wr *,
83 struct ib_recv_wr **bad_recv_wr);
84
85int ocrdma_dereg_mr(struct ib_mr *);
86struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *, int acc);
87struct ib_mr *ocrdma_reg_kernel_mr(struct ib_pd *,
88 struct ib_phys_buf *buffer_list,
89 int num_phys_buf, int acc, u64 *iova_start);
90struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *, u64 start, u64 length,
91 u64 virt, int acc, struct ib_udata *);
92
93#endif /* __OCRDMA_VERBS_H__ */
diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h
index 4d11575c201..c9624ea8720 100644
--- a/drivers/infiniband/hw/qib/qib.h
+++ b/drivers/infiniband/hw/qib/qib.h
@@ -1,8 +1,8 @@
1#ifndef _QIB_KERNEL_H 1#ifndef _QIB_KERNEL_H
2#define _QIB_KERNEL_H 2#define _QIB_KERNEL_H
3/* 3/*
4 * Copyright (c) 2012 Intel Corporation. All rights reserved. 4 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
5 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 5 * All rights reserved.
6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 6 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
7 * 7 *
8 * This software is available to you under a choice of one of two 8 * This software is available to you under a choice of one of two
@@ -87,7 +87,7 @@ struct qlogic_ib_stats {
87}; 87};
88 88
89extern struct qlogic_ib_stats qib_stats; 89extern struct qlogic_ib_stats qib_stats;
90extern const struct pci_error_handlers qib_pci_err_handler; 90extern struct pci_error_handlers qib_pci_err_handler;
91extern struct pci_driver qib_driver; 91extern struct pci_driver qib_driver;
92 92
93#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ 93#define QIB_CHIP_SWVERSION QIB_CHIP_VERS_MAJ
@@ -171,9 +171,7 @@ struct qib_ctxtdata {
171 /* how many alloc_pages() chunks in rcvegrbuf_pages */ 171 /* how many alloc_pages() chunks in rcvegrbuf_pages */
172 u32 rcvegrbuf_chunks; 172 u32 rcvegrbuf_chunks;
173 /* how many egrbufs per chunk */ 173 /* how many egrbufs per chunk */
174 u16 rcvegrbufs_perchunk; 174 u32 rcvegrbufs_perchunk;
175 /* ilog2 of above */
176 u16 rcvegrbufs_perchunk_shift;
177 /* order for rcvegrbuf_pages */ 175 /* order for rcvegrbuf_pages */
178 size_t rcvegrbuf_size; 176 size_t rcvegrbuf_size;
179 /* rcvhdrq size (for freeing) */ 177 /* rcvhdrq size (for freeing) */
@@ -223,9 +221,6 @@ struct qib_ctxtdata {
223 /* ctxt rcvhdrq head offset */ 221 /* ctxt rcvhdrq head offset */
224 u32 head; 222 u32 head;
225 u32 pkt_count; 223 u32 pkt_count;
226 /* lookaside fields */
227 struct qib_qp *lookaside_qp;
228 u32 lookaside_qpn;
229 /* QPs waiting for context processing */ 224 /* QPs waiting for context processing */
230 struct list_head qp_wait_list; 225 struct list_head qp_wait_list;
231}; 226};
@@ -427,14 +422,6 @@ struct qib_verbs_txreq {
427/* how often we check for packet activity for "power on hours (in seconds) */ 422/* how often we check for packet activity for "power on hours (in seconds) */
428#define ACTIVITY_TIMER 5 423#define ACTIVITY_TIMER 5
429 424
430#define MAX_NAME_SIZE 64
431struct qib_msix_entry {
432 struct msix_entry msix;
433 void *arg;
434 char name[MAX_NAME_SIZE];
435 cpumask_var_t mask;
436};
437
438/* Below is an opaque struct. Each chip (device) can maintain 425/* Below is an opaque struct. Each chip (device) can maintain
439 * private data needed for its operation, but not germane to the 426 * private data needed for its operation, but not germane to the
440 * rest of the driver. For convenience, we define another that 427 * rest of the driver. For convenience, we define another that
@@ -519,7 +506,6 @@ struct qib_pportdata {
519 struct qib_devdata *dd; 506 struct qib_devdata *dd;
520 struct qib_chippport_specific *cpspec; /* chip-specific per-port */ 507 struct qib_chippport_specific *cpspec; /* chip-specific per-port */
521 struct kobject pport_kobj; 508 struct kobject pport_kobj;
522 struct kobject pport_cc_kobj;
523 struct kobject sl2vl_kobj; 509 struct kobject sl2vl_kobj;
524 struct kobject diagc_kobj; 510 struct kobject diagc_kobj;
525 511
@@ -531,6 +517,8 @@ struct qib_pportdata {
531 /* qib_lflags driver is waiting for */ 517 /* qib_lflags driver is waiting for */
532 u32 state_wanted; 518 u32 state_wanted;
533 spinlock_t lflags_lock; 519 spinlock_t lflags_lock;
520 /* number of (port-specific) interrupts for this port -- saturates... */
521 u32 int_counter;
534 522
535 /* ref count for each pkey */ 523 /* ref count for each pkey */
536 atomic_t pkeyrefs[4]; 524 atomic_t pkeyrefs[4];
@@ -542,27 +530,24 @@ struct qib_pportdata {
542 u64 *statusp; 530 u64 *statusp;
543 531
544 /* SendDMA related entries */ 532 /* SendDMA related entries */
545 533 spinlock_t sdma_lock;
546 /* read mostly */
547 struct qib_sdma_desc *sdma_descq;
548 struct workqueue_struct *qib_wq;
549 struct qib_sdma_state sdma_state; 534 struct qib_sdma_state sdma_state;
550 dma_addr_t sdma_descq_phys; 535 unsigned long sdma_buf_jiffies;
551 volatile __le64 *sdma_head_dma; /* DMA'ed by chip */ 536 struct qib_sdma_desc *sdma_descq;
552 dma_addr_t sdma_head_phys;
553 u16 sdma_descq_cnt;
554
555 /* read/write using lock */
556 spinlock_t sdma_lock ____cacheline_aligned_in_smp;
557 struct list_head sdma_activelist;
558 u64 sdma_descq_added; 537 u64 sdma_descq_added;
559 u64 sdma_descq_removed; 538 u64 sdma_descq_removed;
539 u16 sdma_descq_cnt;
560 u16 sdma_descq_tail; 540 u16 sdma_descq_tail;
561 u16 sdma_descq_head; 541 u16 sdma_descq_head;
542 u16 sdma_next_intr;
543 u16 sdma_reset_wait;
562 u8 sdma_generation; 544 u8 sdma_generation;
545 struct tasklet_struct sdma_sw_clean_up_task;
546 struct list_head sdma_activelist;
563 547
564 struct tasklet_struct sdma_sw_clean_up_task 548 dma_addr_t sdma_descq_phys;
565 ____cacheline_aligned_in_smp; 549 volatile __le64 *sdma_head_dma; /* DMA'ed by chip */
550 dma_addr_t sdma_head_phys;
566 551
567 wait_queue_head_t state_wait; /* for state_wanted */ 552 wait_queue_head_t state_wait; /* for state_wanted */
568 553
@@ -639,39 +624,6 @@ struct qib_pportdata {
639 struct timer_list led_override_timer; 624 struct timer_list led_override_timer;
640 struct xmit_wait cong_stats; 625 struct xmit_wait cong_stats;
641 struct timer_list symerr_clear_timer; 626 struct timer_list symerr_clear_timer;
642
643 /* Synchronize access between driver writes and sysfs reads */
644 spinlock_t cc_shadow_lock
645 ____cacheline_aligned_in_smp;
646
647 /* Shadow copy of the congestion control table */
648 struct cc_table_shadow *ccti_entries_shadow;
649
650 /* Shadow copy of the congestion control entries */
651 struct ib_cc_congestion_setting_attr_shadow *congestion_entries_shadow;
652
653 /* List of congestion control table entries */
654 struct ib_cc_table_entry_shadow *ccti_entries;
655
656 /* 16 congestion entries with each entry corresponding to a SL */
657 struct ib_cc_congestion_entry_shadow *congestion_entries;
658
659 /* Maximum number of congestion control entries that the agent expects
660 * the manager to send.
661 */
662 u16 cc_supported_table_entries;
663
664 /* Total number of congestion control table entries */
665 u16 total_cct_entry;
666
667 /* Bit map identifying service level */
668 u16 cc_sl_control_map;
669
670 /* maximum congestion control table index */
671 u16 ccti_limit;
672
673 /* CA's max number of 64 entry units in the congestion control table */
674 u8 cc_max_table_entries;
675}; 627};
676 628
677/* Observers. Not to be taken lightly, possibly not to ship. */ 629/* Observers. Not to be taken lightly, possibly not to ship. */
@@ -855,10 +807,6 @@ struct qib_devdata {
855 * supports, less gives more pio bufs/ctxt, etc. 807 * supports, less gives more pio bufs/ctxt, etc.
856 */ 808 */
857 u32 cfgctxts; 809 u32 cfgctxts;
858 /*
859 * number of ctxts available for PSM open
860 */
861 u32 freectxts;
862 810
863 /* 811 /*
864 * hint that we should update pioavailshadow before 812 * hint that we should update pioavailshadow before
@@ -908,14 +856,7 @@ struct qib_devdata {
908 * pio_writing. 856 * pio_writing.
909 */ 857 */
910 spinlock_t pioavail_lock; 858 spinlock_t pioavail_lock;
911 /* 859
912 * index of last buffer to optimize search for next
913 */
914 u32 last_pio;
915 /*
916 * min kernel pio buffer to optimize search
917 */
918 u32 min_kernel_pio;
919 /* 860 /*
920 * Shadow copies of registers; size indicates read access size. 861 * Shadow copies of registers; size indicates read access size.
921 * Most of them are readonly, but some are write-only register, 862 * Most of them are readonly, but some are write-only register,
@@ -995,9 +936,7 @@ struct qib_devdata {
995 /* chip address space used by 4k pio buffers */ 936 /* chip address space used by 4k pio buffers */
996 u32 align4k; 937 u32 align4k;
997 /* size of each rcvegrbuffer */ 938 /* size of each rcvegrbuffer */
998 u16 rcvegrbufsize; 939 u32 rcvegrbufsize;
999 /* log2 of above */
1000 u16 rcvegrbufsize_shift;
1001 /* localbus width (1, 2,4,8,16,32) from config space */ 940 /* localbus width (1, 2,4,8,16,32) from config space */
1002 u32 lbus_width; 941 u32 lbus_width;
1003 /* localbus speed in MHz */ 942 /* localbus speed in MHz */
@@ -1112,7 +1051,6 @@ extern u32 qib_cpulist_count;
1112extern unsigned long *qib_cpulist; 1051extern unsigned long *qib_cpulist;
1113 1052
1114extern unsigned qib_wc_pat; 1053extern unsigned qib_wc_pat;
1115extern unsigned qib_cc_table_size;
1116int qib_init(struct qib_devdata *, int); 1054int qib_init(struct qib_devdata *, int);
1117int init_chip_wc_pat(struct qib_devdata *dd, u32); 1055int init_chip_wc_pat(struct qib_devdata *dd, u32);
1118int qib_enable_wc(struct qib_devdata *dd); 1056int qib_enable_wc(struct qib_devdata *dd);
@@ -1303,11 +1241,6 @@ int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
1303/* ppd->sdma_lock should be locked before calling this. */ 1241/* ppd->sdma_lock should be locked before calling this. */
1304int qib_sdma_make_progress(struct qib_pportdata *dd); 1242int qib_sdma_make_progress(struct qib_pportdata *dd);
1305 1243
1306static inline int qib_sdma_empty(const struct qib_pportdata *ppd)
1307{
1308 return ppd->sdma_descq_added == ppd->sdma_descq_removed;
1309}
1310
1311/* must be called under qib_sdma_lock */ 1244/* must be called under qib_sdma_lock */
1312static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd) 1245static inline u16 qib_sdma_descq_freecnt(const struct qib_pportdata *ppd)
1313{ 1246{
@@ -1411,7 +1344,7 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *);
1411int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, 1344int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *,
1412 const struct pci_device_id *); 1345 const struct pci_device_id *);
1413void qib_pcie_ddcleanup(struct qib_devdata *); 1346void qib_pcie_ddcleanup(struct qib_devdata *);
1414int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); 1347int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *);
1415int qib_reinit_intr(struct qib_devdata *); 1348int qib_reinit_intr(struct qib_devdata *);
1416void qib_enable_intx(struct pci_dev *); 1349void qib_enable_intx(struct pci_dev *);
1417void qib_nomsi(struct qib_devdata *); 1350void qib_nomsi(struct qib_devdata *);
diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h
index a5356cb4252..21f374aa063 100644
--- a/drivers/infiniband/hw/qib/qib_7220.h
+++ b/drivers/infiniband/hw/qib/qib_7220.h
@@ -97,7 +97,7 @@ struct qib_chippport_specific {
97 u64 iblnkerrsnap; 97 u64 iblnkerrsnap;
98 u64 ibcctrl; /* kr_ibcctrl shadow */ 98 u64 ibcctrl; /* kr_ibcctrl shadow */
99 u64 ibcddrctrl; /* kr_ibcddrctrl shadow */ 99 u64 ibcddrctrl; /* kr_ibcddrctrl shadow */
100 unsigned long chase_end; 100 u64 chase_end;
101 u32 last_delay_mult; 101 u32 last_delay_mult;
102}; 102};
103 103
diff --git a/drivers/infiniband/hw/qib/qib_common.h b/drivers/infiniband/hw/qib/qib_common.h
index d39e0183ff8..145da404088 100644
--- a/drivers/infiniband/hw/qib/qib_common.h
+++ b/drivers/infiniband/hw/qib/qib_common.h
@@ -285,6 +285,7 @@ struct qib_base_info {
285 285
286#ifndef QIB_KERN_TYPE 286#ifndef QIB_KERN_TYPE
287#define QIB_KERN_TYPE 0 287#define QIB_KERN_TYPE 0
288#define QIB_IDSTR "QLogic kernel.org driver"
288#endif 289#endif
289 290
290/* 291/*
@@ -301,19 +302,6 @@ struct qib_base_info {
301#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION) 302#define QIB_KERN_SWVERSION ((QIB_KERN_TYPE << 31) | QIB_USER_SWVERSION)
302 303
303/* 304/*
304 * Define the driver version number. This is something that refers only
305 * to the driver itself, not the software interfaces it supports.
306 */
307#define QIB_DRIVER_VERSION_BASE "1.11"
308
309/* create the final driver version string */
310#ifdef QIB_IDSTR
311#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE " " QIB_IDSTR
312#else
313#define QIB_DRIVER_VERSION QIB_DRIVER_VERSION_BASE
314#endif
315
316/*
317 * If the unit is specified via open, HCA choice is fixed. If port is 305 * If the unit is specified via open, HCA choice is fixed. If port is
318 * specified, it's also fixed. Otherwise we try to spread contexts 306 * specified, it's also fixed. Otherwise we try to spread contexts
319 * across ports and HCAs, using different algorithims. WITHIN is 307 * across ports and HCAs, using different algorithims. WITHIN is
diff --git a/drivers/infiniband/hw/qib/qib_diag.c b/drivers/infiniband/hw/qib/qib_diag.c
index 1686fd4bda8..204c4dd9dce 100644
--- a/drivers/infiniband/hw/qib/qib_diag.c
+++ b/drivers/infiniband/hw/qib/qib_diag.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -46,16 +46,12 @@
46#include <linux/pci.h> 46#include <linux/pci.h>
47#include <linux/poll.h> 47#include <linux/poll.h>
48#include <linux/vmalloc.h> 48#include <linux/vmalloc.h>
49#include <linux/export.h>
50#include <linux/fs.h> 49#include <linux/fs.h>
51#include <linux/uaccess.h> 50#include <linux/uaccess.h>
52 51
53#include "qib.h" 52#include "qib.h"
54#include "qib_common.h" 53#include "qib_common.h"
55 54
56#undef pr_fmt
57#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
58
59/* 55/*
60 * Each client that opens the diag device must read then write 56 * Each client that opens the diag device must read then write
61 * offset 0, to prevent lossage from random cat or od. diag_state 57 * offset 0, to prevent lossage from random cat or od. diag_state
@@ -601,8 +597,8 @@ static ssize_t qib_diagpkt_write(struct file *fp,
601 } 597 }
602 tmpbuf = vmalloc(plen); 598 tmpbuf = vmalloc(plen);
603 if (!tmpbuf) { 599 if (!tmpbuf) {
604 qib_devinfo(dd->pcidev, 600 qib_devinfo(dd->pcidev, "Unable to allocate tmp buffer, "
605 "Unable to allocate tmp buffer, failing\n"); 601 "failing\n");
606 ret = -ENOMEM; 602 ret = -ENOMEM;
607 goto bail; 603 goto bail;
608 } 604 }
@@ -696,7 +692,7 @@ int qib_register_observer(struct qib_devdata *dd,
696 ret = -ENOMEM; 692 ret = -ENOMEM;
697 olp = vmalloc(sizeof *olp); 693 olp = vmalloc(sizeof *olp);
698 if (!olp) { 694 if (!olp) {
699 pr_err("vmalloc for observer failed\n"); 695 printk(KERN_ERR QIB_DRV_NAME ": vmalloc for observer failed\n");
700 goto bail; 696 goto bail;
701 } 697 }
702 if (olp) { 698 if (olp) {
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c
index 5423edcab51..23e584f4c36 100644
--- a/drivers/infiniband/hw/qib/qib_driver.c
+++ b/drivers/infiniband/hw/qib/qib_driver.c
@@ -37,8 +37,6 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/netdevice.h> 38#include <linux/netdevice.h>
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40#include <linux/module.h>
41#include <linux/prefetch.h>
42 40
43#include "qib.h" 41#include "qib.h"
44 42
@@ -46,7 +44,7 @@
46 * The size has to be longer than this string, so we can append 44 * The size has to be longer than this string, so we can append
47 * board/chip information to it in the init code. 45 * board/chip information to it in the init code.
48 */ 46 */
49const char ib_qib_version[] = QIB_DRIVER_VERSION "\n"; 47const char ib_qib_version[] = QIB_IDSTR "\n";
50 48
51DEFINE_SPINLOCK(qib_devs_lock); 49DEFINE_SPINLOCK(qib_devs_lock);
52LIST_HEAD(qib_dev_list); 50LIST_HEAD(qib_dev_list);
@@ -65,7 +63,6 @@ MODULE_PARM_DESC(compat_ddr_negotiate,
65MODULE_LICENSE("Dual BSD/GPL"); 63MODULE_LICENSE("Dual BSD/GPL");
66MODULE_AUTHOR("QLogic <support@qlogic.com>"); 64MODULE_AUTHOR("QLogic <support@qlogic.com>");
67MODULE_DESCRIPTION("QLogic IB driver"); 65MODULE_DESCRIPTION("QLogic IB driver");
68MODULE_VERSION(QIB_DRIVER_VERSION);
69 66
70/* 67/*
71 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our 68 * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our
@@ -282,10 +279,10 @@ bail:
282 */ 279 */
283static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) 280static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail)
284{ 281{
285 const u32 chunk = etail >> rcd->rcvegrbufs_perchunk_shift; 282 const u32 chunk = etail / rcd->rcvegrbufs_perchunk;
286 const u32 idx = etail & ((u32)rcd->rcvegrbufs_perchunk - 1); 283 const u32 idx = etail % rcd->rcvegrbufs_perchunk;
287 284
288 return rcd->rcvegrbuf[chunk] + (idx << rcd->dd->rcvegrbufsize_shift); 285 return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize;
289} 286}
290 287
291/* 288/*
@@ -313,6 +310,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
313 u32 opcode; 310 u32 opcode;
314 u32 psn; 311 u32 psn;
315 int diff; 312 int diff;
313 unsigned long flags;
316 314
317 /* Sanity check packet */ 315 /* Sanity check packet */
318 if (tlen < 24) 316 if (tlen < 24)
@@ -367,14 +365,19 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
367 365
368 switch (qp->ibqp.qp_type) { 366 switch (qp->ibqp.qp_type) {
369 case IB_QPT_RC: 367 case IB_QPT_RC:
368 spin_lock_irqsave(&qp->s_lock, flags);
370 ruc_res = 369 ruc_res =
371 qib_ruc_check_hdr( 370 qib_ruc_check_hdr(
372 ibp, hdr, 371 ibp, hdr,
373 lnh == QIB_LRH_GRH, 372 lnh == QIB_LRH_GRH,
374 qp, 373 qp,
375 be32_to_cpu(ohdr->bth[0])); 374 be32_to_cpu(ohdr->bth[0]));
376 if (ruc_res) 375 if (ruc_res) {
376 spin_unlock_irqrestore(&qp->s_lock,
377 flags);
377 goto unlock; 378 goto unlock;
379 }
380 spin_unlock_irqrestore(&qp->s_lock, flags);
378 381
379 /* Only deal with RDMA Writes for now */ 382 /* Only deal with RDMA Writes for now */
380 if (opcode < 383 if (opcode <
@@ -483,10 +486,8 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
483 etail = qib_hdrget_index(rhf_addr); 486 etail = qib_hdrget_index(rhf_addr);
484 updegr = 1; 487 updegr = 1;
485 if (tlen > sizeof(*hdr) || 488 if (tlen > sizeof(*hdr) ||
486 etype >= RCVHQ_RCV_TYPE_NON_KD) { 489 etype >= RCVHQ_RCV_TYPE_NON_KD)
487 ebuf = qib_get_egrbuf(rcd, etail); 490 ebuf = qib_get_egrbuf(rcd, etail);
488 prefetch_range(ebuf, tlen - sizeof(*hdr));
489 }
490 } 491 }
491 if (!eflags) { 492 if (!eflags) {
492 u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; 493 u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2;
@@ -546,15 +547,6 @@ move_along:
546 updegr = 0; 547 updegr = 0;
547 } 548 }
548 } 549 }
549 /*
550 * Notify qib_destroy_qp() if it is waiting
551 * for lookaside_qp to finish.
552 */
553 if (rcd->lookaside_qp) {
554 if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
555 wake_up(&rcd->lookaside_qp->wait);
556 rcd->lookaside_qp = NULL;
557 }
558 550
559 rcd->head = l; 551 rcd->head = l;
560 rcd->pkt_count += i; 552 rcd->pkt_count += i;
@@ -765,9 +757,8 @@ int qib_reset_device(int unit)
765 qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); 757 qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit);
766 758
767 if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { 759 if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) {
768 qib_devinfo(dd->pcidev, 760 qib_devinfo(dd->pcidev, "Invalid unit number %u or "
769 "Invalid unit number %u or not initialized or not present\n", 761 "not initialized or not present\n", unit);
770 unit);
771 ret = -ENXIO; 762 ret = -ENXIO;
772 goto bail; 763 goto bail;
773 } 764 }
@@ -804,13 +795,11 @@ int qib_reset_device(int unit)
804 else 795 else
805 ret = -EAGAIN; 796 ret = -EAGAIN;
806 if (ret) 797 if (ret)
807 qib_dev_err(dd, 798 qib_dev_err(dd, "Reinitialize unit %u after "
808 "Reinitialize unit %u after reset failed with %d\n", 799 "reset failed with %d\n", unit, ret);
809 unit, ret);
810 else 800 else
811 qib_devinfo(dd->pcidev, 801 qib_devinfo(dd->pcidev, "Reinitialized unit %u after "
812 "Reinitialized unit %u after resetting\n", 802 "resetting\n", unit);
813 unit);
814 803
815bail: 804bail:
816 return ret; 805 return ret;
diff --git a/drivers/infiniband/hw/qib/qib_eeprom.c b/drivers/infiniband/hw/qib/qib_eeprom.c
index 4d5d71aaa2b..92d9cfe98a6 100644
--- a/drivers/infiniband/hw/qib/qib_eeprom.c
+++ b/drivers/infiniband/hw/qib/qib_eeprom.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 4 *
6 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -161,9 +160,10 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
161 if (oguid > bguid[7]) { 160 if (oguid > bguid[7]) {
162 if (bguid[6] == 0xff) { 161 if (bguid[6] == 0xff) {
163 if (bguid[5] == 0xff) { 162 if (bguid[5] == 0xff) {
164 qib_dev_err(dd, 163 qib_dev_err(dd, "Can't set %s GUID"
165 "Can't set %s GUID from base, wraps to OUI!\n", 164 " from base, wraps to"
166 qib_get_unit_name(t)); 165 " OUI!\n",
166 qib_get_unit_name(t));
167 dd->base_guid = 0; 167 dd->base_guid = 0;
168 goto bail; 168 goto bail;
169 } 169 }
@@ -182,9 +182,8 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
182 len = sizeof(struct qib_flash); 182 len = sizeof(struct qib_flash);
183 buf = vmalloc(len); 183 buf = vmalloc(len);
184 if (!buf) { 184 if (!buf) {
185 qib_dev_err(dd, 185 qib_dev_err(dd, "Couldn't allocate memory to read %u "
186 "Couldn't allocate memory to read %u bytes from eeprom for GUID\n", 186 "bytes from eeprom for GUID\n", len);
187 len);
188 goto bail; 187 goto bail;
189 } 188 }
190 189
@@ -202,25 +201,23 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
202 201
203 csum = flash_csum(ifp, 0); 202 csum = flash_csum(ifp, 0);
204 if (csum != ifp->if_csum) { 203 if (csum != ifp->if_csum) {
205 qib_devinfo(dd->pcidev, 204 qib_devinfo(dd->pcidev, "Bad I2C flash checksum: "
206 "Bad I2C flash checksum: 0x%x, not 0x%x\n", 205 "0x%x, not 0x%x\n", csum, ifp->if_csum);
207 csum, ifp->if_csum);
208 goto done; 206 goto done;
209 } 207 }
210 if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) || 208 if (*(__be64 *) ifp->if_guid == cpu_to_be64(0) ||
211 *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) { 209 *(__be64 *) ifp->if_guid == ~cpu_to_be64(0)) {
212 qib_dev_err(dd, 210 qib_dev_err(dd, "Invalid GUID %llx from flash; ignoring\n",
213 "Invalid GUID %llx from flash; ignoring\n", 211 *(unsigned long long *) ifp->if_guid);
214 *(unsigned long long *) ifp->if_guid);
215 /* don't allow GUID if all 0 or all 1's */ 212 /* don't allow GUID if all 0 or all 1's */
216 goto done; 213 goto done;
217 } 214 }
218 215
219 /* complain, but allow it */ 216 /* complain, but allow it */
220 if (*(u64 *) ifp->if_guid == 0x100007511000000ULL) 217 if (*(u64 *) ifp->if_guid == 0x100007511000000ULL)
221 qib_devinfo(dd->pcidev, 218 qib_devinfo(dd->pcidev, "Warning, GUID %llx is "
222 "Warning, GUID %llx is default, probably not correct!\n", 219 "default, probably not correct!\n",
223 *(unsigned long long *) ifp->if_guid); 220 *(unsigned long long *) ifp->if_guid);
224 221
225 bguid = ifp->if_guid; 222 bguid = ifp->if_guid;
226 if (!bguid[0] && !bguid[1] && !bguid[2]) { 223 if (!bguid[0] && !bguid[1] && !bguid[2]) {
@@ -263,9 +260,8 @@ void qib_get_eeprom_info(struct qib_devdata *dd)
263 memcpy(dd->serial, ifp->if_serial, 260 memcpy(dd->serial, ifp->if_serial,
264 sizeof ifp->if_serial); 261 sizeof ifp->if_serial);
265 if (!strstr(ifp->if_comment, "Tested successfully")) 262 if (!strstr(ifp->if_comment, "Tested successfully"))
266 qib_dev_err(dd, 263 qib_dev_err(dd, "Board SN %s did not pass functional "
267 "Board SN %s did not pass functional test: %s\n", 264 "test: %s\n", dd->serial, ifp->if_comment);
268 dd->serial, ifp->if_comment);
269 265
270 memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT); 266 memcpy(&dd->eep_st_errs, &ifp->if_errcntp, QIB_EEP_LOG_CNT);
271 /* 267 /*
@@ -327,9 +323,8 @@ int qib_update_eeprom_log(struct qib_devdata *dd)
327 buf = vmalloc(len); 323 buf = vmalloc(len);
328 ret = 1; 324 ret = 1;
329 if (!buf) { 325 if (!buf) {
330 qib_dev_err(dd, 326 qib_dev_err(dd, "Couldn't allocate memory to read %u "
331 "Couldn't allocate memory to read %u bytes from eeprom for logging\n", 327 "bytes from eeprom for logging\n", len);
332 len);
333 goto bail; 328 goto bail;
334 } 329 }
335 330
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 959a5c4ff81..26253039d2c 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -43,15 +43,11 @@
43#include <linux/jiffies.h> 43#include <linux/jiffies.h>
44#include <asm/pgtable.h> 44#include <asm/pgtable.h>
45#include <linux/delay.h> 45#include <linux/delay.h>
46#include <linux/export.h>
47 46
48#include "qib.h" 47#include "qib.h"
49#include "qib_common.h" 48#include "qib_common.h"
50#include "qib_user_sdma.h" 49#include "qib_user_sdma.h"
51 50
52#undef pr_fmt
53#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
54
55static int qib_open(struct inode *, struct file *); 51static int qib_open(struct inode *, struct file *);
56static int qib_close(struct inode *, struct file *); 52static int qib_close(struct inode *, struct file *);
57static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *); 53static ssize_t qib_write(struct file *, const char __user *, size_t, loff_t *);
@@ -318,9 +314,8 @@ static int qib_tid_update(struct qib_ctxtdata *rcd, struct file *fp,
318 } 314 }
319 if (cnt > tidcnt) { 315 if (cnt > tidcnt) {
320 /* make sure it all fits in tid_pg_list */ 316 /* make sure it all fits in tid_pg_list */
321 qib_devinfo(dd->pcidev, 317 qib_devinfo(dd->pcidev, "Process tried to allocate %u "
322 "Process tried to allocate %u TIDs, only trying max (%u)\n", 318 "TIDs, only trying max (%u)\n", cnt, tidcnt);
323 cnt, tidcnt);
324 cnt = tidcnt; 319 cnt = tidcnt;
325 } 320 }
326 pagep = (struct page **) rcd->tid_pg_list; 321 pagep = (struct page **) rcd->tid_pg_list;
@@ -754,9 +749,9 @@ static int qib_mmap_mem(struct vm_area_struct *vma, struct qib_ctxtdata *rcd,
754 ret = remap_pfn_range(vma, vma->vm_start, pfn, 749 ret = remap_pfn_range(vma, vma->vm_start, pfn,
755 len, vma->vm_page_prot); 750 len, vma->vm_page_prot);
756 if (ret) 751 if (ret)
757 qib_devinfo(dd->pcidev, 752 qib_devinfo(dd->pcidev, "%s ctxt%u mmap of %lx, %x "
758 "%s ctxt%u mmap of %lx, %x bytes failed: %d\n", 753 "bytes failed: %d\n", what, rcd->ctxt,
759 what, rcd->ctxt, pfn, len, ret); 754 pfn, len, ret);
760bail: 755bail:
761 return ret; 756 return ret;
762} 757}
@@ -775,9 +770,8 @@ static int mmap_ureg(struct vm_area_struct *vma, struct qib_devdata *dd,
775 */ 770 */
776 sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE; 771 sz = dd->flags & QIB_HAS_HDRSUPP ? 2 * PAGE_SIZE : PAGE_SIZE;
777 if ((vma->vm_end - vma->vm_start) > sz) { 772 if ((vma->vm_end - vma->vm_start) > sz) {
778 qib_devinfo(dd->pcidev, 773 qib_devinfo(dd->pcidev, "FAIL mmap userreg: reqlen "
779 "FAIL mmap userreg: reqlen %lx > PAGE\n", 774 "%lx > PAGE\n", vma->vm_end - vma->vm_start);
780 vma->vm_end - vma->vm_start);
781 ret = -EFAULT; 775 ret = -EFAULT;
782 } else { 776 } else {
783 phys = dd->physaddr + ureg; 777 phys = dd->physaddr + ureg;
@@ -807,8 +801,8 @@ static int mmap_piobufs(struct vm_area_struct *vma,
807 * for it. 801 * for it.
808 */ 802 */
809 if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) { 803 if ((vma->vm_end - vma->vm_start) > (piocnt * dd->palign)) {
810 qib_devinfo(dd->pcidev, 804 qib_devinfo(dd->pcidev, "FAIL mmap piobufs: "
811 "FAIL mmap piobufs: reqlen %lx > PAGE\n", 805 "reqlen %lx > PAGE\n",
812 vma->vm_end - vma->vm_start); 806 vma->vm_end - vma->vm_start);
813 ret = -EINVAL; 807 ret = -EINVAL;
814 goto bail; 808 goto bail;
@@ -852,8 +846,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
852 size = rcd->rcvegrbuf_size; 846 size = rcd->rcvegrbuf_size;
853 total_size = rcd->rcvegrbuf_chunks * size; 847 total_size = rcd->rcvegrbuf_chunks * size;
854 if ((vma->vm_end - vma->vm_start) > total_size) { 848 if ((vma->vm_end - vma->vm_start) > total_size) {
855 qib_devinfo(dd->pcidev, 849 qib_devinfo(dd->pcidev, "FAIL on egr bufs: "
856 "FAIL on egr bufs: reqlen %lx > actual %lx\n", 850 "reqlen %lx > actual %lx\n",
857 vma->vm_end - vma->vm_start, 851 vma->vm_end - vma->vm_start,
858 (unsigned long) total_size); 852 (unsigned long) total_size);
859 ret = -EINVAL; 853 ret = -EINVAL;
@@ -861,9 +855,8 @@ static int mmap_rcvegrbufs(struct vm_area_struct *vma,
861 } 855 }
862 856
863 if (vma->vm_flags & VM_WRITE) { 857 if (vma->vm_flags & VM_WRITE) {
864 qib_devinfo(dd->pcidev, 858 qib_devinfo(dd->pcidev, "Can't map eager buffers as "
865 "Can't map eager buffers as writable (flags=%lx)\n", 859 "writable (flags=%lx)\n", vma->vm_flags);
866 vma->vm_flags);
867 ret = -EPERM; 860 ret = -EPERM;
868 goto bail; 861 goto bail;
869 } 862 }
@@ -971,7 +964,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
971 964
972 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT; 965 vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
973 vma->vm_ops = &qib_file_vm_ops; 966 vma->vm_ops = &qib_file_vm_ops;
974 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP; 967 vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
975 ret = 1; 968 ret = 1;
976 969
977bail: 970bail:
@@ -1276,8 +1269,8 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1276 GFP_KERNEL); 1269 GFP_KERNEL);
1277 1270
1278 if (!rcd || !ptmp) { 1271 if (!rcd || !ptmp) {
1279 qib_dev_err(dd, 1272 qib_dev_err(dd, "Unable to allocate ctxtdata "
1280 "Unable to allocate ctxtdata memory, failing open\n"); 1273 "memory, failing open\n");
1281 ret = -ENOMEM; 1274 ret = -ENOMEM;
1282 goto bailerr; 1275 goto bailerr;
1283 } 1276 }
@@ -1291,7 +1284,6 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt,
1291 strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); 1284 strlcpy(rcd->comm, current->comm, sizeof(rcd->comm));
1292 ctxt_fp(fp) = rcd; 1285 ctxt_fp(fp) = rcd;
1293 qib_stats.sps_ctxts++; 1286 qib_stats.sps_ctxts++;
1294 dd->freectxts--;
1295 ret = 0; 1287 ret = 0;
1296 goto bail; 1288 goto bail;
1297 1289
@@ -1566,10 +1558,10 @@ done_chk_sdma:
1566 } else if (weight == 1 && 1558 } else if (weight == 1 &&
1567 test_bit(cpumask_first(tsk_cpus_allowed(current)), 1559 test_bit(cpumask_first(tsk_cpus_allowed(current)),
1568 qib_cpulist)) 1560 qib_cpulist))
1569 qib_devinfo(dd->pcidev, 1561 qib_devinfo(dd->pcidev, "%s PID %u affinity "
1570 "%s PID %u affinity set to cpu %d; already allocated\n", 1562 "set to cpu %d; already allocated\n",
1571 current->comm, current->pid, 1563 current->comm, current->pid,
1572 cpumask_first(tsk_cpus_allowed(current))); 1564 cpumask_first(tsk_cpus_allowed(current)));
1573 } 1565 }
1574 1566
1575 mutex_unlock(&qib_mutex); 1567 mutex_unlock(&qib_mutex);
@@ -1800,7 +1792,6 @@ static int qib_close(struct inode *in, struct file *fp)
1800 if (dd->pageshadow) 1792 if (dd->pageshadow)
1801 unlock_expected_tids(rcd); 1793 unlock_expected_tids(rcd);
1802 qib_stats.sps_ctxts--; 1794 qib_stats.sps_ctxts--;
1803 dd->freectxts++;
1804 } 1795 }
1805 1796
1806 mutex_unlock(&qib_mutex); 1797 mutex_unlock(&qib_mutex);
@@ -2191,7 +2182,8 @@ int qib_cdev_init(int minor, const char *name,
2191 2182
2192 cdev = cdev_alloc(); 2183 cdev = cdev_alloc();
2193 if (!cdev) { 2184 if (!cdev) {
2194 pr_err("Could not allocate cdev for minor %d, %s\n", 2185 printk(KERN_ERR QIB_DRV_NAME
2186 ": Could not allocate cdev for minor %d, %s\n",
2195 minor, name); 2187 minor, name);
2196 ret = -ENOMEM; 2188 ret = -ENOMEM;
2197 goto done; 2189 goto done;
@@ -2203,7 +2195,8 @@ int qib_cdev_init(int minor, const char *name,
2203 2195
2204 ret = cdev_add(cdev, dev, 1); 2196 ret = cdev_add(cdev, dev, 1);
2205 if (ret < 0) { 2197 if (ret < 0) {
2206 pr_err("Could not add cdev for minor %d, %s (err %d)\n", 2198 printk(KERN_ERR QIB_DRV_NAME
2199 ": Could not add cdev for minor %d, %s (err %d)\n",
2207 minor, name, -ret); 2200 minor, name, -ret);
2208 goto err_cdev; 2201 goto err_cdev;
2209 } 2202 }
@@ -2213,7 +2206,8 @@ int qib_cdev_init(int minor, const char *name,
2213 goto done; 2206 goto done;
2214 ret = PTR_ERR(device); 2207 ret = PTR_ERR(device);
2215 device = NULL; 2208 device = NULL;
2216 pr_err("Could not create device for minor %d, %s (err %d)\n", 2209 printk(KERN_ERR QIB_DRV_NAME ": Could not create "
2210 "device for minor %d, %s (err %d)\n",
2217 minor, name, -ret); 2211 minor, name, -ret);
2218err_cdev: 2212err_cdev:
2219 cdev_del(cdev); 2213 cdev_del(cdev);
@@ -2248,14 +2242,16 @@ int __init qib_dev_init(void)
2248 2242
2249 ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME); 2243 ret = alloc_chrdev_region(&qib_dev, 0, QIB_NMINORS, QIB_DRV_NAME);
2250 if (ret < 0) { 2244 if (ret < 0) {
2251 pr_err("Could not allocate chrdev region (err %d)\n", -ret); 2245 printk(KERN_ERR QIB_DRV_NAME ": Could not allocate "
2246 "chrdev region (err %d)\n", -ret);
2252 goto done; 2247 goto done;
2253 } 2248 }
2254 2249
2255 qib_class = class_create(THIS_MODULE, "ipath"); 2250 qib_class = class_create(THIS_MODULE, "ipath");
2256 if (IS_ERR(qib_class)) { 2251 if (IS_ERR(qib_class)) {
2257 ret = PTR_ERR(qib_class); 2252 ret = PTR_ERR(qib_class);
2258 pr_err("Could not create device class (err %d)\n", -ret); 2253 printk(KERN_ERR QIB_DRV_NAME ": Could not create "
2254 "device class (err %d)\n", -ret);
2259 unregister_chrdev_region(qib_dev, QIB_NMINORS); 2255 unregister_chrdev_region(qib_dev, QIB_NMINORS);
2260 } 2256 }
2261 2257
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index 65a2a23f6f8..df7fa251dcd 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
5 * 4 *
6 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -48,7 +47,7 @@ static struct super_block *qib_super;
48#define private2dd(file) ((file)->f_dentry->d_inode->i_private) 47#define private2dd(file) ((file)->f_dentry->d_inode->i_private)
49 48
50static int qibfs_mknod(struct inode *dir, struct dentry *dentry, 49static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
51 umode_t mode, const struct file_operations *fops, 50 int mode, const struct file_operations *fops,
52 void *data) 51 void *data)
53{ 52{
54 int error; 53 int error;
@@ -61,14 +60,14 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
61 60
62 inode->i_ino = get_next_ino(); 61 inode->i_ino = get_next_ino();
63 inode->i_mode = mode; 62 inode->i_mode = mode;
64 inode->i_uid = GLOBAL_ROOT_UID; 63 inode->i_uid = 0;
65 inode->i_gid = GLOBAL_ROOT_GID; 64 inode->i_gid = 0;
66 inode->i_blocks = 0; 65 inode->i_blocks = 0;
67 inode->i_atime = CURRENT_TIME; 66 inode->i_atime = CURRENT_TIME;
68 inode->i_mtime = inode->i_atime; 67 inode->i_mtime = inode->i_atime;
69 inode->i_ctime = inode->i_atime; 68 inode->i_ctime = inode->i_atime;
70 inode->i_private = data; 69 inode->i_private = data;
71 if (S_ISDIR(mode)) { 70 if ((mode & S_IFMT) == S_IFDIR) {
72 inode->i_op = &simple_dir_inode_operations; 71 inode->i_op = &simple_dir_inode_operations;
73 inc_nlink(inode); 72 inc_nlink(inode);
74 inc_nlink(dir); 73 inc_nlink(dir);
@@ -83,7 +82,7 @@ bail:
83 return error; 82 return error;
84} 83}
85 84
86static int create_file(const char *name, umode_t mode, 85static int create_file(const char *name, mode_t mode,
87 struct dentry *parent, struct dentry **dentry, 86 struct dentry *parent, struct dentry **dentry,
88 const struct file_operations *fops, void *data) 87 const struct file_operations *fops, void *data)
89{ 88{
@@ -383,7 +382,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
383 ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir, 382 ret = create_file(unit, S_IFDIR|S_IRUGO|S_IXUGO, sb->s_root, &dir,
384 &simple_dir_operations, dd); 383 &simple_dir_operations, dd);
385 if (ret) { 384 if (ret) {
386 pr_err("create_file(%s) failed: %d\n", unit, ret); 385 printk(KERN_ERR "create_file(%s) failed: %d\n", unit, ret);
387 goto bail; 386 goto bail;
388 } 387 }
389 388
@@ -391,21 +390,21 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
391 ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp, 390 ret = create_file("counters", S_IFREG|S_IRUGO, dir, &tmp,
392 &cntr_ops[0], dd); 391 &cntr_ops[0], dd);
393 if (ret) { 392 if (ret) {
394 pr_err("create_file(%s/counters) failed: %d\n", 393 printk(KERN_ERR "create_file(%s/counters) failed: %d\n",
395 unit, ret); 394 unit, ret);
396 goto bail; 395 goto bail;
397 } 396 }
398 ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp, 397 ret = create_file("counter_names", S_IFREG|S_IRUGO, dir, &tmp,
399 &cntr_ops[1], dd); 398 &cntr_ops[1], dd);
400 if (ret) { 399 if (ret) {
401 pr_err("create_file(%s/counter_names) failed: %d\n", 400 printk(KERN_ERR "create_file(%s/counter_names) failed: %d\n",
402 unit, ret); 401 unit, ret);
403 goto bail; 402 goto bail;
404 } 403 }
405 ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp, 404 ret = create_file("portcounter_names", S_IFREG|S_IRUGO, dir, &tmp,
406 &portcntr_ops[0], dd); 405 &portcntr_ops[0], dd);
407 if (ret) { 406 if (ret) {
408 pr_err("create_file(%s/%s) failed: %d\n", 407 printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
409 unit, "portcounter_names", ret); 408 unit, "portcounter_names", ret);
410 goto bail; 409 goto bail;
411 } 410 }
@@ -417,7 +416,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
417 ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, 416 ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
418 &portcntr_ops[i], dd); 417 &portcntr_ops[i], dd);
419 if (ret) { 418 if (ret) {
420 pr_err("create_file(%s/%s) failed: %d\n", 419 printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
421 unit, fname, ret); 420 unit, fname, ret);
422 goto bail; 421 goto bail;
423 } 422 }
@@ -427,7 +426,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
427 ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp, 426 ret = create_file(fname, S_IFREG|S_IRUGO, dir, &tmp,
428 &qsfp_ops[i - 1], dd); 427 &qsfp_ops[i - 1], dd);
429 if (ret) { 428 if (ret) {
430 pr_err("create_file(%s/%s) failed: %d\n", 429 printk(KERN_ERR "create_file(%s/%s) failed: %d\n",
431 unit, fname, ret); 430 unit, fname, ret);
432 goto bail; 431 goto bail;
433 } 432 }
@@ -436,7 +435,7 @@ static int add_cntr_files(struct super_block *sb, struct qib_devdata *dd)
436 ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp, 435 ret = create_file("flash", S_IFREG|S_IWUSR|S_IRUGO, dir, &tmp,
437 &flash_ops, dd); 436 &flash_ops, dd);
438 if (ret) 437 if (ret)
439 pr_err("create_file(%s/flash) failed: %d\n", 438 printk(KERN_ERR "create_file(%s/flash) failed: %d\n",
440 unit, ret); 439 unit, ret);
441bail: 440bail:
442 return ret; 441 return ret;
@@ -487,7 +486,7 @@ static int remove_device_files(struct super_block *sb,
487 486
488 if (IS_ERR(dir)) { 487 if (IS_ERR(dir)) {
489 ret = PTR_ERR(dir); 488 ret = PTR_ERR(dir);
490 pr_err("Lookup of %s failed\n", unit); 489 printk(KERN_ERR "Lookup of %s failed\n", unit);
491 goto bail; 490 goto bail;
492 } 491 }
493 492
@@ -533,7 +532,7 @@ static int qibfs_fill_super(struct super_block *sb, void *data, int silent)
533 532
534 ret = simple_fill_super(sb, QIBFS_MAGIC, files); 533 ret = simple_fill_super(sb, QIBFS_MAGIC, files);
535 if (ret) { 534 if (ret) {
536 pr_err("simple_fill_super failed: %d\n", ret); 535 printk(KERN_ERR "simple_fill_super failed: %d\n", ret);
537 goto bail; 536 goto bail;
538 } 537 }
539 538
diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index a099ac171e2..65df26ce538 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -753,8 +753,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
753 if (!hwerrs) 753 if (!hwerrs)
754 return; 754 return;
755 if (hwerrs == ~0ULL) { 755 if (hwerrs == ~0ULL) {
756 qib_dev_err(dd, 756 qib_dev_err(dd, "Read of hardware error status failed "
757 "Read of hardware error status failed (all bits set); ignoring\n"); 757 "(all bits set); ignoring\n");
758 return; 758 return;
759 } 759 }
760 qib_stats.sps_hwerrs++; 760 qib_stats.sps_hwerrs++;
@@ -779,14 +779,13 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
779 * or it's occurred within the last 5 seconds. 779 * or it's occurred within the last 5 seconds.
780 */ 780 */
781 if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID)) 781 if (hwerrs & ~(TXE_PIO_PARITY | RXEMEMPARITYERR_EAGERTID))
782 qib_devinfo(dd->pcidev, 782 qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
783 "Hardware error: hwerr=0x%llx (cleared)\n", 783 "(cleared)\n", (unsigned long long) hwerrs);
784 (unsigned long long) hwerrs);
785 784
786 if (hwerrs & ~IB_HWE_BITSEXTANT) 785 if (hwerrs & ~IB_HWE_BITSEXTANT)
787 qib_dev_err(dd, 786 qib_dev_err(dd, "hwerror interrupt with unknown errors "
788 "hwerror interrupt with unknown errors %llx set\n", 787 "%llx set\n", (unsigned long long)
789 (unsigned long long)(hwerrs & ~IB_HWE_BITSEXTANT)); 788 (hwerrs & ~IB_HWE_BITSEXTANT));
790 789
791 ctrl = qib_read_kreg32(dd, kr_control); 790 ctrl = qib_read_kreg32(dd, kr_control);
792 if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) { 791 if ((ctrl & QLOGIC_IB_C_FREEZEMODE) && !dd->diag_client) {
@@ -816,9 +815,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
816 815
817 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { 816 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
818 isfatal = 1; 817 isfatal = 1;
819 strlcat(msg, 818 strlcat(msg, "[Memory BIST test failed, InfiniPath hardware"
820 "[Memory BIST test failed, InfiniPath hardware unusable]", 819 " unusable]", msgl);
821 msgl);
822 /* ignore from now on, so disable until driver reloaded */ 820 /* ignore from now on, so disable until driver reloaded */
823 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); 821 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
824 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); 822 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -870,9 +868,8 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
870 *msg = 0; /* recovered from all of them */ 868 *msg = 0; /* recovered from all of them */
871 869
872 if (isfatal && !dd->diag_client) { 870 if (isfatal && !dd->diag_client) {
873 qib_dev_err(dd, 871 qib_dev_err(dd, "Fatal Hardware Error, no longer"
874 "Fatal Hardware Error, no longer usable, SN %.16s\n", 872 " usable, SN %.16s\n", dd->serial);
875 dd->serial);
876 /* 873 /*
877 * for /sys status file and user programs to print; if no 874 * for /sys status file and user programs to print; if no
878 * trailing brace is copied, we'll know it was truncated. 875 * trailing brace is copied, we'll know it was truncated.
@@ -1020,9 +1017,9 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
1020 qib_inc_eeprom_err(dd, log_idx, 1); 1017 qib_inc_eeprom_err(dd, log_idx, 1);
1021 1018
1022 if (errs & ~IB_E_BITSEXTANT) 1019 if (errs & ~IB_E_BITSEXTANT)
1023 qib_dev_err(dd, 1020 qib_dev_err(dd, "error interrupt with unknown errors "
1024 "error interrupt with unknown errors %llx set\n", 1021 "%llx set\n",
1025 (unsigned long long) (errs & ~IB_E_BITSEXTANT)); 1022 (unsigned long long) (errs & ~IB_E_BITSEXTANT));
1026 1023
1027 if (errs & E_SUM_ERRS) { 1024 if (errs & E_SUM_ERRS) {
1028 qib_disarm_6120_senderrbufs(ppd); 1025 qib_disarm_6120_senderrbufs(ppd);
@@ -1092,8 +1089,8 @@ static void handle_6120_errors(struct qib_devdata *dd, u64 errs)
1092 } 1089 }
1093 1090
1094 if (errs & ERR_MASK(ResetNegated)) { 1091 if (errs & ERR_MASK(ResetNegated)) {
1095 qib_dev_err(dd, 1092 qib_dev_err(dd, "Got reset, requires re-init "
1096 "Got reset, requires re-init (unload and reload driver)\n"); 1093 "(unload and reload driver)\n");
1097 dd->flags &= ~QIB_INITTED; /* needs re-init */ 1094 dd->flags &= ~QIB_INITTED; /* needs re-init */
1098 /* mark as having had error */ 1095 /* mark as having had error */
1099 *dd->devstatusp |= QIB_STATUS_HWERROR; 1096 *dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1544,9 +1541,8 @@ static noinline void unlikely_6120_intr(struct qib_devdata *dd, u64 istat)
1544 qib_stats.sps_errints++; 1541 qib_stats.sps_errints++;
1545 estat = qib_read_kreg64(dd, kr_errstatus); 1542 estat = qib_read_kreg64(dd, kr_errstatus);
1546 if (!estat) 1543 if (!estat)
1547 qib_devinfo(dd->pcidev, 1544 qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
1548 "error interrupt (%Lx), but no error bits set!\n", 1545 "but no error bits set!\n", istat);
1549 istat);
1550 handle_6120_errors(dd, estat); 1546 handle_6120_errors(dd, estat);
1551 } 1547 }
1552 1548
@@ -1719,16 +1715,16 @@ static void qib_setup_6120_interrupt(struct qib_devdata *dd)
1719 } 1715 }
1720 1716
1721 if (!dd->cspec->irq) 1717 if (!dd->cspec->irq)
1722 qib_dev_err(dd, 1718 qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
1723 "irq is 0, BIOS error? Interrupts won't work\n"); 1719 "work\n");
1724 else { 1720 else {
1725 int ret; 1721 int ret;
1726 ret = request_irq(dd->cspec->irq, qib_6120intr, 0, 1722 ret = request_irq(dd->cspec->irq, qib_6120intr, 0,
1727 QIB_DRV_NAME, dd); 1723 QIB_DRV_NAME, dd);
1728 if (ret) 1724 if (ret)
1729 qib_dev_err(dd, 1725 qib_dev_err(dd, "Couldn't setup interrupt "
1730 "Couldn't setup interrupt (irq=%d): %d\n", 1726 "(irq=%d): %d\n", dd->cspec->irq,
1731 dd->cspec->irq, ret); 1727 ret);
1732 } 1728 }
1733} 1729}
1734 1730
@@ -1763,9 +1759,8 @@ static void pe_boardname(struct qib_devdata *dd)
1763 snprintf(dd->boardname, namelen, "%s", n); 1759 snprintf(dd->boardname, namelen, "%s", n);
1764 1760
1765 if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2) 1761 if (dd->majrev != 4 || !dd->minrev || dd->minrev > 2)
1766 qib_dev_err(dd, 1762 qib_dev_err(dd, "Unsupported InfiniPath hardware revision "
1767 "Unsupported InfiniPath hardware revision %u.%u!\n", 1763 "%u.%u!\n", dd->majrev, dd->minrev);
1768 dd->majrev, dd->minrev);
1769 1764
1770 snprintf(dd->boardversion, sizeof(dd->boardversion), 1765 snprintf(dd->boardversion, sizeof(dd->boardversion),
1771 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", 1766 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -1838,8 +1833,8 @@ static int qib_6120_setup_reset(struct qib_devdata *dd)
1838bail: 1833bail:
1839 if (ret) { 1834 if (ret) {
1840 if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) 1835 if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
1841 qib_dev_err(dd, 1836 qib_dev_err(dd, "Reset failed to setup PCIe or "
1842 "Reset failed to setup PCIe or interrupts; continuing anyway\n"); 1837 "interrupts; continuing anyway\n");
1843 /* clear the reset error, init error/hwerror mask */ 1838 /* clear the reset error, init error/hwerror mask */
1844 qib_6120_init_hwerrors(dd); 1839 qib_6120_init_hwerrors(dd);
1845 /* for Rev2 error interrupts; nop for rev 1 */ 1840 /* for Rev2 error interrupts; nop for rev 1 */
@@ -1881,9 +1876,8 @@ static void qib_6120_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
1881 } 1876 }
1882 pa >>= 11; 1877 pa >>= 11;
1883 if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { 1878 if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
1884 qib_dev_err(dd, 1879 qib_dev_err(dd, "Physical page address 0x%lx "
1885 "Physical page address 0x%lx larger than supported\n", 1880 "larger than supported\n", pa);
1886 pa);
1887 return; 1881 return;
1888 } 1882 }
1889 1883
@@ -1947,9 +1941,8 @@ static void qib_6120_put_tid_2(struct qib_devdata *dd, u64 __iomem *tidptr,
1947 } 1941 }
1948 pa >>= 11; 1942 pa >>= 11;
1949 if (pa & ~QLOGIC_IB_RT_ADDR_MASK) { 1943 if (pa & ~QLOGIC_IB_RT_ADDR_MASK) {
1950 qib_dev_err(dd, 1944 qib_dev_err(dd, "Physical page address 0x%lx "
1951 "Physical page address 0x%lx larger than supported\n", 1945 "larger than supported\n", pa);
1952 pa);
1953 return; 1946 return;
1954 } 1947 }
1955 1948
@@ -2112,7 +2105,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd)
2112 dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, 2105 dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev,
2113 dd->rcd[0]->rcvhdrq_size, 2106 dd->rcd[0]->rcvhdrq_size,
2114 &dd->cspec->dummy_hdrq_phys, 2107 &dd->cspec->dummy_hdrq_phys,
2115 GFP_ATOMIC | __GFP_COMP); 2108 GFP_KERNEL | __GFP_COMP);
2116 if (!dd->cspec->dummy_hdrq) { 2109 if (!dd->cspec->dummy_hdrq) {
2117 qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); 2110 qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n");
2118 /* fallback to just 0'ing */ 2111 /* fallback to just 0'ing */
@@ -2935,9 +2928,8 @@ static int qib_6120_set_loopback(struct qib_pportdata *ppd, const char *what)
2935 ppd->dd->unit, ppd->port); 2928 ppd->dd->unit, ppd->port);
2936 } else if (!strncmp(what, "off", 3)) { 2929 } else if (!strncmp(what, "off", 3)) {
2937 ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); 2930 ppd->dd->cspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
2938 qib_devinfo(ppd->dd->pcidev, 2931 qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
2939 "Disabling IB%u:%u IBC loopback (normal)\n", 2932 "(normal)\n", ppd->dd->unit, ppd->port);
2940 ppd->dd->unit, ppd->port);
2941 } else 2933 } else
2942 ret = -EINVAL; 2934 ret = -EINVAL;
2943 if (!ret) { 2935 if (!ret) {
@@ -3140,7 +3132,6 @@ static void get_6120_chip_params(struct qib_devdata *dd)
3140 val = qib_read_kreg64(dd, kr_sendpiobufcnt); 3132 val = qib_read_kreg64(dd, kr_sendpiobufcnt);
3141 dd->piobcnt2k = val & ~0U; 3133 dd->piobcnt2k = val & ~0U;
3142 dd->piobcnt4k = val >> 32; 3134 dd->piobcnt4k = val >> 32;
3143 dd->last_pio = dd->piobcnt4k + dd->piobcnt2k - 1;
3144 /* these may be adjusted in init_chip_wc_pat() */ 3135 /* these may be adjusted in init_chip_wc_pat() */
3145 dd->pio2kbase = (u32 __iomem *) 3136 dd->pio2kbase = (u32 __iomem *)
3146 (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase); 3137 (((char __iomem *)dd->kregbase) + dd->pio2k_bufbase);
@@ -3194,10 +3185,11 @@ static int qib_late_6120_initreg(struct qib_devdata *dd)
3194 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); 3185 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
3195 val = qib_read_kreg64(dd, kr_sendpioavailaddr); 3186 val = qib_read_kreg64(dd, kr_sendpioavailaddr);
3196 if (val != dd->pioavailregs_phys) { 3187 if (val != dd->pioavailregs_phys) {
3197 qib_dev_err(dd, 3188 qib_dev_err(dd, "Catastrophic software error, "
3198 "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", 3189 "SendPIOAvailAddr written as %lx, "
3199 (unsigned long) dd->pioavailregs_phys, 3190 "read back as %llx\n",
3200 (unsigned long long) val); 3191 (unsigned long) dd->pioavailregs_phys,
3192 (unsigned long long) val);
3201 ret = -EINVAL; 3193 ret = -EINVAL;
3202 } 3194 }
3203 return ret; 3195 return ret;
@@ -3225,8 +3217,8 @@ static int init_6120_variables(struct qib_devdata *dd)
3225 dd->revision = readq(&dd->kregbase[kr_revision]); 3217 dd->revision = readq(&dd->kregbase[kr_revision]);
3226 3218
3227 if ((dd->revision & 0xffffffffU) == 0xffffffffU) { 3219 if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
3228 qib_dev_err(dd, 3220 qib_dev_err(dd, "Revision register read failure, "
3229 "Revision register read failure, giving up initialization\n"); 3221 "giving up initialization\n");
3230 ret = -ENODEV; 3222 ret = -ENODEV;
3231 goto bail; 3223 goto bail;
3232 } 3224 }
@@ -3283,8 +3275,6 @@ static int init_6120_variables(struct qib_devdata *dd)
3283 /* we always allocate at least 2048 bytes for eager buffers */ 3275 /* we always allocate at least 2048 bytes for eager buffers */
3284 ret = ib_mtu_enum_to_int(qib_ibmtu); 3276 ret = ib_mtu_enum_to_int(qib_ibmtu);
3285 dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; 3277 dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
3286 BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
3287 dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
3288 3278
3289 qib_6120_tidtemplate(dd); 3279 qib_6120_tidtemplate(dd);
3290 3280
@@ -3558,8 +3548,8 @@ struct qib_devdata *qib_init_iba6120_funcs(struct pci_dev *pdev,
3558 goto bail; 3548 goto bail;
3559 3549
3560 if (qib_pcie_params(dd, 8, NULL, NULL)) 3550 if (qib_pcie_params(dd, 8, NULL, NULL))
3561 qib_dev_err(dd, 3551 qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
3562 "Failed to setup PCIe or interrupts; continuing anyway\n"); 3552 "continuing anyway\n");
3563 dd->cspec->irq = pdev->irq; /* save IRQ */ 3553 dd->cspec->irq = pdev->irq; /* save IRQ */
3564 3554
3565 /* clear diagctrl register, in case diags were running and crashed */ 3555 /* clear diagctrl register, in case diags were running and crashed */
diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c
index 64d0ecb90cd..4250c05a14f 100644
--- a/drivers/infiniband/hw/qib/qib_iba7220.c
+++ b/drivers/infiniband/hw/qib/qib_iba7220.c
@@ -39,7 +39,6 @@
39#include <linux/interrupt.h> 39#include <linux/interrupt.h>
40#include <linux/pci.h> 40#include <linux/pci.h>
41#include <linux/delay.h> 41#include <linux/delay.h>
42#include <linux/module.h>
43#include <linux/io.h> 42#include <linux/io.h>
44#include <rdma/ib_verbs.h> 43#include <rdma/ib_verbs.h>
45 44
@@ -1051,7 +1050,7 @@ static void reenable_7220_chase(unsigned long opaque)
1051static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) 1050static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
1052{ 1051{
1053 u8 ibclt; 1052 u8 ibclt;
1054 unsigned long tnow; 1053 u64 tnow;
1055 1054
1056 ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState); 1055 ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState);
1057 1056
@@ -1066,9 +1065,9 @@ static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst)
1066 case IB_7220_LT_STATE_CFGWAITRMT: 1065 case IB_7220_LT_STATE_CFGWAITRMT:
1067 case IB_7220_LT_STATE_TXREVLANES: 1066 case IB_7220_LT_STATE_TXREVLANES:
1068 case IB_7220_LT_STATE_CFGENH: 1067 case IB_7220_LT_STATE_CFGENH:
1069 tnow = jiffies; 1068 tnow = get_jiffies_64();
1070 if (ppd->cpspec->chase_end && 1069 if (ppd->cpspec->chase_end &&
1071 time_after(tnow, ppd->cpspec->chase_end)) { 1070 time_after64(tnow, ppd->cpspec->chase_end)) {
1072 ppd->cpspec->chase_end = 0; 1071 ppd->cpspec->chase_end = 0;
1073 qib_set_ib_7220_lstate(ppd, 1072 qib_set_ib_7220_lstate(ppd,
1074 QLOGIC_IB_IBCC_LINKCMD_DOWN, 1073 QLOGIC_IB_IBCC_LINKCMD_DOWN,
@@ -1111,9 +1110,9 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
1111 sdma_7220_errors(ppd, errs); 1110 sdma_7220_errors(ppd, errs);
1112 1111
1113 if (errs & ~IB_E_BITSEXTANT) 1112 if (errs & ~IB_E_BITSEXTANT)
1114 qib_dev_err(dd, 1113 qib_dev_err(dd, "error interrupt with unknown errors "
1115 "error interrupt with unknown errors %llx set\n", 1114 "%llx set\n", (unsigned long long)
1116 (unsigned long long) (errs & ~IB_E_BITSEXTANT)); 1115 (errs & ~IB_E_BITSEXTANT));
1117 1116
1118 if (errs & E_SUM_ERRS) { 1117 if (errs & E_SUM_ERRS) {
1119 qib_disarm_7220_senderrbufs(ppd); 1118 qib_disarm_7220_senderrbufs(ppd);
@@ -1192,8 +1191,8 @@ static void handle_7220_errors(struct qib_devdata *dd, u64 errs)
1192 } 1191 }
1193 1192
1194 if (errs & ERR_MASK(ResetNegated)) { 1193 if (errs & ERR_MASK(ResetNegated)) {
1195 qib_dev_err(dd, 1194 qib_dev_err(dd, "Got reset, requires re-init "
1196 "Got reset, requires re-init (unload and reload driver)\n"); 1195 "(unload and reload driver)\n");
1197 dd->flags &= ~QIB_INITTED; /* needs re-init */ 1196 dd->flags &= ~QIB_INITTED; /* needs re-init */
1198 /* mark as having had error */ 1197 /* mark as having had error */
1199 *dd->devstatusp |= QIB_STATUS_HWERROR; 1198 *dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1305,8 +1304,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
1305 if (!hwerrs) 1304 if (!hwerrs)
1306 goto bail; 1305 goto bail;
1307 if (hwerrs == ~0ULL) { 1306 if (hwerrs == ~0ULL) {
1308 qib_dev_err(dd, 1307 qib_dev_err(dd, "Read of hardware error status failed "
1309 "Read of hardware error status failed (all bits set); ignoring\n"); 1308 "(all bits set); ignoring\n");
1310 goto bail; 1309 goto bail;
1311 } 1310 }
1312 qib_stats.sps_hwerrs++; 1311 qib_stats.sps_hwerrs++;
@@ -1329,14 +1328,13 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
1329 qib_inc_eeprom_err(dd, log_idx, 1); 1328 qib_inc_eeprom_err(dd, log_idx, 1);
1330 if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC | 1329 if (hwerrs & ~(TXEMEMPARITYERR_PIOBUF | TXEMEMPARITYERR_PIOPBC |
1331 RXE_PARITY)) 1330 RXE_PARITY))
1332 qib_devinfo(dd->pcidev, 1331 qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
1333 "Hardware error: hwerr=0x%llx (cleared)\n", 1332 "(cleared)\n", (unsigned long long) hwerrs);
1334 (unsigned long long) hwerrs);
1335 1333
1336 if (hwerrs & ~IB_HWE_BITSEXTANT) 1334 if (hwerrs & ~IB_HWE_BITSEXTANT)
1337 qib_dev_err(dd, 1335 qib_dev_err(dd, "hwerror interrupt with unknown errors "
1338 "hwerror interrupt with unknown errors %llx set\n", 1336 "%llx set\n", (unsigned long long)
1339 (unsigned long long) (hwerrs & ~IB_HWE_BITSEXTANT)); 1337 (hwerrs & ~IB_HWE_BITSEXTANT));
1340 1338
1341 if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) 1339 if (hwerrs & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR)
1342 qib_sd7220_clr_ibpar(dd); 1340 qib_sd7220_clr_ibpar(dd);
@@ -1363,9 +1361,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
1363 1361
1364 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { 1362 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
1365 isfatal = 1; 1363 isfatal = 1;
1366 strlcat(msg, 1364 strlcat(msg, "[Memory BIST test failed, "
1367 "[Memory BIST test failed, InfiniPath hardware unusable]", 1365 "InfiniPath hardware unusable]", msgl);
1368 msgl);
1369 /* ignore from now on, so disable until driver reloaded */ 1366 /* ignore from now on, so disable until driver reloaded */
1370 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); 1367 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
1371 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); 1368 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -1411,9 +1408,8 @@ static void qib_7220_handle_hwerrors(struct qib_devdata *dd, char *msg,
1411 qib_dev_err(dd, "%s hardware error\n", msg); 1408 qib_dev_err(dd, "%s hardware error\n", msg);
1412 1409
1413 if (isfatal && !dd->diag_client) { 1410 if (isfatal && !dd->diag_client) {
1414 qib_dev_err(dd, 1411 qib_dev_err(dd, "Fatal Hardware Error, no longer"
1415 "Fatal Hardware Error, no longer usable, SN %.16s\n", 1412 " usable, SN %.16s\n", dd->serial);
1416 dd->serial);
1417 /* 1413 /*
1418 * For /sys status file and user programs to print; if no 1414 * For /sys status file and user programs to print; if no
1419 * trailing brace is copied, we'll know it was truncated. 1415 * trailing brace is copied, we'll know it was truncated.
@@ -1921,9 +1917,8 @@ static noinline void unlikely_7220_intr(struct qib_devdata *dd, u64 istat)
1921 qib_stats.sps_errints++; 1917 qib_stats.sps_errints++;
1922 estat = qib_read_kreg64(dd, kr_errstatus); 1918 estat = qib_read_kreg64(dd, kr_errstatus);
1923 if (!estat) 1919 if (!estat)
1924 qib_devinfo(dd->pcidev, 1920 qib_devinfo(dd->pcidev, "error interrupt (%Lx), "
1925 "error interrupt (%Lx), but no error bits set!\n", 1921 "but no error bits set!\n", istat);
1926 istat);
1927 else 1922 else
1928 handle_7220_errors(dd, estat); 1923 handle_7220_errors(dd, estat);
1929 } 1924 }
@@ -2027,18 +2022,17 @@ bail:
2027static void qib_setup_7220_interrupt(struct qib_devdata *dd) 2022static void qib_setup_7220_interrupt(struct qib_devdata *dd)
2028{ 2023{
2029 if (!dd->cspec->irq) 2024 if (!dd->cspec->irq)
2030 qib_dev_err(dd, 2025 qib_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
2031 "irq is 0, BIOS error? Interrupts won't work\n"); 2026 "work\n");
2032 else { 2027 else {
2033 int ret = request_irq(dd->cspec->irq, qib_7220intr, 2028 int ret = request_irq(dd->cspec->irq, qib_7220intr,
2034 dd->msi_lo ? 0 : IRQF_SHARED, 2029 dd->msi_lo ? 0 : IRQF_SHARED,
2035 QIB_DRV_NAME, dd); 2030 QIB_DRV_NAME, dd);
2036 2031
2037 if (ret) 2032 if (ret)
2038 qib_dev_err(dd, 2033 qib_dev_err(dd, "Couldn't setup %s interrupt "
2039 "Couldn't setup %s interrupt (irq=%d): %d\n", 2034 "(irq=%d): %d\n", dd->msi_lo ?
2040 dd->msi_lo ? "MSI" : "INTx", 2035 "MSI" : "INTx", dd->cspec->irq, ret);
2041 dd->cspec->irq, ret);
2042 } 2036 }
2043} 2037}
2044 2038
@@ -2077,9 +2071,9 @@ static void qib_7220_boardname(struct qib_devdata *dd)
2077 snprintf(dd->boardname, namelen, "%s", n); 2071 snprintf(dd->boardname, namelen, "%s", n);
2078 2072
2079 if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2) 2073 if (dd->majrev != 5 || !dd->minrev || dd->minrev > 2)
2080 qib_dev_err(dd, 2074 qib_dev_err(dd, "Unsupported InfiniPath hardware "
2081 "Unsupported InfiniPath hardware revision %u.%u!\n", 2075 "revision %u.%u!\n",
2082 dd->majrev, dd->minrev); 2076 dd->majrev, dd->minrev);
2083 2077
2084 snprintf(dd->boardversion, sizeof(dd->boardversion), 2078 snprintf(dd->boardversion, sizeof(dd->boardversion),
2085 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n", 2079 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, SW Compat %u\n",
@@ -2151,8 +2145,8 @@ static int qib_setup_7220_reset(struct qib_devdata *dd)
2151bail: 2145bail:
2152 if (ret) { 2146 if (ret) {
2153 if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL)) 2147 if (qib_pcie_params(dd, dd->lbus_width, NULL, NULL))
2154 qib_dev_err(dd, 2148 qib_dev_err(dd, "Reset failed to setup PCIe or "
2155 "Reset failed to setup PCIe or interrupts; continuing anyway\n"); 2149 "interrupts; continuing anyway\n");
2156 2150
2157 /* hold IBC in reset, no sends, etc till later */ 2151 /* hold IBC in reset, no sends, etc till later */
2158 qib_write_kreg(dd, kr_control, 0ULL); 2152 qib_write_kreg(dd, kr_control, 0ULL);
@@ -2192,9 +2186,8 @@ static void qib_7220_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
2192 return; 2186 return;
2193 } 2187 }
2194 if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) { 2188 if (chippa >= (1UL << IBA7220_TID_SZ_SHIFT)) {
2195 qib_dev_err(dd, 2189 qib_dev_err(dd, "Physical page address 0x%lx "
2196 "Physical page address 0x%lx larger than supported\n", 2190 "larger than supported\n", pa);
2197 pa);
2198 return; 2191 return;
2199 } 2192 }
2200 2193
@@ -2712,9 +2705,8 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what)
2712 ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback); 2705 ppd->cpspec->ibcctrl &= ~SYM_MASK(IBCCtrl, Loopback);
2713 /* enable heart beat again */ 2706 /* enable heart beat again */
2714 val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT; 2707 val = IBA7220_IBC_HRTBT_MASK << IBA7220_IBC_HRTBT_SHIFT;
2715 qib_devinfo(ppd->dd->pcidev, 2708 qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
2716 "Disabling IB%u:%u IBC loopback (normal)\n", 2709 "(normal)\n", ppd->dd->unit, ppd->port);
2717 ppd->dd->unit, ppd->port);
2718 } else 2710 } else
2719 ret = -EINVAL; 2711 ret = -EINVAL;
2720 if (!ret) { 2712 if (!ret) {
@@ -3314,8 +3306,8 @@ static int qib_7220_intr_fallback(struct qib_devdata *dd)
3314 if (!dd->msi_lo) 3306 if (!dd->msi_lo)
3315 return 0; 3307 return 0;
3316 3308
3317 qib_devinfo(dd->pcidev, 3309 qib_devinfo(dd->pcidev, "MSI interrupt not detected,"
3318 "MSI interrupt not detected, trying INTx interrupts\n"); 3310 " trying INTx interrupts\n");
3319 qib_7220_free_irq(dd); 3311 qib_7220_free_irq(dd);
3320 qib_enable_intx(dd->pcidev); 3312 qib_enable_intx(dd->pcidev);
3321 /* 3313 /*
@@ -3987,10 +3979,11 @@ static int qib_late_7220_initreg(struct qib_devdata *dd)
3987 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); 3979 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
3988 val = qib_read_kreg64(dd, kr_sendpioavailaddr); 3980 val = qib_read_kreg64(dd, kr_sendpioavailaddr);
3989 if (val != dd->pioavailregs_phys) { 3981 if (val != dd->pioavailregs_phys) {
3990 qib_dev_err(dd, 3982 qib_dev_err(dd, "Catastrophic software error, "
3991 "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", 3983 "SendPIOAvailAddr written as %lx, "
3992 (unsigned long) dd->pioavailregs_phys, 3984 "read back as %llx\n",
3993 (unsigned long long) val); 3985 (unsigned long) dd->pioavailregs_phys,
3986 (unsigned long long) val);
3994 ret = -EINVAL; 3987 ret = -EINVAL;
3995 } 3988 }
3996 qib_register_observer(dd, &sendctrl_observer); 3989 qib_register_observer(dd, &sendctrl_observer);
@@ -4020,8 +4013,8 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
4020 dd->revision = readq(&dd->kregbase[kr_revision]); 4013 dd->revision = readq(&dd->kregbase[kr_revision]);
4021 4014
4022 if ((dd->revision & 0xffffffffU) == 0xffffffffU) { 4015 if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
4023 qib_dev_err(dd, 4016 qib_dev_err(dd, "Revision register read failure, "
4024 "Revision register read failure, giving up initialization\n"); 4017 "giving up initialization\n");
4025 ret = -ENODEV; 4018 ret = -ENODEV;
4026 goto bail; 4019 goto bail;
4027 } 4020 }
@@ -4094,8 +4087,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
4094 /* we always allocate at least 2048 bytes for eager buffers */ 4087 /* we always allocate at least 2048 bytes for eager buffers */
4095 ret = ib_mtu_enum_to_int(qib_ibmtu); 4088 ret = ib_mtu_enum_to_int(qib_ibmtu);
4096 dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU; 4089 dd->rcvegrbufsize = ret != -1 ? max(ret, 2048) : QIB_DEFAULT_MTU;
4097 BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
4098 dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
4099 4090
4100 qib_7220_tidtemplate(dd); 4091 qib_7220_tidtemplate(dd);
4101 4092
@@ -4163,7 +4154,6 @@ static int qib_init_7220_variables(struct qib_devdata *dd)
4163 dd->cspec->sdmabufcnt; 4154 dd->cspec->sdmabufcnt;
4164 dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; 4155 dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
4165 dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ 4156 dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
4166 dd->last_pio = dd->cspec->lastbuf_for_pio;
4167 dd->pbufsctxt = dd->lastctxt_piobuf / 4157 dd->pbufsctxt = dd->lastctxt_piobuf /
4168 (dd->cfgctxts - dd->first_user_ctxt); 4158 (dd->cfgctxts - dd->first_user_ctxt);
4169 4159
@@ -4619,8 +4609,8 @@ struct qib_devdata *qib_init_iba7220_funcs(struct pci_dev *pdev,
4619 break; 4609 break;
4620 } 4610 }
4621 if (qib_pcie_params(dd, minwidth, NULL, NULL)) 4611 if (qib_pcie_params(dd, minwidth, NULL, NULL))
4622 qib_dev_err(dd, 4612 qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
4623 "Failed to setup PCIe or interrupts; continuing anyway\n"); 4613 "continuing anyway\n");
4624 4614
4625 /* save IRQ for possible later use */ 4615 /* save IRQ for possible later use */
4626 dd->cspec->irq = pdev->irq; 4616 dd->cspec->irq = pdev->irq;
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 3f6b21e9dc1..b7c3c7df268 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2008 - 2012 QLogic Corporation. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -41,7 +40,6 @@
41#include <linux/delay.h> 40#include <linux/delay.h>
42#include <linux/io.h> 41#include <linux/io.h>
43#include <linux/jiffies.h> 42#include <linux/jiffies.h>
44#include <linux/module.h>
45#include <rdma/ib_verbs.h> 43#include <rdma/ib_verbs.h>
46#include <rdma/ib_smi.h> 44#include <rdma/ib_smi.h>
47 45
@@ -50,10 +48,6 @@
50#include "qib_qsfp.h" 48#include "qib_qsfp.h"
51 49
52#include "qib_mad.h" 50#include "qib_mad.h"
53#include "qib_verbs.h"
54
55#undef pr_fmt
56#define pr_fmt(fmt) QIB_DRV_NAME " " fmt
57 51
58static void qib_setup_7322_setextled(struct qib_pportdata *, u32); 52static void qib_setup_7322_setextled(struct qib_pportdata *, u32);
59static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t); 53static void qib_7322_handle_hwerrors(struct qib_devdata *, char *, size_t);
@@ -546,7 +540,8 @@ struct qib_chip_specific {
546 u32 lastbuf_for_pio; 540 u32 lastbuf_for_pio;
547 u32 stay_in_freeze; 541 u32 stay_in_freeze;
548 u32 recovery_ports_initted; 542 u32 recovery_ports_initted;
549 struct qib_msix_entry *msix_entries; 543 struct msix_entry *msix_entries;
544 void **msix_arg;
550 unsigned long *sendchkenable; 545 unsigned long *sendchkenable;
551 unsigned long *sendgrhchk; 546 unsigned long *sendgrhchk;
552 unsigned long *sendibchk; 547 unsigned long *sendibchk;
@@ -619,8 +614,8 @@ struct qib_chippport_specific {
619 u64 ibmalfsnap; 614 u64 ibmalfsnap;
620 u64 ibcctrl_a; /* krp_ibcctrl_a shadow */ 615 u64 ibcctrl_a; /* krp_ibcctrl_a shadow */
621 u64 ibcctrl_b; /* krp_ibcctrl_b shadow */ 616 u64 ibcctrl_b; /* krp_ibcctrl_b shadow */
622 unsigned long qdr_dfe_time; 617 u64 qdr_dfe_time;
623 unsigned long chase_end; 618 u64 chase_end;
624 u32 autoneg_tries; 619 u32 autoneg_tries;
625 u32 recovery_init; 620 u32 recovery_init;
626 u32 qdr_dfe_on; 621 u32 qdr_dfe_on;
@@ -643,24 +638,24 @@ static struct {
643 int lsb; 638 int lsb;
644 int port; /* 0 if not port-specific, else port # */ 639 int port; /* 0 if not port-specific, else port # */
645} irq_table[] = { 640} irq_table[] = {
646 { "", qib_7322intr, -1, 0 }, 641 { QIB_DRV_NAME, qib_7322intr, -1, 0 },
647 { " (buf avail)", qib_7322bufavail, 642 { QIB_DRV_NAME " (buf avail)", qib_7322bufavail,
648 SYM_LSB(IntStatus, SendBufAvail), 0 }, 643 SYM_LSB(IntStatus, SendBufAvail), 0 },
649 { " (sdma 0)", sdma_intr, 644 { QIB_DRV_NAME " (sdma 0)", sdma_intr,
650 SYM_LSB(IntStatus, SDmaInt_0), 1 }, 645 SYM_LSB(IntStatus, SDmaInt_0), 1 },
651 { " (sdma 1)", sdma_intr, 646 { QIB_DRV_NAME " (sdma 1)", sdma_intr,
652 SYM_LSB(IntStatus, SDmaInt_1), 2 }, 647 SYM_LSB(IntStatus, SDmaInt_1), 2 },
653 { " (sdmaI 0)", sdma_idle_intr, 648 { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr,
654 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, 649 SYM_LSB(IntStatus, SDmaIdleInt_0), 1 },
655 { " (sdmaI 1)", sdma_idle_intr, 650 { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr,
656 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, 651 SYM_LSB(IntStatus, SDmaIdleInt_1), 2 },
657 { " (sdmaP 0)", sdma_progress_intr, 652 { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr,
658 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, 653 SYM_LSB(IntStatus, SDmaProgressInt_0), 1 },
659 { " (sdmaP 1)", sdma_progress_intr, 654 { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr,
660 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, 655 SYM_LSB(IntStatus, SDmaProgressInt_1), 2 },
661 { " (sdmaC 0)", sdma_cleanup_intr, 656 { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr,
662 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, 657 SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 },
663 { " (sdmaC 1)", sdma_cleanup_intr, 658 { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr,
664 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, 659 SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 },
665}; 660};
666 661
@@ -1580,8 +1575,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
1580 qib_stats.sps_errints++; 1575 qib_stats.sps_errints++;
1581 errs = qib_read_kreg64(dd, kr_errstatus); 1576 errs = qib_read_kreg64(dd, kr_errstatus);
1582 if (!errs) { 1577 if (!errs) {
1583 qib_devinfo(dd->pcidev, 1578 qib_devinfo(dd->pcidev, "device error interrupt, "
1584 "device error interrupt, but no error bits set!\n"); 1579 "but no error bits set!\n");
1585 goto done; 1580 goto done;
1586 } 1581 }
1587 1582
@@ -1627,8 +1622,8 @@ static noinline void handle_7322_errors(struct qib_devdata *dd)
1627 if (errs & QIB_E_RESET) { 1622 if (errs & QIB_E_RESET) {
1628 int pidx; 1623 int pidx;
1629 1624
1630 qib_dev_err(dd, 1625 qib_dev_err(dd, "Got reset, requires re-init "
1631 "Got reset, requires re-init (unload and reload driver)\n"); 1626 "(unload and reload driver)\n");
1632 dd->flags &= ~QIB_INITTED; /* needs re-init */ 1627 dd->flags &= ~QIB_INITTED; /* needs re-init */
1633 /* mark as having had error */ 1628 /* mark as having had error */
1634 *dd->devstatusp |= QIB_STATUS_HWERROR; 1629 *dd->devstatusp |= QIB_STATUS_HWERROR;
@@ -1676,8 +1671,7 @@ static void reenable_chase(unsigned long opaque)
1676 QLOGIC_IB_IBCC_LINKINITCMD_POLL); 1671 QLOGIC_IB_IBCC_LINKINITCMD_POLL);
1677} 1672}
1678 1673
1679static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow, 1674static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt)
1680 u8 ibclt)
1681{ 1675{
1682 ppd->cpspec->chase_end = 0; 1676 ppd->cpspec->chase_end = 0;
1683 1677
@@ -1693,7 +1687,7 @@ static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow,
1693static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) 1687static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
1694{ 1688{
1695 u8 ibclt; 1689 u8 ibclt;
1696 unsigned long tnow; 1690 u64 tnow;
1697 1691
1698 ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState); 1692 ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState);
1699 1693
@@ -1708,9 +1702,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
1708 case IB_7322_LT_STATE_CFGWAITRMT: 1702 case IB_7322_LT_STATE_CFGWAITRMT:
1709 case IB_7322_LT_STATE_TXREVLANES: 1703 case IB_7322_LT_STATE_TXREVLANES:
1710 case IB_7322_LT_STATE_CFGENH: 1704 case IB_7322_LT_STATE_CFGENH:
1711 tnow = jiffies; 1705 tnow = get_jiffies_64();
1712 if (ppd->cpspec->chase_end && 1706 if (ppd->cpspec->chase_end &&
1713 time_after(tnow, ppd->cpspec->chase_end)) 1707 time_after64(tnow, ppd->cpspec->chase_end))
1714 disable_chase(ppd, tnow, ibclt); 1708 disable_chase(ppd, tnow, ibclt);
1715 else if (!ppd->cpspec->chase_end) 1709 else if (!ppd->cpspec->chase_end)
1716 ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; 1710 ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME;
@@ -1765,9 +1759,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst)
1765 ppd->dd->cspec->r1 ? 1759 ppd->dd->cspec->r1 ?
1766 QDR_STATIC_ADAPT_DOWN_R1 : 1760 QDR_STATIC_ADAPT_DOWN_R1 :
1767 QDR_STATIC_ADAPT_DOWN); 1761 QDR_STATIC_ADAPT_DOWN);
1768 pr_info( 1762 printk(KERN_INFO QIB_DRV_NAME
1769 "IB%u:%u re-enabled QDR adaptation ibclt %x\n", 1763 " IB%u:%u re-enabled QDR adaptation "
1770 ppd->dd->unit, ppd->port, ibclt); 1764 "ibclt %x\n", ppd->dd->unit, ppd->port, ibclt);
1771 } 1765 }
1772 } 1766 }
1773} 1767}
@@ -1809,9 +1803,9 @@ static noinline void handle_7322_p_errors(struct qib_pportdata *ppd)
1809 if (!*msg) 1803 if (!*msg)
1810 snprintf(msg, sizeof ppd->cpspec->epmsgbuf, 1804 snprintf(msg, sizeof ppd->cpspec->epmsgbuf,
1811 "no others"); 1805 "no others");
1812 qib_dev_porterr(dd, ppd->port, 1806 qib_dev_porterr(dd, ppd->port, "error interrupt with unknown"
1813 "error interrupt with unknown errors 0x%016Lx set (and %s)\n", 1807 " errors 0x%016Lx set (and %s)\n",
1814 (errs & ~QIB_E_P_BITSEXTANT), msg); 1808 (errs & ~QIB_E_P_BITSEXTANT), msg);
1815 *msg = '\0'; 1809 *msg = '\0';
1816 } 1810 }
1817 1811
@@ -2029,8 +2023,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2029 if (!hwerrs) 2023 if (!hwerrs)
2030 goto bail; 2024 goto bail;
2031 if (hwerrs == ~0ULL) { 2025 if (hwerrs == ~0ULL) {
2032 qib_dev_err(dd, 2026 qib_dev_err(dd, "Read of hardware error status failed "
2033 "Read of hardware error status failed (all bits set); ignoring\n"); 2027 "(all bits set); ignoring\n");
2034 goto bail; 2028 goto bail;
2035 } 2029 }
2036 qib_stats.sps_hwerrs++; 2030 qib_stats.sps_hwerrs++;
@@ -2044,9 +2038,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2044 /* no EEPROM logging, yet */ 2038 /* no EEPROM logging, yet */
2045 2039
2046 if (hwerrs) 2040 if (hwerrs)
2047 qib_devinfo(dd->pcidev, 2041 qib_devinfo(dd->pcidev, "Hardware error: hwerr=0x%llx "
2048 "Hardware error: hwerr=0x%llx (cleared)\n", 2042 "(cleared)\n", (unsigned long long) hwerrs);
2049 (unsigned long long) hwerrs);
2050 2043
2051 ctrl = qib_read_kreg32(dd, kr_control); 2044 ctrl = qib_read_kreg32(dd, kr_control);
2052 if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) { 2045 if ((ctrl & SYM_MASK(Control, FreezeMode)) && !dd->diag_client) {
@@ -2070,9 +2063,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2070 2063
2071 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { 2064 if (hwerrs & HWE_MASK(PowerOnBISTFailed)) {
2072 isfatal = 1; 2065 isfatal = 1;
2073 strlcpy(msg, 2066 strlcpy(msg, "[Memory BIST test failed, "
2074 "[Memory BIST test failed, InfiniPath hardware unusable]", 2067 "InfiniPath hardware unusable]", msgl);
2075 msgl);
2076 /* ignore from now on, so disable until driver reloaded */ 2068 /* ignore from now on, so disable until driver reloaded */
2077 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed); 2069 dd->cspec->hwerrmask &= ~HWE_MASK(PowerOnBISTFailed);
2078 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask); 2070 qib_write_kreg(dd, kr_hwerrmask, dd->cspec->hwerrmask);
@@ -2085,9 +2077,8 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg,
2085 qib_dev_err(dd, "%s hardware error\n", msg); 2077 qib_dev_err(dd, "%s hardware error\n", msg);
2086 2078
2087 if (isfatal && !dd->diag_client) { 2079 if (isfatal && !dd->diag_client) {
2088 qib_dev_err(dd, 2080 qib_dev_err(dd, "Fatal Hardware Error, no longer"
2089 "Fatal Hardware Error, no longer usable, SN %.16s\n", 2081 " usable, SN %.16s\n", dd->serial);
2090 dd->serial);
2091 /* 2082 /*
2092 * for /sys status file and user programs to print; if no 2083 * for /sys status file and user programs to print; if no
2093 * trailing brace is copied, we'll know it was truncated. 2084 * trailing brace is copied, we'll know it was truncated.
@@ -2315,11 +2306,16 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
2315 SYM_LSB(IBCCtrlA_0, MaxPktLen); 2306 SYM_LSB(IBCCtrlA_0, MaxPktLen);
2316 ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */ 2307 ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */
2317 2308
2309 /* initially come up waiting for TS1, without sending anything. */
2310 val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
2311 QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
2312
2318 /* 2313 /*
2319 * Reset the PCS interface to the serdes (and also ibc, which is still 2314 * Reset the PCS interface to the serdes (and also ibc, which is still
2320 * in reset from above). Writes new value of ibcctrl_a as last step. 2315 * in reset from above). Writes new value of ibcctrl_a as last step.
2321 */ 2316 */
2322 qib_7322_mini_pcs_reset(ppd); 2317 qib_7322_mini_pcs_reset(ppd);
2318 qib_write_kreg(dd, kr_scratch, 0ULL);
2323 2319
2324 if (!ppd->cpspec->ibcctrl_b) { 2320 if (!ppd->cpspec->ibcctrl_b) {
2325 unsigned lse = ppd->link_speed_enabled; 2321 unsigned lse = ppd->link_speed_enabled;
@@ -2385,20 +2381,17 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd)
2385 ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn); 2381 ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn);
2386 set_vls(ppd); 2382 set_vls(ppd);
2387 2383
2388 /* initially come up DISABLED, without sending anything. */
2389 val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE <<
2390 QLOGIC_IB_IBCC_LINKINITCMD_SHIFT);
2391 qib_write_kreg_port(ppd, krp_ibcctrl_a, val);
2392 qib_write_kreg(dd, kr_scratch, 0ULL);
2393 /* clear the linkinit cmds */
2394 ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd);
2395
2396 /* be paranoid against later code motion, etc. */ 2384 /* be paranoid against later code motion, etc. */
2397 spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); 2385 spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags);
2398 ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable); 2386 ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable);
2399 qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl); 2387 qib_write_kreg_port(ppd, krp_rcvctrl, ppd->p_rcvctrl);
2400 spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags); 2388 spin_unlock_irqrestore(&dd->cspec->rcvmod_lock, flags);
2401 2389
2390 /* Hold the link state machine for mezz boards */
2391 if (IS_QMH(dd) || IS_QME(dd))
2392 qib_set_ib_7322_lstate(ppd, 0,
2393 QLOGIC_IB_IBCC_LINKINITCMD_DISABLE);
2394
2402 /* Also enable IBSTATUSCHG interrupt. */ 2395 /* Also enable IBSTATUSCHG interrupt. */
2403 val = qib_read_kreg_port(ppd, krp_errmask); 2396 val = qib_read_kreg_port(ppd, krp_errmask);
2404 qib_write_kreg_port(ppd, krp_errmask, 2397 qib_write_kreg_port(ppd, krp_errmask,
@@ -2574,13 +2567,9 @@ static void qib_7322_nomsix(struct qib_devdata *dd)
2574 int i; 2567 int i;
2575 2568
2576 dd->cspec->num_msix_entries = 0; 2569 dd->cspec->num_msix_entries = 0;
2577 for (i = 0; i < n; i++) { 2570 for (i = 0; i < n; i++)
2578 irq_set_affinity_hint( 2571 free_irq(dd->cspec->msix_entries[i].vector,
2579 dd->cspec->msix_entries[i].msix.vector, NULL); 2572 dd->cspec->msix_arg[i]);
2580 free_cpumask_var(dd->cspec->msix_entries[i].mask);
2581 free_irq(dd->cspec->msix_entries[i].msix.vector,
2582 dd->cspec->msix_entries[i].arg);
2583 }
2584 qib_nomsix(dd); 2573 qib_nomsix(dd);
2585 } 2574 }
2586 /* make sure no MSIx interrupts are left pending */ 2575 /* make sure no MSIx interrupts are left pending */
@@ -2608,6 +2597,7 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
2608 kfree(dd->cspec->sendgrhchk); 2597 kfree(dd->cspec->sendgrhchk);
2609 kfree(dd->cspec->sendibchk); 2598 kfree(dd->cspec->sendibchk);
2610 kfree(dd->cspec->msix_entries); 2599 kfree(dd->cspec->msix_entries);
2600 kfree(dd->cspec->msix_arg);
2611 for (i = 0; i < dd->num_pports; i++) { 2601 for (i = 0; i < dd->num_pports; i++) {
2612 unsigned long flags; 2602 unsigned long flags;
2613 u32 mask = QSFP_GPIO_MOD_PRS_N | 2603 u32 mask = QSFP_GPIO_MOD_PRS_N |
@@ -2675,9 +2665,8 @@ static noinline void unknown_7322_ibits(struct qib_devdata *dd, u64 istat)
2675 char msg[128]; 2665 char msg[128];
2676 2666
2677 kills = istat & ~QIB_I_BITSEXTANT; 2667 kills = istat & ~QIB_I_BITSEXTANT;
2678 qib_dev_err(dd, 2668 qib_dev_err(dd, "Clearing reserved interrupt(s) 0x%016llx:"
2679 "Clearing reserved interrupt(s) 0x%016llx: %s\n", 2669 " %s\n", (unsigned long long) kills, msg);
2680 (unsigned long long) kills, msg);
2681 qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills)); 2670 qib_write_kreg(dd, kr_intmask, (dd->cspec->int_enable_mask & ~kills));
2682} 2671}
2683 2672
@@ -2726,7 +2715,7 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd)
2726 pins >>= SYM_LSB(EXTStatus, GPIOIn); 2715 pins >>= SYM_LSB(EXTStatus, GPIOIn);
2727 if (!(pins & mask)) { 2716 if (!(pins & mask)) {
2728 ++handled; 2717 ++handled;
2729 qd->t_insert = jiffies; 2718 qd->t_insert = get_jiffies_64();
2730 queue_work(ib_wq, &qd->work); 2719 queue_work(ib_wq, &qd->work);
2731 } 2720 }
2732 } 2721 }
@@ -2864,8 +2853,9 @@ static irqreturn_t qib_7322intr(int irq, void *data)
2864 for (i = 0; i < dd->first_user_ctxt; i++) { 2853 for (i = 0; i < dd->first_user_ctxt; i++) {
2865 if (ctxtrbits & rmask) { 2854 if (ctxtrbits & rmask) {
2866 ctxtrbits &= ~rmask; 2855 ctxtrbits &= ~rmask;
2867 if (dd->rcd[i]) 2856 if (dd->rcd[i]) {
2868 qib_kreceive(dd->rcd[i], NULL, &npkts); 2857 qib_kreceive(dd->rcd[i], NULL, &npkts);
2858 }
2869 } 2859 }
2870 rmask <<= 1; 2860 rmask <<= 1;
2871 } 2861 }
@@ -3081,8 +3071,6 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
3081 int ret, i, msixnum; 3071 int ret, i, msixnum;
3082 u64 redirect[6]; 3072 u64 redirect[6];
3083 u64 mask; 3073 u64 mask;
3084 const struct cpumask *local_mask;
3085 int firstcpu, secondcpu = 0, currrcvcpu = 0;
3086 3074
3087 if (!dd->num_pports) 3075 if (!dd->num_pports)
3088 return; 3076 return;
@@ -3110,16 +3098,16 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend)
3110 /* Try to get INTx interrupt */ 3098 /* Try to get INTx interrupt */
3111try_intx: 3099try_intx:
3112 if (!dd->pcidev->irq) { 3100 if (!dd->pcidev->irq) {
3113 qib_dev_err(dd, 3101 qib_dev_err(dd, "irq is 0, BIOS error? "
3114 "irq is 0, BIOS error? Interrupts won't work\n"); 3102 "Interrupts won't work\n");
3115 goto bail; 3103 goto bail;
3116 } 3104 }
3117 ret = request_irq(dd->pcidev->irq, qib_7322intr, 3105 ret = request_irq(dd->pcidev->irq, qib_7322intr,
3118 IRQF_SHARED, QIB_DRV_NAME, dd); 3106 IRQF_SHARED, QIB_DRV_NAME, dd);
3119 if (ret) { 3107 if (ret) {
3120 qib_dev_err(dd, 3108 qib_dev_err(dd, "Couldn't setup INTx "
3121 "Couldn't setup INTx interrupt (irq=%d): %d\n", 3109 "interrupt (irq=%d): %d\n",
3122 dd->pcidev->irq, ret); 3110 dd->pcidev->irq, ret);
3123 goto bail; 3111 goto bail;
3124 } 3112 }
3125 dd->cspec->irq = dd->pcidev->irq; 3113 dd->cspec->irq = dd->pcidev->irq;
@@ -3131,28 +3119,13 @@ try_intx:
3131 memset(redirect, 0, sizeof redirect); 3119 memset(redirect, 0, sizeof redirect);
3132 mask = ~0ULL; 3120 mask = ~0ULL;
3133 msixnum = 0; 3121 msixnum = 0;
3134 local_mask = cpumask_of_pcibus(dd->pcidev->bus);
3135 firstcpu = cpumask_first(local_mask);
3136 if (firstcpu >= nr_cpu_ids ||
3137 cpumask_weight(local_mask) == num_online_cpus()) {
3138 local_mask = topology_core_cpumask(0);
3139 firstcpu = cpumask_first(local_mask);
3140 }
3141 if (firstcpu < nr_cpu_ids) {
3142 secondcpu = cpumask_next(firstcpu, local_mask);
3143 if (secondcpu >= nr_cpu_ids)
3144 secondcpu = firstcpu;
3145 currrcvcpu = secondcpu;
3146 }
3147 for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { 3122 for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) {
3148 irq_handler_t handler; 3123 irq_handler_t handler;
3124 const char *name;
3149 void *arg; 3125 void *arg;
3150 u64 val; 3126 u64 val;
3151 int lsb, reg, sh; 3127 int lsb, reg, sh;
3152 3128
3153 dd->cspec->msix_entries[msixnum].
3154 name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1]
3155 = '\0';
3156 if (i < ARRAY_SIZE(irq_table)) { 3129 if (i < ARRAY_SIZE(irq_table)) {
3157 if (irq_table[i].port) { 3130 if (irq_table[i].port) {
3158 /* skip if for a non-configured port */ 3131 /* skip if for a non-configured port */
@@ -3163,11 +3136,7 @@ try_intx:
3163 arg = dd; 3136 arg = dd;
3164 lsb = irq_table[i].lsb; 3137 lsb = irq_table[i].lsb;
3165 handler = irq_table[i].handler; 3138 handler = irq_table[i].handler;
3166 snprintf(dd->cspec->msix_entries[msixnum].name, 3139 name = irq_table[i].name;
3167 sizeof(dd->cspec->msix_entries[msixnum].name)
3168 - 1,
3169 QIB_DRV_NAME "%d%s", dd->unit,
3170 irq_table[i].name);
3171 } else { 3140 } else {
3172 unsigned ctxt; 3141 unsigned ctxt;
3173 3142
@@ -3180,29 +3149,23 @@ try_intx:
3180 continue; 3149 continue;
3181 lsb = QIB_I_RCVAVAIL_LSB + ctxt; 3150 lsb = QIB_I_RCVAVAIL_LSB + ctxt;
3182 handler = qib_7322pintr; 3151 handler = qib_7322pintr;
3183 snprintf(dd->cspec->msix_entries[msixnum].name, 3152 name = QIB_DRV_NAME " (kctx)";
3184 sizeof(dd->cspec->msix_entries[msixnum].name)
3185 - 1,
3186 QIB_DRV_NAME "%d (kctx)", dd->unit);
3187 } 3153 }
3188 ret = request_irq( 3154 ret = request_irq(dd->cspec->msix_entries[msixnum].vector,
3189 dd->cspec->msix_entries[msixnum].msix.vector, 3155 handler, 0, name, arg);
3190 handler, 0, dd->cspec->msix_entries[msixnum].name,
3191 arg);
3192 if (ret) { 3156 if (ret) {
3193 /* 3157 /*
3194 * Shouldn't happen since the enable said we could 3158 * Shouldn't happen since the enable said we could
3195 * have as many as we are trying to setup here. 3159 * have as many as we are trying to setup here.
3196 */ 3160 */
3197 qib_dev_err(dd, 3161 qib_dev_err(dd, "Couldn't setup MSIx "
3198 "Couldn't setup MSIx interrupt (vec=%d, irq=%d): %d\n", 3162 "interrupt (vec=%d, irq=%d): %d\n", msixnum,
3199 msixnum, 3163 dd->cspec->msix_entries[msixnum].vector,
3200 dd->cspec->msix_entries[msixnum].msix.vector, 3164 ret);
3201 ret);
3202 qib_7322_nomsix(dd); 3165 qib_7322_nomsix(dd);
3203 goto try_intx; 3166 goto try_intx;
3204 } 3167 }
3205 dd->cspec->msix_entries[msixnum].arg = arg; 3168 dd->cspec->msix_arg[msixnum] = arg;
3206 if (lsb >= 0) { 3169 if (lsb >= 0) {
3207 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; 3170 reg = lsb / IBA7322_REDIRECT_VEC_PER_REG;
3208 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * 3171 sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) *
@@ -3212,25 +3175,6 @@ try_intx:
3212 } 3175 }
3213 val = qib_read_kreg64(dd, 2 * msixnum + 1 + 3176 val = qib_read_kreg64(dd, 2 * msixnum + 1 +
3214 (QIB_7322_MsixTable_OFFS / sizeof(u64))); 3177 (QIB_7322_MsixTable_OFFS / sizeof(u64)));
3215 if (firstcpu < nr_cpu_ids &&
3216 zalloc_cpumask_var(
3217 &dd->cspec->msix_entries[msixnum].mask,
3218 GFP_KERNEL)) {
3219 if (handler == qib_7322pintr) {
3220 cpumask_set_cpu(currrcvcpu,
3221 dd->cspec->msix_entries[msixnum].mask);
3222 currrcvcpu = cpumask_next(currrcvcpu,
3223 local_mask);
3224 if (currrcvcpu >= nr_cpu_ids)
3225 currrcvcpu = secondcpu;
3226 } else {
3227 cpumask_set_cpu(firstcpu,
3228 dd->cspec->msix_entries[msixnum].mask);
3229 }
3230 irq_set_affinity_hint(
3231 dd->cspec->msix_entries[msixnum].msix.vector,
3232 dd->cspec->msix_entries[msixnum].mask);
3233 }
3234 msixnum++; 3178 msixnum++;
3235 } 3179 }
3236 /* Initialize the vector mapping */ 3180 /* Initialize the vector mapping */
@@ -3315,9 +3259,8 @@ static unsigned qib_7322_boardname(struct qib_devdata *dd)
3315 (unsigned)SYM_FIELD(dd->revision, Revision_R, SW)); 3259 (unsigned)SYM_FIELD(dd->revision, Revision_R, SW));
3316 3260
3317 if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) { 3261 if (qib_singleport && (features >> PORT_SPD_CAP_SHIFT) & PORT_SPD_CAP) {
3318 qib_devinfo(dd->pcidev, 3262 qib_devinfo(dd->pcidev, "IB%u: Forced to single port mode"
3319 "IB%u: Forced to single port mode by module parameter\n", 3263 " by module parameter\n", dd->unit);
3320 dd->unit);
3321 features &= PORT_SPD_CAP; 3264 features &= PORT_SPD_CAP;
3322 } 3265 }
3323 3266
@@ -3411,8 +3354,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
3411 if (val == dd->revision) 3354 if (val == dd->revision)
3412 break; 3355 break;
3413 if (i == 5) { 3356 if (i == 5) {
3414 qib_dev_err(dd, 3357 qib_dev_err(dd, "Failed to initialize after reset, "
3415 "Failed to initialize after reset, unusable\n"); 3358 "unusable\n");
3416 ret = 0; 3359 ret = 0;
3417 goto bail; 3360 goto bail;
3418 } 3361 }
@@ -3423,7 +3366,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
3423 if (msix_entries) { 3366 if (msix_entries) {
3424 /* restore the MSIx vector address and data if saved above */ 3367 /* restore the MSIx vector address and data if saved above */
3425 for (i = 0; i < msix_entries; i++) { 3368 for (i = 0; i < msix_entries; i++) {
3426 dd->cspec->msix_entries[i].msix.entry = i; 3369 dd->cspec->msix_entries[i].entry = i;
3427 if (!msix_vecsave || !msix_vecsave[2 * i]) 3370 if (!msix_vecsave || !msix_vecsave[2 * i])
3428 continue; 3371 continue;
3429 qib_write_kreg(dd, 2 * i + 3372 qib_write_kreg(dd, 2 * i +
@@ -3443,8 +3386,8 @@ static int qib_do_7322_reset(struct qib_devdata *dd)
3443 if (qib_pcie_params(dd, dd->lbus_width, 3386 if (qib_pcie_params(dd, dd->lbus_width,
3444 &dd->cspec->num_msix_entries, 3387 &dd->cspec->num_msix_entries,
3445 dd->cspec->msix_entries)) 3388 dd->cspec->msix_entries))
3446 qib_dev_err(dd, 3389 qib_dev_err(dd, "Reset failed to setup PCIe or interrupts; "
3447 "Reset failed to setup PCIe or interrupts; continuing anyway\n"); 3390 "continuing anyway\n");
3448 3391
3449 qib_setup_7322_interrupt(dd, 1); 3392 qib_setup_7322_interrupt(dd, 1);
3450 3393
@@ -3485,9 +3428,8 @@ static void qib_7322_put_tid(struct qib_devdata *dd, u64 __iomem *tidptr,
3485 return; 3428 return;
3486 } 3429 }
3487 if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) { 3430 if (chippa >= (1UL << IBA7322_TID_SZ_SHIFT)) {
3488 qib_dev_err(dd, 3431 qib_dev_err(dd, "Physical page address 0x%lx "
3489 "Physical page address 0x%lx larger than supported\n", 3432 "larger than supported\n", pa);
3490 pa);
3491 return; 3433 return;
3492 } 3434 }
3493 3435
@@ -3662,7 +3604,7 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd)
3662 if (qib_rcvhdrcnt) 3604 if (qib_rcvhdrcnt)
3663 dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt); 3605 dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt);
3664 else 3606 else
3665 dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt, 3607 dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt,
3666 dd->num_pports > 1 ? 1024U : 2048U); 3608 dd->num_pports > 1 ? 1024U : 2048U);
3667} 3609}
3668 3610
@@ -4041,9 +3983,8 @@ static int qib_7322_set_loopback(struct qib_pportdata *ppd, const char *what)
4041 Loopback); 3983 Loopback);
4042 /* enable heart beat again */ 3984 /* enable heart beat again */
4043 val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB; 3985 val = IBA7322_IBC_HRTBT_RMASK << IBA7322_IBC_HRTBT_LSB;
4044 qib_devinfo(ppd->dd->pcidev, 3986 qib_devinfo(ppd->dd->pcidev, "Disabling IB%u:%u IBC loopback "
4045 "Disabling IB%u:%u IBC loopback (normal)\n", 3987 "(normal)\n", ppd->dd->unit, ppd->port);
4046 ppd->dd->unit, ppd->port);
4047 } else 3988 } else
4048 ret = -EINVAL; 3989 ret = -EINVAL;
4049 if (!ret) { 3990 if (!ret) {
@@ -4727,8 +4668,8 @@ static void init_7322_cntrnames(struct qib_devdata *dd)
4727 dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs 4668 dd->pport[i].cpspec->portcntrs = kmalloc(dd->cspec->nportcntrs
4728 * sizeof(u64), GFP_KERNEL); 4669 * sizeof(u64), GFP_KERNEL);
4729 if (!dd->pport[i].cpspec->portcntrs) 4670 if (!dd->pport[i].cpspec->portcntrs)
4730 qib_dev_err(dd, 4671 qib_dev_err(dd, "Failed allocation for"
4731 "Failed allocation for portcounters\n"); 4672 " portcounters\n");
4732 } 4673 }
4733} 4674}
4734 4675
@@ -4857,7 +4798,7 @@ static void qib_get_7322_faststats(unsigned long opaque)
4857 (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | 4798 (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED |
4858 QIBL_LINKACTIVE)) && 4799 QIBL_LINKACTIVE)) &&
4859 ppd->cpspec->qdr_dfe_time && 4800 ppd->cpspec->qdr_dfe_time &&
4860 time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) { 4801 time_after64(get_jiffies_64(), ppd->cpspec->qdr_dfe_time)) {
4861 ppd->cpspec->qdr_dfe_on = 0; 4802 ppd->cpspec->qdr_dfe_on = 0;
4862 4803
4863 qib_write_kreg_port(ppd, krp_static_adapt_dis(2), 4804 qib_write_kreg_port(ppd, krp_static_adapt_dis(2),
@@ -4878,8 +4819,8 @@ static int qib_7322_intr_fallback(struct qib_devdata *dd)
4878 if (!dd->cspec->num_msix_entries) 4819 if (!dd->cspec->num_msix_entries)
4879 return 0; /* already using INTx */ 4820 return 0; /* already using INTx */
4880 4821
4881 qib_devinfo(dd->pcidev, 4822 qib_devinfo(dd->pcidev, "MSIx interrupt not detected,"
4882 "MSIx interrupt not detected, trying INTx interrupts\n"); 4823 " trying INTx interrupts\n");
4883 qib_7322_nomsix(dd); 4824 qib_7322_nomsix(dd);
4884 qib_enable_intx(dd->pcidev); 4825 qib_enable_intx(dd->pcidev);
4885 qib_setup_7322_interrupt(dd, 0); 4826 qib_setup_7322_interrupt(dd, 0);
@@ -5164,11 +5105,15 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
5164 goto retry; 5105 goto retry;
5165 5106
5166 if (!ibp->smi_ah) { 5107 if (!ibp->smi_ah) {
5108 struct ib_ah_attr attr;
5167 struct ib_ah *ah; 5109 struct ib_ah *ah;
5168 5110
5169 ah = qib_create_qp0_ah(ibp, be16_to_cpu(IB_LID_PERMISSIVE)); 5111 memset(&attr, 0, sizeof attr);
5112 attr.dlid = be16_to_cpu(IB_LID_PERMISSIVE);
5113 attr.port_num = ppd->port;
5114 ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
5170 if (IS_ERR(ah)) 5115 if (IS_ERR(ah))
5171 ret = PTR_ERR(ah); 5116 ret = -EINVAL;
5172 else { 5117 else {
5173 send_buf->ah = ah; 5118 send_buf->ah = ah;
5174 ibp->smi_ah = to_iah(ah); 5119 ibp->smi_ah = to_iah(ah);
@@ -5287,8 +5232,6 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
5287 QIBL_IB_AUTONEG_INPROG))) 5232 QIBL_IB_AUTONEG_INPROG)))
5288 set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled); 5233 set_7322_ibspeed_fast(ppd, ppd->link_speed_enabled);
5289 if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) { 5234 if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG)) {
5290 struct qib_qsfp_data *qd =
5291 &ppd->cpspec->qsfp_data;
5292 /* unlock the Tx settings, speed may change */ 5235 /* unlock the Tx settings, speed may change */
5293 qib_write_kreg_port(ppd, krp_tx_deemph_override, 5236 qib_write_kreg_port(ppd, krp_tx_deemph_override,
5294 SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0, 5237 SYM_MASK(IBSD_TX_DEEMPHASIS_OVERRIDE_0,
@@ -5296,12 +5239,6 @@ static int qib_7322_ib_updown(struct qib_pportdata *ppd, int ibup, u64 ibcs)
5296 qib_cancel_sends(ppd); 5239 qib_cancel_sends(ppd);
5297 /* on link down, ensure sane pcs state */ 5240 /* on link down, ensure sane pcs state */
5298 qib_7322_mini_pcs_reset(ppd); 5241 qib_7322_mini_pcs_reset(ppd);
5299 /* schedule the qsfp refresh which should turn the link
5300 off */
5301 if (ppd->dd->flags & QIB_HAS_QSFP) {
5302 qd->t_insert = jiffies;
5303 queue_work(ib_wq, &qd->work);
5304 }
5305 spin_lock_irqsave(&ppd->sdma_lock, flags); 5242 spin_lock_irqsave(&ppd->sdma_lock, flags);
5306 if (__qib_sdma_running(ppd)) 5243 if (__qib_sdma_running(ppd))
5307 __qib_sdma_process_event(ppd, 5244 __qib_sdma_process_event(ppd,
@@ -5651,79 +5588,44 @@ static void qsfp_7322_event(struct work_struct *work)
5651{ 5588{
5652 struct qib_qsfp_data *qd; 5589 struct qib_qsfp_data *qd;
5653 struct qib_pportdata *ppd; 5590 struct qib_pportdata *ppd;
5654 unsigned long pwrup; 5591 u64 pwrup;
5655 unsigned long flags;
5656 int ret; 5592 int ret;
5657 u32 le2; 5593 u32 le2;
5658 5594
5659 qd = container_of(work, struct qib_qsfp_data, work); 5595 qd = container_of(work, struct qib_qsfp_data, work);
5660 ppd = qd->ppd; 5596 ppd = qd->ppd;
5661 pwrup = qd->t_insert + 5597 pwrup = qd->t_insert + msecs_to_jiffies(QSFP_PWR_LAG_MSEC);
5662 msecs_to_jiffies(QSFP_PWR_LAG_MSEC - QSFP_MODPRS_LAG_MSEC);
5663 5598
5664 /* Delay for 20 msecs to allow ModPrs resistor to setup */ 5599 /*
5665 mdelay(QSFP_MODPRS_LAG_MSEC); 5600 * Some QSFP's not only do not respond until the full power-up
5666 5601 * time, but may behave badly if we try. So hold off responding
5667 if (!qib_qsfp_mod_present(ppd)) { 5602 * to insertion.
5668 ppd->cpspec->qsfp_data.modpresent = 0; 5603 */
5669 /* Set the physical link to disabled */ 5604 while (1) {
5670 qib_set_ib_7322_lstate(ppd, 0, 5605 u64 now = get_jiffies_64();
5671 QLOGIC_IB_IBCC_LINKINITCMD_DISABLE); 5606 if (time_after64(now, pwrup))
5672 spin_lock_irqsave(&ppd->lflags_lock, flags); 5607 break;
5673 ppd->lflags &= ~QIBL_LINKV; 5608 msleep(20);
5674 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
5675 } else {
5676 /*
5677 * Some QSFP's not only do not respond until the full power-up
5678 * time, but may behave badly if we try. So hold off responding
5679 * to insertion.
5680 */
5681 while (1) {
5682 if (time_is_before_jiffies(pwrup))
5683 break;
5684 msleep(20);
5685 }
5686
5687 ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
5688
5689 /*
5690 * Need to change LE2 back to defaults if we couldn't
5691 * read the cable type (to handle cable swaps), so do this
5692 * even on failure to read cable information. We don't
5693 * get here for QME, so IS_QME check not needed here.
5694 */
5695 if (!ret && !ppd->dd->cspec->r1) {
5696 if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
5697 le2 = LE2_QME;
5698 else if (qd->cache.atten[1] >= qib_long_atten &&
5699 QSFP_IS_CU(qd->cache.tech))
5700 le2 = LE2_5m;
5701 else
5702 le2 = LE2_DEFAULT;
5703 } else
5704 le2 = LE2_DEFAULT;
5705 ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
5706 /*
5707 * We always change parameteters, since we can choose
5708 * values for cables without eeproms, and the cable may have
5709 * changed from a cable with full or partial eeprom content
5710 * to one with partial or no content.
5711 */
5712 init_txdds_table(ppd, 0);
5713 /* The physical link is being re-enabled only when the
5714 * previous state was DISABLED and the VALID bit is not
5715 * set. This should only happen when the cable has been
5716 * physically pulled. */
5717 if (!ppd->cpspec->qsfp_data.modpresent &&
5718 (ppd->lflags & (QIBL_LINKV | QIBL_IB_LINK_DISABLED))) {
5719 ppd->cpspec->qsfp_data.modpresent = 1;
5720 qib_set_ib_7322_lstate(ppd, 0,
5721 QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
5722 spin_lock_irqsave(&ppd->lflags_lock, flags);
5723 ppd->lflags |= QIBL_LINKV;
5724 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
5725 }
5726 } 5609 }
5610 ret = qib_refresh_qsfp_cache(ppd, &qd->cache);
5611 /*
5612 * Need to change LE2 back to defaults if we couldn't
5613 * read the cable type (to handle cable swaps), so do this
5614 * even on failure to read cable information. We don't
5615 * get here for QME, so IS_QME check not needed here.
5616 */
5617 if (!ret && !ppd->dd->cspec->r1) {
5618 if (QSFP_IS_ACTIVE_FAR(qd->cache.tech))
5619 le2 = LE2_QME;
5620 else if (qd->cache.atten[1] >= qib_long_atten &&
5621 QSFP_IS_CU(qd->cache.tech))
5622 le2 = LE2_5m;
5623 else
5624 le2 = LE2_DEFAULT;
5625 } else
5626 le2 = LE2_DEFAULT;
5627 ibsd_wr_allchans(ppd, 13, (le2 << 7), BMASK(9, 7));
5628 init_txdds_table(ppd, 0);
5727} 5629}
5728 5630
5729/* 5631/*
@@ -5827,8 +5729,7 @@ static void set_no_qsfp_atten(struct qib_devdata *dd, int change)
5827 /* now change the IBC and serdes, overriding generic */ 5729 /* now change the IBC and serdes, overriding generic */
5828 init_txdds_table(ppd, 1); 5730 init_txdds_table(ppd, 1);
5829 /* Re-enable the physical state machine on mezz boards 5731 /* Re-enable the physical state machine on mezz boards
5830 * now that the correct settings have been set. 5732 * now that the correct settings have been set. */
5831 * QSFP boards are handles by the QSFP event handler */
5832 if (IS_QMH(dd) || IS_QME(dd)) 5733 if (IS_QMH(dd) || IS_QME(dd))
5833 qib_set_ib_7322_lstate(ppd, 0, 5734 qib_set_ib_7322_lstate(ppd, 0,
5834 QLOGIC_IB_IBCC_LINKINITCMD_SLEEP); 5735 QLOGIC_IB_IBCC_LINKINITCMD_SLEEP);
@@ -5853,21 +5754,22 @@ static int setup_txselect(const char *str, struct kernel_param *kp)
5853{ 5754{
5854 struct qib_devdata *dd; 5755 struct qib_devdata *dd;
5855 unsigned long val; 5756 unsigned long val;
5856 int ret; 5757 char *n;
5857
5858 if (strlen(str) >= MAX_ATTEN_LEN) { 5758 if (strlen(str) >= MAX_ATTEN_LEN) {
5859 pr_info("txselect_values string too long\n"); 5759 printk(KERN_INFO QIB_DRV_NAME " txselect_values string "
5760 "too long\n");
5860 return -ENOSPC; 5761 return -ENOSPC;
5861 } 5762 }
5862 ret = kstrtoul(str, 0, &val); 5763 val = simple_strtoul(str, &n, 0);
5863 if (ret || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + 5764 if (n == str || val >= (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
5864 TXDDS_MFG_SZ)) { 5765 TXDDS_MFG_SZ)) {
5865 pr_info("txselect_values must start with a number < %d\n", 5766 printk(KERN_INFO QIB_DRV_NAME
5767 "txselect_values must start with a number < %d\n",
5866 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ); 5768 TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + TXDDS_MFG_SZ);
5867 return ret ? ret : -EINVAL; 5769 return -EINVAL;
5868 } 5770 }
5869
5870 strcpy(txselect_list, str); 5771 strcpy(txselect_list, str);
5772
5871 list_for_each_entry(dd, &qib_dev_list, list) 5773 list_for_each_entry(dd, &qib_dev_list, list)
5872 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322) 5774 if (dd->deviceid == PCI_DEVICE_ID_QLOGIC_IB_7322)
5873 set_no_qsfp_atten(dd, 1); 5775 set_no_qsfp_atten(dd, 1);
@@ -5890,10 +5792,11 @@ static int qib_late_7322_initreg(struct qib_devdata *dd)
5890 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys); 5792 qib_write_kreg(dd, kr_sendpioavailaddr, dd->pioavailregs_phys);
5891 val = qib_read_kreg64(dd, kr_sendpioavailaddr); 5793 val = qib_read_kreg64(dd, kr_sendpioavailaddr);
5892 if (val != dd->pioavailregs_phys) { 5794 if (val != dd->pioavailregs_phys) {
5893 qib_dev_err(dd, 5795 qib_dev_err(dd, "Catastrophic software error, "
5894 "Catastrophic software error, SendPIOAvailAddr written as %lx, read back as %llx\n", 5796 "SendPIOAvailAddr written as %lx, "
5895 (unsigned long) dd->pioavailregs_phys, 5797 "read back as %llx\n",
5896 (unsigned long long) val); 5798 (unsigned long) dd->pioavailregs_phys,
5799 (unsigned long long) val);
5897 ret = -EINVAL; 5800 ret = -EINVAL;
5898 } 5801 }
5899 5802
@@ -6105,8 +6008,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
6105 dd->revision = readq(&dd->kregbase[kr_revision]); 6008 dd->revision = readq(&dd->kregbase[kr_revision]);
6106 6009
6107 if ((dd->revision & 0xffffffffU) == 0xffffffffU) { 6010 if ((dd->revision & 0xffffffffU) == 0xffffffffU) {
6108 qib_dev_err(dd, 6011 qib_dev_err(dd, "Revision register read failure, "
6109 "Revision register read failure, giving up initialization\n"); 6012 "giving up initialization\n");
6110 ret = -ENODEV; 6013 ret = -ENODEV;
6111 goto bail; 6014 goto bail;
6112 } 6015 }
@@ -6272,9 +6175,9 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
6272 */ 6175 */
6273 if (!(dd->flags & QIB_HAS_QSFP)) { 6176 if (!(dd->flags & QIB_HAS_QSFP)) {
6274 if (!IS_QMH(dd) && !IS_QME(dd)) 6177 if (!IS_QMH(dd) && !IS_QME(dd))
6275 qib_devinfo(dd->pcidev, 6178 qib_devinfo(dd->pcidev, "IB%u:%u: "
6276 "IB%u:%u: Unknown mezzanine card type\n", 6179 "Unknown mezzanine card type\n",
6277 dd->unit, ppd->port); 6180 dd->unit, ppd->port);
6278 cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME; 6181 cp->h1_val = IS_QMH(dd) ? H1_FORCE_QMH : H1_FORCE_QME;
6279 /* 6182 /*
6280 * Choose center value as default tx serdes setting 6183 * Choose center value as default tx serdes setting
@@ -6304,8 +6207,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
6304 6207
6305 /* we always allocate at least 2048 bytes for eager buffers */ 6208 /* we always allocate at least 2048 bytes for eager buffers */
6306 dd->rcvegrbufsize = max(mtu, 2048); 6209 dd->rcvegrbufsize = max(mtu, 2048);
6307 BUG_ON(!is_power_of_2(dd->rcvegrbufsize));
6308 dd->rcvegrbufsize_shift = ilog2(dd->rcvegrbufsize);
6309 6210
6310 qib_7322_tidtemplate(dd); 6211 qib_7322_tidtemplate(dd);
6311 6212
@@ -6346,10 +6247,8 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
6346 dd->piobcnt4k * dd->align4k; 6247 dd->piobcnt4k * dd->align4k;
6347 dd->piovl15base = ioremap_nocache(vl15off, 6248 dd->piovl15base = ioremap_nocache(vl15off,
6348 NUM_VL15_BUFS * dd->align4k); 6249 NUM_VL15_BUFS * dd->align4k);
6349 if (!dd->piovl15base) { 6250 if (!dd->piovl15base)
6350 ret = -ENOMEM;
6351 goto bail; 6251 goto bail;
6352 }
6353 } 6252 }
6354 qib_7322_set_baseaddrs(dd); /* set chip access pointers now */ 6253 qib_7322_set_baseaddrs(dd); /* set chip access pointers now */
6355 6254
@@ -6388,7 +6287,6 @@ static int qib_init_7322_variables(struct qib_devdata *dd)
6388 dd->cspec->sdmabufcnt; 6287 dd->cspec->sdmabufcnt;
6389 dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs; 6288 dd->lastctxt_piobuf = dd->cspec->lastbuf_for_pio - sbufs;
6390 dd->cspec->lastbuf_for_pio--; /* range is <= , not < */ 6289 dd->cspec->lastbuf_for_pio--; /* range is <= , not < */
6391 dd->last_pio = dd->cspec->lastbuf_for_pio;
6392 dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ? 6290 dd->pbufsctxt = (dd->cfgctxts > dd->first_user_ctxt) ?
6393 dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0; 6291 dd->lastctxt_piobuf / (dd->cfgctxts - dd->first_user_ctxt) : 0;
6394 6292
@@ -6922,17 +6820,19 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev,
6922 6820
6923 tabsize = actual_cnt; 6821 tabsize = actual_cnt;
6924 dd->cspec->msix_entries = kmalloc(tabsize * 6822 dd->cspec->msix_entries = kmalloc(tabsize *
6925 sizeof(struct qib_msix_entry), GFP_KERNEL); 6823 sizeof(struct msix_entry), GFP_KERNEL);
6926 if (!dd->cspec->msix_entries) { 6824 dd->cspec->msix_arg = kmalloc(tabsize *
6825 sizeof(void *), GFP_KERNEL);
6826 if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) {
6927 qib_dev_err(dd, "No memory for MSIx table\n"); 6827 qib_dev_err(dd, "No memory for MSIx table\n");
6928 tabsize = 0; 6828 tabsize = 0;
6929 } 6829 }
6930 for (i = 0; i < tabsize; i++) 6830 for (i = 0; i < tabsize; i++)
6931 dd->cspec->msix_entries[i].msix.entry = i; 6831 dd->cspec->msix_entries[i].entry = i;
6932 6832
6933 if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) 6833 if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries))
6934 qib_dev_err(dd, 6834 qib_dev_err(dd, "Failed to setup PCIe or interrupts; "
6935 "Failed to setup PCIe or interrupts; continuing anyway\n"); 6835 "continuing anyway\n");
6936 /* may be less than we wanted, if not enough available */ 6836 /* may be less than we wanted, if not enough available */
6937 dd->cspec->num_msix_entries = tabsize; 6837 dd->cspec->num_msix_entries = tabsize;
6938 6838
@@ -7249,8 +7149,7 @@ static void find_best_ent(struct qib_pportdata *ppd,
7249 } 7149 }
7250 } 7150 }
7251 7151
7252 /* Active cables don't have attenuation so we only set SERDES 7152 /* Lookup serdes setting by cable type and attenuation */
7253 * settings to account for the attenuation of the board traces. */
7254 if (!override && QSFP_IS_ACTIVE(qd->tech)) { 7153 if (!override && QSFP_IS_ACTIVE(qd->tech)) {
7255 *sdr_dds = txdds_sdr + ppd->dd->board_atten; 7154 *sdr_dds = txdds_sdr + ppd->dd->board_atten;
7256 *ddr_dds = txdds_ddr + ppd->dd->board_atten; 7155 *ddr_dds = txdds_ddr + ppd->dd->board_atten;
@@ -7285,7 +7184,8 @@ static void find_best_ent(struct qib_pportdata *ppd,
7285 ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ + 7184 ppd->cpspec->no_eep < (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ +
7286 TXDDS_MFG_SZ)) { 7185 TXDDS_MFG_SZ)) {
7287 idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ); 7186 idx = ppd->cpspec->no_eep - (TXDDS_TABLE_SZ + TXDDS_EXTRA_SZ);
7288 pr_info("IB%u:%u use idx %u into txdds_mfg\n", 7187 printk(KERN_INFO QIB_DRV_NAME
7188 " IB%u:%u use idx %u into txdds_mfg\n",
7289 ppd->dd->unit, ppd->port, idx); 7189 ppd->dd->unit, ppd->port, idx);
7290 *sdr_dds = &txdds_extra_mfg[idx]; 7190 *sdr_dds = &txdds_extra_mfg[idx];
7291 *ddr_dds = &txdds_extra_mfg[idx]; 7191 *ddr_dds = &txdds_extra_mfg[idx];
@@ -7440,11 +7340,11 @@ static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
7440 u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN); 7340 u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN);
7441 7341
7442 if (enable && !state) { 7342 if (enable && !state) {
7443 pr_info("IB%u:%u Turning LOS on\n", 7343 printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n",
7444 ppd->dd->unit, ppd->port); 7344 ppd->dd->unit, ppd->port);
7445 data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN); 7345 data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
7446 } else if (!enable && state) { 7346 } else if (!enable && state) {
7447 pr_info("IB%u:%u Turning LOS off\n", 7347 printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n",
7448 ppd->dd->unit, ppd->port); 7348 ppd->dd->unit, ppd->port);
7449 data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN); 7349 data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
7450 } 7350 }
@@ -7562,10 +7462,16 @@ static int serdes_7322_init_old(struct qib_pportdata *ppd)
7562 7462
7563static int serdes_7322_init_new(struct qib_pportdata *ppd) 7463static int serdes_7322_init_new(struct qib_pportdata *ppd)
7564{ 7464{
7565 unsigned long tend; 7465 u64 tstart;
7566 u32 le_val, rxcaldone; 7466 u32 le_val, rxcaldone;
7567 int chan, chan_done = (1 << SERDES_CHANS) - 1; 7467 int chan, chan_done = (1 << SERDES_CHANS) - 1;
7568 7468
7469 /*
7470 * Initialize the Tx DDS tables. Also done every QSFP event,
7471 * for adapters with QSFP
7472 */
7473 init_txdds_table(ppd, 0);
7474
7569 /* Clear cmode-override, may be set from older driver */ 7475 /* Clear cmode-override, may be set from older driver */
7570 ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14); 7476 ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 5, 10, 0 << 14, 1 << 14);
7571 7477
@@ -7667,8 +7573,10 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
7667 msleep(20); 7573 msleep(20);
7668 /* Start Calibration */ 7574 /* Start Calibration */
7669 ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10)); 7575 ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10));
7670 tend = jiffies + msecs_to_jiffies(500); 7576 tstart = get_jiffies_64();
7671 while (chan_done && !time_is_before_jiffies(tend)) { 7577 while (chan_done &&
7578 !time_after64(get_jiffies_64(),
7579 tstart + msecs_to_jiffies(500))) {
7672 msleep(20); 7580 msleep(20);
7673 for (chan = 0; chan < SERDES_CHANS; ++chan) { 7581 for (chan = 0; chan < SERDES_CHANS; ++chan) {
7674 rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), 7582 rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx),
@@ -7680,7 +7588,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
7680 } 7588 }
7681 } 7589 }
7682 if (chan_done) { 7590 if (chan_done) {
7683 pr_info("Serdes %d calibration not done after .5 sec: 0x%x\n", 7591 printk(KERN_INFO QIB_DRV_NAME
7592 " Serdes %d calibration not done after .5 sec: 0x%x\n",
7684 IBSD(ppd->hw_pidx), chan_done); 7593 IBSD(ppd->hw_pidx), chan_done);
7685 } else { 7594 } else {
7686 for (chan = 0; chan < SERDES_CHANS; ++chan) { 7595 for (chan = 0; chan < SERDES_CHANS; ++chan) {
@@ -7688,8 +7597,9 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
7688 (chan + (chan >> 1)), 7597 (chan + (chan >> 1)),
7689 25, 0, 0); 7598 25, 0, 0);
7690 if ((~rxcaldone & (u32)BMASK(10, 10)) == 0) 7599 if ((~rxcaldone & (u32)BMASK(10, 10)) == 0)
7691 pr_info("Serdes %d chan %d calibration failed\n", 7600 printk(KERN_INFO QIB_DRV_NAME
7692 IBSD(ppd->hw_pidx), chan); 7601 " Serdes %d chan %d calibration "
7602 "failed\n", IBSD(ppd->hw_pidx), chan);
7693 } 7603 }
7694 } 7604 }
7695 7605
@@ -7715,7 +7625,7 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
7715 ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0)); 7625 ibsd_wr_allchans(ppd, 5, 0, BMASK(0, 0));
7716 msleep(20); 7626 msleep(20);
7717 /* Set Frequency Loop Bandwidth */ 7627 /* Set Frequency Loop Bandwidth */
7718 ibsd_wr_allchans(ppd, 2, (15 << 5), BMASK(8, 5)); 7628 ibsd_wr_allchans(ppd, 2, (7 << 5), BMASK(8, 5));
7719 /* Enable Frequency Loop */ 7629 /* Enable Frequency Loop */
7720 ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4)); 7630 ibsd_wr_allchans(ppd, 2, (1 << 4), BMASK(4, 4));
7721 /* Set Timing Loop Bandwidth */ 7631 /* Set Timing Loop Bandwidth */
@@ -7747,12 +7657,6 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd)
7747 /* VGA output common mode */ 7657 /* VGA output common mode */
7748 ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2)); 7658 ibsd_wr_allchans(ppd, 12, (3 << 2), BMASK(3, 2));
7749 7659
7750 /*
7751 * Initialize the Tx DDS tables. Also done every QSFP event,
7752 * for adapters with QSFP
7753 */
7754 init_txdds_table(ppd, 0);
7755
7756 return 0; 7660 return 0;
7757} 7661}
7758 7662
diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c
index ddf066d9abb..a01f3fce8eb 100644
--- a/drivers/infiniband/hw/qib/qib_init.c
+++ b/drivers/infiniband/hw/qib/qib_init.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -37,15 +37,9 @@
37#include <linux/vmalloc.h> 37#include <linux/vmalloc.h>
38#include <linux/delay.h> 38#include <linux/delay.h>
39#include <linux/idr.h> 39#include <linux/idr.h>
40#include <linux/module.h>
41#include <linux/printk.h>
42 40
43#include "qib.h" 41#include "qib.h"
44#include "qib_common.h" 42#include "qib_common.h"
45#include "qib_mad.h"
46
47#undef pr_fmt
48#define pr_fmt(fmt) QIB_DRV_NAME ": " fmt
49 43
50/* 44/*
51 * min buffers we want to have per context, after driver 45 * min buffers we want to have per context, after driver
@@ -76,9 +70,6 @@ unsigned qib_n_krcv_queues;
76module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO); 70module_param_named(krcvqs, qib_n_krcv_queues, uint, S_IRUGO);
77MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port"); 71MODULE_PARM_DESC(krcvqs, "number of kernel receive queues per IB port");
78 72
79unsigned qib_cc_table_size;
80module_param_named(cc_table_size, qib_cc_table_size, uint, S_IRUGO);
81MODULE_PARM_DESC(cc_table_size, "Congestion control table entries 0 (CCA disabled - default), min = 128, max = 1984");
82/* 73/*
83 * qib_wc_pat parameter: 74 * qib_wc_pat parameter:
84 * 0 is WC via MTRR 75 * 0 is WC via MTRR
@@ -110,8 +101,6 @@ void qib_set_ctxtcnt(struct qib_devdata *dd)
110 dd->cfgctxts = qib_cfgctxts; 101 dd->cfgctxts = qib_cfgctxts;
111 else 102 else
112 dd->cfgctxts = dd->ctxtcnt; 103 dd->cfgctxts = dd->ctxtcnt;
113 dd->freectxts = (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
114 dd->cfgctxts - dd->first_user_ctxt;
115} 104}
116 105
117/* 106/*
@@ -128,8 +117,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
128 */ 117 */
129 dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL); 118 dd->rcd = kzalloc(sizeof(*dd->rcd) * dd->ctxtcnt, GFP_KERNEL);
130 if (!dd->rcd) { 119 if (!dd->rcd) {
131 qib_dev_err(dd, 120 qib_dev_err(dd, "Unable to allocate ctxtdata array, "
132 "Unable to allocate ctxtdata array, failing\n"); 121 "failing\n");
133 ret = -ENOMEM; 122 ret = -ENOMEM;
134 goto done; 123 goto done;
135 } 124 }
@@ -145,8 +134,8 @@ int qib_create_ctxts(struct qib_devdata *dd)
145 ppd = dd->pport + (i % dd->num_pports); 134 ppd = dd->pport + (i % dd->num_pports);
146 rcd = qib_create_ctxtdata(ppd, i); 135 rcd = qib_create_ctxtdata(ppd, i);
147 if (!rcd) { 136 if (!rcd) {
148 qib_dev_err(dd, 137 qib_dev_err(dd, "Unable to allocate ctxtdata"
149 "Unable to allocate ctxtdata for Kernel ctxt, failing\n"); 138 " for Kernel ctxt, failing\n");
150 ret = -ENOMEM; 139 ret = -ENOMEM;
151 goto done; 140 goto done;
152 } 141 }
@@ -194,9 +183,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
194 rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt + 183 rcd->rcvegrbuf_chunks = (rcd->rcvegrcnt +
195 rcd->rcvegrbufs_perchunk - 1) / 184 rcd->rcvegrbufs_perchunk - 1) /
196 rcd->rcvegrbufs_perchunk; 185 rcd->rcvegrbufs_perchunk;
197 BUG_ON(!is_power_of_2(rcd->rcvegrbufs_perchunk));
198 rcd->rcvegrbufs_perchunk_shift =
199 ilog2(rcd->rcvegrbufs_perchunk);
200 } 186 }
201 return rcd; 187 return rcd;
202} 188}
@@ -207,7 +193,6 @@ struct qib_ctxtdata *qib_create_ctxtdata(struct qib_pportdata *ppd, u32 ctxt)
207void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd, 193void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
208 u8 hw_pidx, u8 port) 194 u8 hw_pidx, u8 port)
209{ 195{
210 int size;
211 ppd->dd = dd; 196 ppd->dd = dd;
212 ppd->hw_pidx = hw_pidx; 197 ppd->hw_pidx = hw_pidx;
213 ppd->port = port; /* IB port number, not index */ 198 ppd->port = port; /* IB port number, not index */
@@ -219,83 +204,6 @@ void qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
219 init_timer(&ppd->symerr_clear_timer); 204 init_timer(&ppd->symerr_clear_timer);
220 ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup; 205 ppd->symerr_clear_timer.function = qib_clear_symerror_on_linkup;
221 ppd->symerr_clear_timer.data = (unsigned long)ppd; 206 ppd->symerr_clear_timer.data = (unsigned long)ppd;
222
223 ppd->qib_wq = NULL;
224
225 spin_lock_init(&ppd->cc_shadow_lock);
226
227 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
228 goto bail;
229
230 ppd->cc_supported_table_entries = min(max_t(int, qib_cc_table_size,
231 IB_CCT_MIN_ENTRIES), IB_CCT_ENTRIES*IB_CC_TABLE_CAP_DEFAULT);
232
233 ppd->cc_max_table_entries =
234 ppd->cc_supported_table_entries/IB_CCT_ENTRIES;
235
236 size = IB_CC_TABLE_CAP_DEFAULT * sizeof(struct ib_cc_table_entry)
237 * IB_CCT_ENTRIES;
238 ppd->ccti_entries = kzalloc(size, GFP_KERNEL);
239 if (!ppd->ccti_entries) {
240 qib_dev_err(dd,
241 "failed to allocate congestion control table for port %d!\n",
242 port);
243 goto bail;
244 }
245
246 size = IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry);
247 ppd->congestion_entries = kzalloc(size, GFP_KERNEL);
248 if (!ppd->congestion_entries) {
249 qib_dev_err(dd,
250 "failed to allocate congestion setting list for port %d!\n",
251 port);
252 goto bail_1;
253 }
254
255 size = sizeof(struct cc_table_shadow);
256 ppd->ccti_entries_shadow = kzalloc(size, GFP_KERNEL);
257 if (!ppd->ccti_entries_shadow) {
258 qib_dev_err(dd,
259 "failed to allocate shadow ccti list for port %d!\n",
260 port);
261 goto bail_2;
262 }
263
264 size = sizeof(struct ib_cc_congestion_setting_attr);
265 ppd->congestion_entries_shadow = kzalloc(size, GFP_KERNEL);
266 if (!ppd->congestion_entries_shadow) {
267 qib_dev_err(dd,
268 "failed to allocate shadow congestion setting list for port %d!\n",
269 port);
270 goto bail_3;
271 }
272
273 return;
274
275bail_3:
276 kfree(ppd->ccti_entries_shadow);
277 ppd->ccti_entries_shadow = NULL;
278bail_2:
279 kfree(ppd->congestion_entries);
280 ppd->congestion_entries = NULL;
281bail_1:
282 kfree(ppd->ccti_entries);
283 ppd->ccti_entries = NULL;
284bail:
285 /* User is intentionally disabling the congestion control agent */
286 if (!qib_cc_table_size)
287 return;
288
289 if (qib_cc_table_size < IB_CCT_MIN_ENTRIES) {
290 qib_cc_table_size = 0;
291 qib_dev_err(dd,
292 "Congestion Control table size %d less than minimum %d for port %d\n",
293 qib_cc_table_size, IB_CCT_MIN_ENTRIES, port);
294 }
295
296 qib_dev_err(dd, "Congestion Control Agent disabled for port %d\n",
297 port);
298 return;
299} 207}
300 208
301static int init_pioavailregs(struct qib_devdata *dd) 209static int init_pioavailregs(struct qib_devdata *dd)
@@ -307,8 +215,8 @@ static int init_pioavailregs(struct qib_devdata *dd)
307 &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys, 215 &dd->pcidev->dev, PAGE_SIZE, &dd->pioavailregs_phys,
308 GFP_KERNEL); 216 GFP_KERNEL);
309 if (!dd->pioavailregs_dma) { 217 if (!dd->pioavailregs_dma) {
310 qib_dev_err(dd, 218 qib_dev_err(dd, "failed to allocate PIOavail reg area "
311 "failed to allocate PIOavail reg area in memory\n"); 219 "in memory\n");
312 ret = -ENOMEM; 220 ret = -ENOMEM;
313 goto done; 221 goto done;
314 } 222 }
@@ -363,15 +271,15 @@ static void init_shadow_tids(struct qib_devdata *dd)
363 271
364 pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *)); 272 pages = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(struct page *));
365 if (!pages) { 273 if (!pages) {
366 qib_dev_err(dd, 274 qib_dev_err(dd, "failed to allocate shadow page * "
367 "failed to allocate shadow page * array, no expected sends!\n"); 275 "array, no expected sends!\n");
368 goto bail; 276 goto bail;
369 } 277 }
370 278
371 addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t)); 279 addrs = vzalloc(dd->cfgctxts * dd->rcvtidcnt * sizeof(dma_addr_t));
372 if (!addrs) { 280 if (!addrs) {
373 qib_dev_err(dd, 281 qib_dev_err(dd, "failed to allocate shadow dma handle "
374 "failed to allocate shadow dma handle array, no expected sends!\n"); 282 "array, no expected sends!\n");
375 goto bail_free; 283 goto bail_free;
376 } 284 }
377 285
@@ -395,13 +303,13 @@ static int loadtime_init(struct qib_devdata *dd)
395 303
396 if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) & 304 if (((dd->revision >> QLOGIC_IB_R_SOFTWARE_SHIFT) &
397 QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) { 305 QLOGIC_IB_R_SOFTWARE_MASK) != QIB_CHIP_SWVERSION) {
398 qib_dev_err(dd, 306 qib_dev_err(dd, "Driver only handles version %d, "
399 "Driver only handles version %d, chip swversion is %d (%llx), failng\n", 307 "chip swversion is %d (%llx), failng\n",
400 QIB_CHIP_SWVERSION, 308 QIB_CHIP_SWVERSION,
401 (int)(dd->revision >> 309 (int)(dd->revision >>
402 QLOGIC_IB_R_SOFTWARE_SHIFT) & 310 QLOGIC_IB_R_SOFTWARE_SHIFT) &
403 QLOGIC_IB_R_SOFTWARE_MASK, 311 QLOGIC_IB_R_SOFTWARE_MASK,
404 (unsigned long long) dd->revision); 312 (unsigned long long) dd->revision);
405 ret = -ENOSYS; 313 ret = -ENOSYS;
406 goto done; 314 goto done;
407 } 315 }
@@ -505,8 +413,8 @@ static void verify_interrupt(unsigned long opaque)
505 */ 413 */
506 if (dd->int_counter == 0) { 414 if (dd->int_counter == 0) {
507 if (!dd->f_intr_fallback(dd)) 415 if (!dd->f_intr_fallback(dd))
508 dev_err(&dd->pcidev->dev, 416 dev_err(&dd->pcidev->dev, "No interrupts detected, "
509 "No interrupts detected, not usable.\n"); 417 "not usable.\n");
510 else /* re-arm the timer to see if fallback works */ 418 else /* re-arm the timer to see if fallback works */
511 mod_timer(&dd->intrchk_timer, jiffies + HZ/2); 419 mod_timer(&dd->intrchk_timer, jiffies + HZ/2);
512 } 420 }
@@ -569,41 +477,6 @@ static void init_piobuf_state(struct qib_devdata *dd)
569} 477}
570 478
571/** 479/**
572 * qib_create_workqueues - create per port workqueues
573 * @dd: the qlogic_ib device
574 */
575static int qib_create_workqueues(struct qib_devdata *dd)
576{
577 int pidx;
578 struct qib_pportdata *ppd;
579
580 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
581 ppd = dd->pport + pidx;
582 if (!ppd->qib_wq) {
583 char wq_name[8]; /* 3 + 2 + 1 + 1 + 1 */
584 snprintf(wq_name, sizeof(wq_name), "qib%d_%d",
585 dd->unit, pidx);
586 ppd->qib_wq =
587 create_singlethread_workqueue(wq_name);
588 if (!ppd->qib_wq)
589 goto wq_error;
590 }
591 }
592 return 0;
593wq_error:
594 pr_err("create_singlethread_workqueue failed for port %d\n",
595 pidx + 1);
596 for (pidx = 0; pidx < dd->num_pports; ++pidx) {
597 ppd = dd->pport + pidx;
598 if (ppd->qib_wq) {
599 destroy_workqueue(ppd->qib_wq);
600 ppd->qib_wq = NULL;
601 }
602 }
603 return -ENOMEM;
604}
605
606/**
607 * qib_init - do the actual initialization sequence on the chip 480 * qib_init - do the actual initialization sequence on the chip
608 * @dd: the qlogic_ib device 481 * @dd: the qlogic_ib device
609 * @reinit: reinitializing, so don't allocate new memory 482 * @reinit: reinitializing, so don't allocate new memory
@@ -668,8 +541,8 @@ int qib_init(struct qib_devdata *dd, int reinit)
668 if (!lastfail) 541 if (!lastfail)
669 lastfail = qib_setup_eagerbufs(rcd); 542 lastfail = qib_setup_eagerbufs(rcd);
670 if (lastfail) { 543 if (lastfail) {
671 qib_dev_err(dd, 544 qib_dev_err(dd, "failed to allocate kernel ctxt's "
672 "failed to allocate kernel ctxt's rcvhdrq and/or egr bufs\n"); 545 "rcvhdrq and/or egr bufs\n");
673 continue; 546 continue;
674 } 547 }
675 } 548 }
@@ -708,6 +581,10 @@ int qib_init(struct qib_devdata *dd, int reinit)
708 continue; 581 continue;
709 } 582 }
710 583
584 /* let link come up, and enable IBC */
585 spin_lock_irqsave(&ppd->lflags_lock, flags);
586 ppd->lflags &= ~QIBL_IB_LINK_DISABLED;
587 spin_unlock_irqrestore(&ppd->lflags_lock, flags);
711 portok++; 588 portok++;
712 } 589 }
713 590
@@ -885,11 +762,6 @@ static void qib_shutdown_device(struct qib_devdata *dd)
885 * We can't count on interrupts since we are stopping. 762 * We can't count on interrupts since we are stopping.
886 */ 763 */
887 dd->f_quiet_serdes(ppd); 764 dd->f_quiet_serdes(ppd);
888
889 if (ppd->qib_wq) {
890 destroy_workqueue(ppd->qib_wq);
891 ppd->qib_wq = NULL;
892 }
893 } 765 }
894 766
895 qib_update_eeprom_log(dd); 767 qib_update_eeprom_log(dd);
@@ -1019,7 +891,8 @@ static void qib_verify_pioperf(struct qib_devdata *dd)
1019 /* 1 GiB/sec, slightly over IB SDR line rate */ 891 /* 1 GiB/sec, slightly over IB SDR line rate */
1020 if (lcnt < (emsecs * 1024U)) 892 if (lcnt < (emsecs * 1024U))
1021 qib_dev_err(dd, 893 qib_dev_err(dd,
1022 "Performance problem: bandwidth to PIO buffers is only %u MiB/sec\n", 894 "Performance problem: bandwidth to PIO buffers is "
895 "only %u MiB/sec\n",
1023 lcnt / (u32) emsecs); 896 lcnt / (u32) emsecs);
1024 897
1025 preempt_enable(); 898 preempt_enable();
@@ -1092,8 +965,8 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
1092 if (qib_cpulist) 965 if (qib_cpulist)
1093 qib_cpulist_count = count; 966 qib_cpulist_count = count;
1094 else 967 else
1095 qib_early_err(&pdev->dev, 968 qib_early_err(&pdev->dev, "Could not alloc cpulist "
1096 "Could not alloc cpulist info, cpu affinity might be wrong\n"); 969 "info, cpu affinity might be wrong\n");
1097 } 970 }
1098 971
1099bail: 972bail:
@@ -1134,13 +1007,14 @@ void qib_disable_after_error(struct qib_devdata *dd)
1134 *dd->devstatusp |= QIB_STATUS_HWERROR; 1007 *dd->devstatusp |= QIB_STATUS_HWERROR;
1135} 1008}
1136 1009
1137static void qib_remove_one(struct pci_dev *); 1010static void __devexit qib_remove_one(struct pci_dev *);
1138static int qib_init_one(struct pci_dev *, const struct pci_device_id *); 1011static int __devinit qib_init_one(struct pci_dev *,
1012 const struct pci_device_id *);
1139 1013
1140#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " 1014#define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: "
1141#define PFX QIB_DRV_NAME ": " 1015#define PFX QIB_DRV_NAME ": "
1142 1016
1143static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { 1017static const struct pci_device_id qib_pci_tbl[] = {
1144 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, 1018 { PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) },
1145 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, 1019 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) },
1146 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, 1020 { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) },
@@ -1152,7 +1026,7 @@ MODULE_DEVICE_TABLE(pci, qib_pci_tbl);
1152struct pci_driver qib_driver = { 1026struct pci_driver qib_driver = {
1153 .name = QIB_DRV_NAME, 1027 .name = QIB_DRV_NAME,
1154 .probe = qib_init_one, 1028 .probe = qib_init_one,
1155 .remove = qib_remove_one, 1029 .remove = __devexit_p(qib_remove_one),
1156 .id_table = qib_pci_tbl, 1030 .id_table = qib_pci_tbl,
1157 .err_handler = &qib_pci_err_handler, 1031 .err_handler = &qib_pci_err_handler,
1158}; 1032};
@@ -1181,20 +1055,21 @@ static int __init qlogic_ib_init(void)
1181 */ 1055 */
1182 idr_init(&qib_unit_table); 1056 idr_init(&qib_unit_table);
1183 if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) { 1057 if (!idr_pre_get(&qib_unit_table, GFP_KERNEL)) {
1184 pr_err("idr_pre_get() failed\n"); 1058 printk(KERN_ERR QIB_DRV_NAME ": idr_pre_get() failed\n");
1185 ret = -ENOMEM; 1059 ret = -ENOMEM;
1186 goto bail_cq_wq; 1060 goto bail_cq_wq;
1187 } 1061 }
1188 1062
1189 ret = pci_register_driver(&qib_driver); 1063 ret = pci_register_driver(&qib_driver);
1190 if (ret < 0) { 1064 if (ret < 0) {
1191 pr_err("Unable to register driver: error %d\n", -ret); 1065 printk(KERN_ERR QIB_DRV_NAME
1066 ": Unable to register driver: error %d\n", -ret);
1192 goto bail_unit; 1067 goto bail_unit;
1193 } 1068 }
1194 1069
1195 /* not fatal if it doesn't work */ 1070 /* not fatal if it doesn't work */
1196 if (qib_init_qibfs()) 1071 if (qib_init_qibfs())
1197 pr_err("Unable to register ipathfs\n"); 1072 printk(KERN_ERR QIB_DRV_NAME ": Unable to register ipathfs\n");
1198 goto bail; /* all OK */ 1073 goto bail; /* all OK */
1199 1074
1200bail_unit: 1075bail_unit:
@@ -1218,9 +1093,9 @@ static void __exit qlogic_ib_cleanup(void)
1218 1093
1219 ret = qib_exit_qibfs(); 1094 ret = qib_exit_qibfs();
1220 if (ret) 1095 if (ret)
1221 pr_err( 1096 printk(KERN_ERR QIB_DRV_NAME ": "
1222 "Unable to cleanup counter filesystem: error %d\n", 1097 "Unable to cleanup counter filesystem: "
1223 -ret); 1098 "error %d\n", -ret);
1224 1099
1225 pci_unregister_driver(&qib_driver); 1100 pci_unregister_driver(&qib_driver);
1226 1101
@@ -1244,24 +1119,10 @@ static void cleanup_device_data(struct qib_devdata *dd)
1244 unsigned long flags; 1119 unsigned long flags;
1245 1120
1246 /* users can't do anything more with chip */ 1121 /* users can't do anything more with chip */
1247 for (pidx = 0; pidx < dd->num_pports; ++pidx) { 1122 for (pidx = 0; pidx < dd->num_pports; ++pidx)
1248 if (dd->pport[pidx].statusp) 1123 if (dd->pport[pidx].statusp)
1249 *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT; 1124 *dd->pport[pidx].statusp &= ~QIB_STATUS_CHIP_PRESENT;
1250 1125
1251 spin_lock(&dd->pport[pidx].cc_shadow_lock);
1252
1253 kfree(dd->pport[pidx].congestion_entries);
1254 dd->pport[pidx].congestion_entries = NULL;
1255 kfree(dd->pport[pidx].ccti_entries);
1256 dd->pport[pidx].ccti_entries = NULL;
1257 kfree(dd->pport[pidx].ccti_entries_shadow);
1258 dd->pport[pidx].ccti_entries_shadow = NULL;
1259 kfree(dd->pport[pidx].congestion_entries_shadow);
1260 dd->pport[pidx].congestion_entries_shadow = NULL;
1261
1262 spin_unlock(&dd->pport[pidx].cc_shadow_lock);
1263 }
1264
1265 if (!qib_wc_pat) 1126 if (!qib_wc_pat)
1266 qib_disable_wc(dd); 1127 qib_disable_wc(dd);
1267 1128
@@ -1341,7 +1202,8 @@ static void qib_postinit_cleanup(struct qib_devdata *dd)
1341 qib_free_devdata(dd); 1202 qib_free_devdata(dd);
1342} 1203}
1343 1204
1344static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 1205static int __devinit qib_init_one(struct pci_dev *pdev,
1206 const struct pci_device_id *ent)
1345{ 1207{
1346 int ret, j, pidx, initfail; 1208 int ret, j, pidx, initfail;
1347 struct qib_devdata *dd = NULL; 1209 struct qib_devdata *dd = NULL;
@@ -1359,9 +1221,9 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1359#ifdef CONFIG_PCI_MSI 1221#ifdef CONFIG_PCI_MSI
1360 dd = qib_init_iba6120_funcs(pdev, ent); 1222 dd = qib_init_iba6120_funcs(pdev, ent);
1361#else 1223#else
1362 qib_early_err(&pdev->dev, 1224 qib_early_err(&pdev->dev, "QLogic PCIE device 0x%x cannot "
1363 "QLogic PCIE device 0x%x cannot work if CONFIG_PCI_MSI is not enabled\n", 1225 "work if CONFIG_PCI_MSI is not enabled\n",
1364 ent->device); 1226 ent->device);
1365 dd = ERR_PTR(-ENODEV); 1227 dd = ERR_PTR(-ENODEV);
1366#endif 1228#endif
1367 break; 1229 break;
@@ -1375,9 +1237,8 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1375 break; 1237 break;
1376 1238
1377 default: 1239 default:
1378 qib_early_err(&pdev->dev, 1240 qib_early_err(&pdev->dev, "Failing on unknown QLogic "
1379 "Failing on unknown QLogic deviceid 0x%x\n", 1241 "deviceid 0x%x\n", ent->device);
1380 ent->device);
1381 ret = -ENODEV; 1242 ret = -ENODEV;
1382 } 1243 }
1383 1244
@@ -1386,10 +1247,6 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1386 if (ret) 1247 if (ret)
1387 goto bail; /* error already printed */ 1248 goto bail; /* error already printed */
1388 1249
1389 ret = qib_create_workqueues(dd);
1390 if (ret)
1391 goto bail;
1392
1393 /* do the generic initialization */ 1250 /* do the generic initialization */
1394 initfail = qib_init(dd, 0); 1251 initfail = qib_init(dd, 0);
1395 1252
@@ -1434,9 +1291,9 @@ static int qib_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
1434 if (!qib_wc_pat) { 1291 if (!qib_wc_pat) {
1435 ret = qib_enable_wc(dd); 1292 ret = qib_enable_wc(dd);
1436 if (ret) { 1293 if (ret) {
1437 qib_dev_err(dd, 1294 qib_dev_err(dd, "Write combining not enabled "
1438 "Write combining not enabled (err %d): performance may be poor\n", 1295 "(err %d): performance may be poor\n",
1439 -ret); 1296 -ret);
1440 ret = 0; 1297 ret = 0;
1441 } 1298 }
1442 } 1299 }
@@ -1446,7 +1303,7 @@ bail:
1446 return ret; 1303 return ret;
1447} 1304}
1448 1305
1449static void qib_remove_one(struct pci_dev *pdev) 1306static void __devexit qib_remove_one(struct pci_dev *pdev)
1450{ 1307{
1451 struct qib_devdata *dd = pci_get_drvdata(pdev); 1308 struct qib_devdata *dd = pci_get_drvdata(pdev);
1452 int ret; 1309 int ret;
@@ -1502,9 +1359,9 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1502 gfp_flags | __GFP_COMP); 1359 gfp_flags | __GFP_COMP);
1503 1360
1504 if (!rcd->rcvhdrq) { 1361 if (!rcd->rcvhdrq) {
1505 qib_dev_err(dd, 1362 qib_dev_err(dd, "attempt to allocate %d bytes "
1506 "attempt to allocate %d bytes for ctxt %u rcvhdrq failed\n", 1363 "for ctxt %u rcvhdrq failed\n",
1507 amt, rcd->ctxt); 1364 amt, rcd->ctxt);
1508 goto bail; 1365 goto bail;
1509 } 1366 }
1510 1367
@@ -1533,9 +1390,8 @@ int qib_create_rcvhdrq(struct qib_devdata *dd, struct qib_ctxtdata *rcd)
1533 return 0; 1390 return 0;
1534 1391
1535bail_free: 1392bail_free:
1536 qib_dev_err(dd, 1393 qib_dev_err(dd, "attempt to allocate 1 page for ctxt %u "
1537 "attempt to allocate 1 page for ctxt %u rcvhdrqtailaddr failed\n", 1394 "rcvhdrqtailaddr failed\n", rcd->ctxt);
1538 rcd->ctxt);
1539 vfree(rcd->user_event_mask); 1395 vfree(rcd->user_event_mask);
1540 rcd->user_event_mask = NULL; 1396 rcd->user_event_mask = NULL;
1541bail_free_hdrq: 1397bail_free_hdrq:
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index f4918f2165e..6ae57d23004 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -224,15 +224,15 @@ void qib_bad_intrstatus(struct qib_devdata *dd)
224 * We print the message and disable interrupts, in hope of 224 * We print the message and disable interrupts, in hope of
225 * having a better chance of debugging the problem. 225 * having a better chance of debugging the problem.
226 */ 226 */
227 qib_dev_err(dd, 227 qib_dev_err(dd, "Read of chip interrupt status failed"
228 "Read of chip interrupt status failed disabling interrupts\n"); 228 " disabling interrupts\n");
229 if (allbits++) { 229 if (allbits++) {
230 /* disable interrupt delivery, something is very wrong */ 230 /* disable interrupt delivery, something is very wrong */
231 if (allbits == 2) 231 if (allbits == 2)
232 dd->f_set_intr_state(dd, 0); 232 dd->f_set_intr_state(dd, 0);
233 if (allbits == 3) { 233 if (allbits == 3) {
234 qib_dev_err(dd, 234 qib_dev_err(dd, "2nd bad interrupt status, "
235 "2nd bad interrupt status, unregistering interrupts\n"); 235 "unregistering interrupts\n");
236 dd->flags |= QIB_BADINTR; 236 dd->flags |= QIB_BADINTR;
237 dd->flags &= ~QIB_INITTED; 237 dd->flags &= ~QIB_INITTED;
238 dd->f_free_irq(dd); 238 dd->f_free_irq(dd);
diff --git a/drivers/infiniband/hw/qib/qib_keys.c b/drivers/infiniband/hw/qib/qib_keys.c
index 81c7b73695d..8fd19a47df0 100644
--- a/drivers/infiniband/hw/qib/qib_keys.c
+++ b/drivers/infiniband/hw/qib/qib_keys.c
@@ -35,41 +35,21 @@
35 35
36/** 36/**
37 * qib_alloc_lkey - allocate an lkey 37 * qib_alloc_lkey - allocate an lkey
38 * @rkt: lkey table in which to allocate the lkey
38 * @mr: memory region that this lkey protects 39 * @mr: memory region that this lkey protects
39 * @dma_region: 0->normal key, 1->restricted DMA key
40 *
41 * Returns 0 if successful, otherwise returns -errno.
42 *
43 * Increments mr reference count as required.
44 *
45 * Sets the lkey field mr for non-dma regions.
46 * 40 *
41 * Returns 1 if successful, otherwise returns 0.
47 */ 42 */
48 43
49int qib_alloc_lkey(struct qib_mregion *mr, int dma_region) 44int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr)
50{ 45{
51 unsigned long flags; 46 unsigned long flags;
52 u32 r; 47 u32 r;
53 u32 n; 48 u32 n;
54 int ret = 0; 49 int ret;
55 struct qib_ibdev *dev = to_idev(mr->pd->device);
56 struct qib_lkey_table *rkt = &dev->lk_table;
57 50
58 spin_lock_irqsave(&rkt->lock, flags); 51 spin_lock_irqsave(&rkt->lock, flags);
59 52
60 /* special case for dma_mr lkey == 0 */
61 if (dma_region) {
62 struct qib_mregion *tmr;
63
64 tmr = rcu_dereference(dev->dma_mr);
65 if (!tmr) {
66 qib_get_mr(mr);
67 rcu_assign_pointer(dev->dma_mr, mr);
68 mr->lkey_published = 1;
69 }
70 goto success;
71 }
72
73 /* Find the next available LKEY */ 53 /* Find the next available LKEY */
74 r = rkt->next; 54 r = rkt->next;
75 n = r; 55 n = r;
@@ -77,8 +57,11 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
77 if (rkt->table[r] == NULL) 57 if (rkt->table[r] == NULL)
78 break; 58 break;
79 r = (r + 1) & (rkt->max - 1); 59 r = (r + 1) & (rkt->max - 1);
80 if (r == n) 60 if (r == n) {
61 spin_unlock_irqrestore(&rkt->lock, flags);
62 ret = 0;
81 goto bail; 63 goto bail;
64 }
82 } 65 }
83 rkt->next = (r + 1) & (rkt->max - 1); 66 rkt->next = (r + 1) & (rkt->max - 1);
84 /* 67 /*
@@ -93,58 +76,57 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
93 mr->lkey |= 1 << 8; 76 mr->lkey |= 1 << 8;
94 rkt->gen++; 77 rkt->gen++;
95 } 78 }
96 qib_get_mr(mr); 79 rkt->table[r] = mr;
97 rcu_assign_pointer(rkt->table[r], mr);
98 mr->lkey_published = 1;
99success:
100 spin_unlock_irqrestore(&rkt->lock, flags); 80 spin_unlock_irqrestore(&rkt->lock, flags);
101out: 81
102 return ret; 82 ret = 1;
83
103bail: 84bail:
104 spin_unlock_irqrestore(&rkt->lock, flags); 85 return ret;
105 ret = -ENOMEM;
106 goto out;
107} 86}
108 87
109/** 88/**
110 * qib_free_lkey - free an lkey 89 * qib_free_lkey - free an lkey
111 * @mr: mr to free from tables 90 * @rkt: table from which to free the lkey
91 * @lkey: lkey id to free
112 */ 92 */
113void qib_free_lkey(struct qib_mregion *mr) 93int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr)
114{ 94{
115 unsigned long flags; 95 unsigned long flags;
116 u32 lkey = mr->lkey; 96 u32 lkey = mr->lkey;
117 u32 r; 97 u32 r;
118 struct qib_ibdev *dev = to_idev(mr->pd->device); 98 int ret;
119 struct qib_lkey_table *rkt = &dev->lk_table;
120 99
121 spin_lock_irqsave(&rkt->lock, flags); 100 spin_lock_irqsave(&dev->lk_table.lock, flags);
122 if (!mr->lkey_published) 101 if (lkey == 0) {
123 goto out; 102 if (dev->dma_mr && dev->dma_mr == mr) {
124 if (lkey == 0) 103 ret = atomic_read(&dev->dma_mr->refcount);
125 rcu_assign_pointer(dev->dma_mr, NULL); 104 if (!ret)
126 else { 105 dev->dma_mr = NULL;
106 } else
107 ret = 0;
108 } else {
127 r = lkey >> (32 - ib_qib_lkey_table_size); 109 r = lkey >> (32 - ib_qib_lkey_table_size);
128 rcu_assign_pointer(rkt->table[r], NULL); 110 ret = atomic_read(&dev->lk_table.table[r]->refcount);
111 if (!ret)
112 dev->lk_table.table[r] = NULL;
129 } 113 }
130 qib_put_mr(mr); 114 spin_unlock_irqrestore(&dev->lk_table.lock, flags);
131 mr->lkey_published = 0; 115
132out: 116 if (ret)
133 spin_unlock_irqrestore(&rkt->lock, flags); 117 ret = -EBUSY;
118 return ret;
134} 119}
135 120
136/** 121/**
137 * qib_lkey_ok - check IB SGE for validity and initialize 122 * qib_lkey_ok - check IB SGE for validity and initialize
138 * @rkt: table containing lkey to check SGE against 123 * @rkt: table containing lkey to check SGE against
139 * @pd: protection domain
140 * @isge: outgoing internal SGE 124 * @isge: outgoing internal SGE
141 * @sge: SGE to check 125 * @sge: SGE to check
142 * @acc: access flags 126 * @acc: access flags
143 * 127 *
144 * Return 1 if valid and successful, otherwise returns 0. 128 * Return 1 if valid and successful, otherwise returns 0.
145 * 129 *
146 * increments the reference count upon success
147 *
148 * Check the IB SGE for validity and initialize our internal version 130 * Check the IB SGE for validity and initialize our internal version
149 * of it. 131 * of it.
150 */ 132 */
@@ -154,25 +136,24 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
154 struct qib_mregion *mr; 136 struct qib_mregion *mr;
155 unsigned n, m; 137 unsigned n, m;
156 size_t off; 138 size_t off;
139 unsigned long flags;
157 140
158 /* 141 /*
159 * We use LKEY == zero for kernel virtual addresses 142 * We use LKEY == zero for kernel virtual addresses
160 * (see qib_get_dma_mr and qib_dma.c). 143 * (see qib_get_dma_mr and qib_dma.c).
161 */ 144 */
162 rcu_read_lock(); 145 spin_lock_irqsave(&rkt->lock, flags);
163 if (sge->lkey == 0) { 146 if (sge->lkey == 0) {
164 struct qib_ibdev *dev = to_idev(pd->ibpd.device); 147 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
165 148
166 if (pd->user) 149 if (pd->user)
167 goto bail; 150 goto bail;
168 mr = rcu_dereference(dev->dma_mr); 151 if (!dev->dma_mr)
169 if (!mr)
170 goto bail;
171 if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
172 goto bail; 152 goto bail;
173 rcu_read_unlock(); 153 atomic_inc(&dev->dma_mr->refcount);
154 spin_unlock_irqrestore(&rkt->lock, flags);
174 155
175 isge->mr = mr; 156 isge->mr = dev->dma_mr;
176 isge->vaddr = (void *) sge->addr; 157 isge->vaddr = (void *) sge->addr;
177 isge->length = sge->length; 158 isge->length = sge->length;
178 isge->sge_length = sge->length; 159 isge->sge_length = sge->length;
@@ -180,9 +161,9 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
180 isge->n = 0; 161 isge->n = 0;
181 goto ok; 162 goto ok;
182 } 163 }
183 mr = rcu_dereference( 164 mr = rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))];
184 rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]); 165 if (unlikely(mr == NULL || mr->lkey != sge->lkey ||
185 if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) 166 mr->pd != &pd->ibpd))
186 goto bail; 167 goto bail;
187 168
188 off = sge->addr - mr->user_base; 169 off = sge->addr - mr->user_base;
@@ -190,9 +171,8 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
190 off + sge->length > mr->length || 171 off + sge->length > mr->length ||
191 (mr->access_flags & acc) != acc)) 172 (mr->access_flags & acc) != acc))
192 goto bail; 173 goto bail;
193 if (unlikely(!atomic_inc_not_zero(&mr->refcount))) 174 atomic_inc(&mr->refcount);
194 goto bail; 175 spin_unlock_irqrestore(&rkt->lock, flags);
195 rcu_read_unlock();
196 176
197 off += mr->offset; 177 off += mr->offset;
198 if (mr->page_shift) { 178 if (mr->page_shift) {
@@ -228,22 +208,20 @@ int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
228ok: 208ok:
229 return 1; 209 return 1;
230bail: 210bail:
231 rcu_read_unlock(); 211 spin_unlock_irqrestore(&rkt->lock, flags);
232 return 0; 212 return 0;
233} 213}
234 214
235/** 215/**
236 * qib_rkey_ok - check the IB virtual address, length, and RKEY 216 * qib_rkey_ok - check the IB virtual address, length, and RKEY
237 * @qp: qp for validation 217 * @dev: infiniband device
238 * @sge: SGE state 218 * @ss: SGE state
239 * @len: length of data 219 * @len: length of data
240 * @vaddr: virtual address to place data 220 * @vaddr: virtual address to place data
241 * @rkey: rkey to check 221 * @rkey: rkey to check
242 * @acc: access flags 222 * @acc: access flags
243 * 223 *
244 * Return 1 if successful, otherwise 0. 224 * Return 1 if successful, otherwise 0.
245 *
246 * increments the reference count upon success
247 */ 225 */
248int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge, 226int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
249 u32 len, u64 vaddr, u32 rkey, int acc) 227 u32 len, u64 vaddr, u32 rkey, int acc)
@@ -252,26 +230,25 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
252 struct qib_mregion *mr; 230 struct qib_mregion *mr;
253 unsigned n, m; 231 unsigned n, m;
254 size_t off; 232 size_t off;
233 unsigned long flags;
255 234
256 /* 235 /*
257 * We use RKEY == zero for kernel virtual addresses 236 * We use RKEY == zero for kernel virtual addresses
258 * (see qib_get_dma_mr and qib_dma.c). 237 * (see qib_get_dma_mr and qib_dma.c).
259 */ 238 */
260 rcu_read_lock(); 239 spin_lock_irqsave(&rkt->lock, flags);
261 if (rkey == 0) { 240 if (rkey == 0) {
262 struct qib_pd *pd = to_ipd(qp->ibqp.pd); 241 struct qib_pd *pd = to_ipd(qp->ibqp.pd);
263 struct qib_ibdev *dev = to_idev(pd->ibpd.device); 242 struct qib_ibdev *dev = to_idev(pd->ibpd.device);
264 243
265 if (pd->user) 244 if (pd->user)
266 goto bail; 245 goto bail;
267 mr = rcu_dereference(dev->dma_mr); 246 if (!dev->dma_mr)
268 if (!mr)
269 goto bail; 247 goto bail;
270 if (unlikely(!atomic_inc_not_zero(&mr->refcount))) 248 atomic_inc(&dev->dma_mr->refcount);
271 goto bail; 249 spin_unlock_irqrestore(&rkt->lock, flags);
272 rcu_read_unlock();
273 250
274 sge->mr = mr; 251 sge->mr = dev->dma_mr;
275 sge->vaddr = (void *) vaddr; 252 sge->vaddr = (void *) vaddr;
276 sge->length = len; 253 sge->length = len;
277 sge->sge_length = len; 254 sge->sge_length = len;
@@ -280,18 +257,16 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
280 goto ok; 257 goto ok;
281 } 258 }
282 259
283 mr = rcu_dereference( 260 mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
284 rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]); 261 if (unlikely(mr == NULL || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
285 if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
286 goto bail; 262 goto bail;
287 263
288 off = vaddr - mr->iova; 264 off = vaddr - mr->iova;
289 if (unlikely(vaddr < mr->iova || off + len > mr->length || 265 if (unlikely(vaddr < mr->iova || off + len > mr->length ||
290 (mr->access_flags & acc) == 0)) 266 (mr->access_flags & acc) == 0))
291 goto bail; 267 goto bail;
292 if (unlikely(!atomic_inc_not_zero(&mr->refcount))) 268 atomic_inc(&mr->refcount);
293 goto bail; 269 spin_unlock_irqrestore(&rkt->lock, flags);
294 rcu_read_unlock();
295 270
296 off += mr->offset; 271 off += mr->offset;
297 if (mr->page_shift) { 272 if (mr->page_shift) {
@@ -327,7 +302,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
327ok: 302ok:
328 return 1; 303 return 1;
329bail: 304bail:
330 rcu_read_unlock(); 305 spin_unlock_irqrestore(&rkt->lock, flags);
331 return 0; 306 return 0;
332} 307}
333 308
@@ -350,9 +325,7 @@ int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr)
350 if (pd->user || rkey == 0) 325 if (pd->user || rkey == 0)
351 goto bail; 326 goto bail;
352 327
353 mr = rcu_dereference_protected( 328 mr = rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))];
354 rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))],
355 lockdep_is_held(&rkt->lock));
356 if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd)) 329 if (unlikely(mr == NULL || qp->ibqp.pd != mr->pd))
357 goto bail; 330 goto bail;
358 331
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index ccb119143d2..3b3745f261f 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -49,18 +49,6 @@ static int reply(struct ib_smp *smp)
49 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY; 49 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
50} 50}
51 51
52static int reply_failure(struct ib_smp *smp)
53{
54 /*
55 * The verbs framework will handle the directed/LID route
56 * packet changes.
57 */
58 smp->method = IB_MGMT_METHOD_GET_RESP;
59 if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
60 smp->status |= IB_SMP_DIRECTION;
61 return IB_MAD_RESULT_FAILURE | IB_MAD_RESULT_REPLY;
62}
63
64static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len) 52static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
65{ 53{
66 struct ib_mad_send_buf *send_buf; 54 struct ib_mad_send_buf *send_buf;
@@ -102,10 +90,14 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
102 if (!ibp->sm_ah) { 90 if (!ibp->sm_ah) {
103 if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) { 91 if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
104 struct ib_ah *ah; 92 struct ib_ah *ah;
93 struct ib_ah_attr attr;
105 94
106 ah = qib_create_qp0_ah(ibp, ibp->sm_lid); 95 memset(&attr, 0, sizeof attr);
96 attr.dlid = ibp->sm_lid;
97 attr.port_num = ppd_from_ibp(ibp)->port;
98 ah = ib_create_ah(ibp->qp0->ibqp.pd, &attr);
107 if (IS_ERR(ah)) 99 if (IS_ERR(ah))
108 ret = PTR_ERR(ah); 100 ret = -EINVAL;
109 else { 101 else {
110 send_buf->ah = ah; 102 send_buf->ah = ah;
111 ibp->sm_ah = to_iah(ah); 103 ibp->sm_ah = to_iah(ah);
@@ -404,7 +396,6 @@ static int get_linkdowndefaultstate(struct qib_pportdata *ppd)
404 396
405static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags) 397static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
406{ 398{
407 int valid_mkey = 0;
408 int ret = 0; 399 int ret = 0;
409 400
410 /* Is the mkey in the process of expiring? */ 401 /* Is the mkey in the process of expiring? */
@@ -415,36 +406,23 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
415 ibp->mkeyprot = 0; 406 ibp->mkeyprot = 0;
416 } 407 }
417 408
418 if ((mad_flags & IB_MAD_IGNORE_MKEY) || ibp->mkey == 0 || 409 /* M_Key checking depends on Portinfo:M_Key_protect_bits */
419 ibp->mkey == smp->mkey) 410 if ((mad_flags & IB_MAD_IGNORE_MKEY) == 0 && ibp->mkey != 0 &&
420 valid_mkey = 1; 411 ibp->mkey != smp->mkey &&
421 412 (smp->method == IB_MGMT_METHOD_SET ||
422 /* Unset lease timeout on any valid Get/Set/TrapRepress */ 413 smp->method == IB_MGMT_METHOD_TRAP_REPRESS ||
423 if (valid_mkey && ibp->mkey_lease_timeout && 414 (smp->method == IB_MGMT_METHOD_GET && ibp->mkeyprot >= 2))) {
424 (smp->method == IB_MGMT_METHOD_GET || 415 if (ibp->mkey_violations != 0xFFFF)
425 smp->method == IB_MGMT_METHOD_SET || 416 ++ibp->mkey_violations;
426 smp->method == IB_MGMT_METHOD_TRAP_REPRESS)) 417 if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
418 ibp->mkey_lease_timeout = jiffies +
419 ibp->mkey_lease_period * HZ;
420 /* Generate a trap notice. */
421 qib_bad_mkey(ibp, smp);
422 ret = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
423 } else if (ibp->mkey_lease_timeout)
427 ibp->mkey_lease_timeout = 0; 424 ibp->mkey_lease_timeout = 0;
428 425
429 if (!valid_mkey) {
430 switch (smp->method) {
431 case IB_MGMT_METHOD_GET:
432 /* Bad mkey not a violation below level 2 */
433 if (ibp->mkeyprot < 2)
434 break;
435 case IB_MGMT_METHOD_SET:
436 case IB_MGMT_METHOD_TRAP_REPRESS:
437 if (ibp->mkey_violations != 0xFFFF)
438 ++ibp->mkey_violations;
439 if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
440 ibp->mkey_lease_timeout = jiffies +
441 ibp->mkey_lease_period * HZ;
442 /* Generate a trap notice. */
443 qib_bad_mkey(ibp, smp);
444 ret = 1;
445 }
446 }
447
448 return ret; 426 return ret;
449} 427}
450 428
@@ -455,6 +433,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
455 struct qib_pportdata *ppd; 433 struct qib_pportdata *ppd;
456 struct qib_ibport *ibp; 434 struct qib_ibport *ibp;
457 struct ib_port_info *pip = (struct ib_port_info *)smp->data; 435 struct ib_port_info *pip = (struct ib_port_info *)smp->data;
436 u16 lid;
458 u8 mtu; 437 u8 mtu;
459 int ret; 438 int ret;
460 u32 state; 439 u32 state;
@@ -471,10 +450,8 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
471 if (port_num != port) { 450 if (port_num != port) {
472 ibp = to_iport(ibdev, port_num); 451 ibp = to_iport(ibdev, port_num);
473 ret = check_mkey(ibp, smp, 0); 452 ret = check_mkey(ibp, smp, 0);
474 if (ret) { 453 if (ret)
475 ret = IB_MAD_RESULT_FAILURE;
476 goto bail; 454 goto bail;
477 }
478 } 455 }
479 } 456 }
480 457
@@ -492,7 +469,8 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
492 ibp->mkeyprot == 1)) 469 ibp->mkeyprot == 1))
493 pip->mkey = ibp->mkey; 470 pip->mkey = ibp->mkey;
494 pip->gid_prefix = ibp->gid_prefix; 471 pip->gid_prefix = ibp->gid_prefix;
495 pip->lid = cpu_to_be16(ppd->lid); 472 lid = ppd->lid;
473 pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE;
496 pip->sm_lid = cpu_to_be16(ibp->sm_lid); 474 pip->sm_lid = cpu_to_be16(ibp->sm_lid);
497 pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); 475 pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
498 /* pip->diag_code; */ 476 /* pip->diag_code; */
@@ -655,7 +633,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
655 struct qib_devdata *dd; 633 struct qib_devdata *dd;
656 struct qib_pportdata *ppd; 634 struct qib_pportdata *ppd;
657 struct qib_ibport *ibp; 635 struct qib_ibport *ibp;
658 u8 clientrereg = (pip->clientrereg_resv_subnetto & 0x80); 636 char clientrereg = 0;
659 unsigned long flags; 637 unsigned long flags;
660 u16 lid, smlid; 638 u16 lid, smlid;
661 u8 lwe; 639 u8 lwe;
@@ -805,6 +783,12 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
805 783
806 ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F; 784 ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
807 785
786 if (pip->clientrereg_resv_subnetto & 0x80) {
787 clientrereg = 1;
788 event.event = IB_EVENT_CLIENT_REREGISTER;
789 ib_dispatch_event(&event);
790 }
791
808 /* 792 /*
809 * Do the port state change now that the other link parameters 793 * Do the port state change now that the other link parameters
810 * have been set. 794 * have been set.
@@ -862,15 +846,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
862 smp->status |= IB_SMP_INVALID_FIELD; 846 smp->status |= IB_SMP_INVALID_FIELD;
863 } 847 }
864 848
865 if (clientrereg) {
866 event.event = IB_EVENT_CLIENT_REREGISTER;
867 ib_dispatch_event(&event);
868 }
869
870 ret = subn_get_portinfo(smp, ibdev, port); 849 ret = subn_get_portinfo(smp, ibdev, port);
871 850
872 /* restore re-reg bit per o14-12.2.1 */ 851 if (clientrereg)
873 pip->clientrereg_resv_subnetto |= clientrereg; 852 pip->clientrereg_resv_subnetto |= 0x80;
874 853
875 goto get_only; 854 goto get_only;
876 855
@@ -1858,7 +1837,6 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
1858 port_num && port_num <= ibdev->phys_port_cnt && 1837 port_num && port_num <= ibdev->phys_port_cnt &&
1859 port != port_num) 1838 port != port_num)
1860 (void) check_mkey(to_iport(ibdev, port_num), smp, 0); 1839 (void) check_mkey(to_iport(ibdev, port_num), smp, 0);
1861 ret = IB_MAD_RESULT_FAILURE;
1862 goto bail; 1840 goto bail;
1863 } 1841 }
1864 1842
@@ -2060,298 +2038,6 @@ bail:
2060 return ret; 2038 return ret;
2061} 2039}
2062 2040
2063static int cc_get_classportinfo(struct ib_cc_mad *ccp,
2064 struct ib_device *ibdev)
2065{
2066 struct ib_cc_classportinfo_attr *p =
2067 (struct ib_cc_classportinfo_attr *)ccp->mgmt_data;
2068
2069 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2070
2071 p->base_version = 1;
2072 p->class_version = 1;
2073 p->cap_mask = 0;
2074
2075 /*
2076 * Expected response time is 4.096 usec. * 2^18 == 1.073741824 sec.
2077 */
2078 p->resp_time_value = 18;
2079
2080 return reply((struct ib_smp *) ccp);
2081}
2082
2083static int cc_get_congestion_info(struct ib_cc_mad *ccp,
2084 struct ib_device *ibdev, u8 port)
2085{
2086 struct ib_cc_info_attr *p =
2087 (struct ib_cc_info_attr *)ccp->mgmt_data;
2088 struct qib_ibport *ibp = to_iport(ibdev, port);
2089 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2090
2091 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2092
2093 p->congestion_info = 0;
2094 p->control_table_cap = ppd->cc_max_table_entries;
2095
2096 return reply((struct ib_smp *) ccp);
2097}
2098
2099static int cc_get_congestion_setting(struct ib_cc_mad *ccp,
2100 struct ib_device *ibdev, u8 port)
2101{
2102 int i;
2103 struct ib_cc_congestion_setting_attr *p =
2104 (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
2105 struct qib_ibport *ibp = to_iport(ibdev, port);
2106 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2107 struct ib_cc_congestion_entry_shadow *entries;
2108
2109 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2110
2111 spin_lock(&ppd->cc_shadow_lock);
2112
2113 entries = ppd->congestion_entries_shadow->entries;
2114 p->port_control = cpu_to_be16(
2115 ppd->congestion_entries_shadow->port_control);
2116 p->control_map = cpu_to_be16(
2117 ppd->congestion_entries_shadow->control_map);
2118 for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
2119 p->entries[i].ccti_increase = entries[i].ccti_increase;
2120 p->entries[i].ccti_timer = cpu_to_be16(entries[i].ccti_timer);
2121 p->entries[i].trigger_threshold = entries[i].trigger_threshold;
2122 p->entries[i].ccti_min = entries[i].ccti_min;
2123 }
2124
2125 spin_unlock(&ppd->cc_shadow_lock);
2126
2127 return reply((struct ib_smp *) ccp);
2128}
2129
2130static int cc_get_congestion_control_table(struct ib_cc_mad *ccp,
2131 struct ib_device *ibdev, u8 port)
2132{
2133 struct ib_cc_table_attr *p =
2134 (struct ib_cc_table_attr *)ccp->mgmt_data;
2135 struct qib_ibport *ibp = to_iport(ibdev, port);
2136 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2137 u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
2138 u32 max_cct_block;
2139 u32 cct_entry;
2140 struct ib_cc_table_entry_shadow *entries;
2141 int i;
2142
2143 /* Is the table index more than what is supported? */
2144 if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
2145 goto bail;
2146
2147 memset(ccp->mgmt_data, 0, sizeof(ccp->mgmt_data));
2148
2149 spin_lock(&ppd->cc_shadow_lock);
2150
2151 max_cct_block =
2152 (ppd->ccti_entries_shadow->ccti_last_entry + 1)/IB_CCT_ENTRIES;
2153 max_cct_block = max_cct_block ? max_cct_block - 1 : 0;
2154
2155 if (cct_block_index > max_cct_block) {
2156 spin_unlock(&ppd->cc_shadow_lock);
2157 goto bail;
2158 }
2159
2160 ccp->attr_mod = cpu_to_be32(cct_block_index);
2161
2162 cct_entry = IB_CCT_ENTRIES * (cct_block_index + 1);
2163
2164 cct_entry--;
2165
2166 p->ccti_limit = cpu_to_be16(cct_entry);
2167
2168 entries = &ppd->ccti_entries_shadow->
2169 entries[IB_CCT_ENTRIES * cct_block_index];
2170 cct_entry %= IB_CCT_ENTRIES;
2171
2172 for (i = 0; i <= cct_entry; i++)
2173 p->ccti_entries[i].entry = cpu_to_be16(entries[i].entry);
2174
2175 spin_unlock(&ppd->cc_shadow_lock);
2176
2177 return reply((struct ib_smp *) ccp);
2178
2179bail:
2180 return reply_failure((struct ib_smp *) ccp);
2181}
2182
2183static int cc_set_congestion_setting(struct ib_cc_mad *ccp,
2184 struct ib_device *ibdev, u8 port)
2185{
2186 struct ib_cc_congestion_setting_attr *p =
2187 (struct ib_cc_congestion_setting_attr *)ccp->mgmt_data;
2188 struct qib_ibport *ibp = to_iport(ibdev, port);
2189 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2190 int i;
2191
2192 ppd->cc_sl_control_map = be16_to_cpu(p->control_map);
2193
2194 for (i = 0; i < IB_CC_CCS_ENTRIES; i++) {
2195 ppd->congestion_entries[i].ccti_increase =
2196 p->entries[i].ccti_increase;
2197
2198 ppd->congestion_entries[i].ccti_timer =
2199 be16_to_cpu(p->entries[i].ccti_timer);
2200
2201 ppd->congestion_entries[i].trigger_threshold =
2202 p->entries[i].trigger_threshold;
2203
2204 ppd->congestion_entries[i].ccti_min =
2205 p->entries[i].ccti_min;
2206 }
2207
2208 return reply((struct ib_smp *) ccp);
2209}
2210
2211static int cc_set_congestion_control_table(struct ib_cc_mad *ccp,
2212 struct ib_device *ibdev, u8 port)
2213{
2214 struct ib_cc_table_attr *p =
2215 (struct ib_cc_table_attr *)ccp->mgmt_data;
2216 struct qib_ibport *ibp = to_iport(ibdev, port);
2217 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2218 u32 cct_block_index = be32_to_cpu(ccp->attr_mod);
2219 u32 cct_entry;
2220 struct ib_cc_table_entry_shadow *entries;
2221 int i;
2222
2223 /* Is the table index more than what is supported? */
2224 if (cct_block_index > IB_CC_TABLE_CAP_DEFAULT - 1)
2225 goto bail;
2226
2227 /* If this packet is the first in the sequence then
2228 * zero the total table entry count.
2229 */
2230 if (be16_to_cpu(p->ccti_limit) < IB_CCT_ENTRIES)
2231 ppd->total_cct_entry = 0;
2232
2233 cct_entry = (be16_to_cpu(p->ccti_limit))%IB_CCT_ENTRIES;
2234
2235 /* ccti_limit is 0 to 63 */
2236 ppd->total_cct_entry += (cct_entry + 1);
2237
2238 if (ppd->total_cct_entry > ppd->cc_supported_table_entries)
2239 goto bail;
2240
2241 ppd->ccti_limit = be16_to_cpu(p->ccti_limit);
2242
2243 entries = ppd->ccti_entries + (IB_CCT_ENTRIES * cct_block_index);
2244
2245 for (i = 0; i <= cct_entry; i++)
2246 entries[i].entry = be16_to_cpu(p->ccti_entries[i].entry);
2247
2248 spin_lock(&ppd->cc_shadow_lock);
2249
2250 ppd->ccti_entries_shadow->ccti_last_entry = ppd->total_cct_entry - 1;
2251 memcpy(ppd->ccti_entries_shadow->entries, ppd->ccti_entries,
2252 (ppd->total_cct_entry * sizeof(struct ib_cc_table_entry)));
2253
2254 ppd->congestion_entries_shadow->port_control = IB_CC_CCS_PC_SL_BASED;
2255 ppd->congestion_entries_shadow->control_map = ppd->cc_sl_control_map;
2256 memcpy(ppd->congestion_entries_shadow->entries, ppd->congestion_entries,
2257 IB_CC_CCS_ENTRIES * sizeof(struct ib_cc_congestion_entry));
2258
2259 spin_unlock(&ppd->cc_shadow_lock);
2260
2261 return reply((struct ib_smp *) ccp);
2262
2263bail:
2264 return reply_failure((struct ib_smp *) ccp);
2265}
2266
2267static int check_cc_key(struct qib_ibport *ibp,
2268 struct ib_cc_mad *ccp, int mad_flags)
2269{
2270 return 0;
2271}
2272
2273static int process_cc(struct ib_device *ibdev, int mad_flags,
2274 u8 port, struct ib_mad *in_mad,
2275 struct ib_mad *out_mad)
2276{
2277 struct ib_cc_mad *ccp = (struct ib_cc_mad *)out_mad;
2278 struct qib_ibport *ibp = to_iport(ibdev, port);
2279 int ret;
2280
2281 *out_mad = *in_mad;
2282
2283 if (ccp->class_version != 2) {
2284 ccp->status |= IB_SMP_UNSUP_VERSION;
2285 ret = reply((struct ib_smp *)ccp);
2286 goto bail;
2287 }
2288
2289 ret = check_cc_key(ibp, ccp, mad_flags);
2290 if (ret)
2291 goto bail;
2292
2293 switch (ccp->method) {
2294 case IB_MGMT_METHOD_GET:
2295 switch (ccp->attr_id) {
2296 case IB_CC_ATTR_CLASSPORTINFO:
2297 ret = cc_get_classportinfo(ccp, ibdev);
2298 goto bail;
2299
2300 case IB_CC_ATTR_CONGESTION_INFO:
2301 ret = cc_get_congestion_info(ccp, ibdev, port);
2302 goto bail;
2303
2304 case IB_CC_ATTR_CA_CONGESTION_SETTING:
2305 ret = cc_get_congestion_setting(ccp, ibdev, port);
2306 goto bail;
2307
2308 case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
2309 ret = cc_get_congestion_control_table(ccp, ibdev, port);
2310 goto bail;
2311
2312 /* FALLTHROUGH */
2313 default:
2314 ccp->status |= IB_SMP_UNSUP_METH_ATTR;
2315 ret = reply((struct ib_smp *) ccp);
2316 goto bail;
2317 }
2318
2319 case IB_MGMT_METHOD_SET:
2320 switch (ccp->attr_id) {
2321 case IB_CC_ATTR_CA_CONGESTION_SETTING:
2322 ret = cc_set_congestion_setting(ccp, ibdev, port);
2323 goto bail;
2324
2325 case IB_CC_ATTR_CONGESTION_CONTROL_TABLE:
2326 ret = cc_set_congestion_control_table(ccp, ibdev, port);
2327 goto bail;
2328
2329 /* FALLTHROUGH */
2330 default:
2331 ccp->status |= IB_SMP_UNSUP_METH_ATTR;
2332 ret = reply((struct ib_smp *) ccp);
2333 goto bail;
2334 }
2335
2336 case IB_MGMT_METHOD_GET_RESP:
2337 /*
2338 * The ib_mad module will call us to process responses
2339 * before checking for other consumers.
2340 * Just tell the caller to process it normally.
2341 */
2342 ret = IB_MAD_RESULT_SUCCESS;
2343 goto bail;
2344
2345 case IB_MGMT_METHOD_TRAP:
2346 default:
2347 ccp->status |= IB_SMP_UNSUP_METHOD;
2348 ret = reply((struct ib_smp *) ccp);
2349 }
2350
2351bail:
2352 return ret;
2353}
2354
2355/** 2041/**
2356 * qib_process_mad - process an incoming MAD packet 2042 * qib_process_mad - process an incoming MAD packet
2357 * @ibdev: the infiniband device this packet came in on 2043 * @ibdev: the infiniband device this packet came in on
@@ -2376,8 +2062,6 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
2376 struct ib_mad *in_mad, struct ib_mad *out_mad) 2062 struct ib_mad *in_mad, struct ib_mad *out_mad)
2377{ 2063{
2378 int ret; 2064 int ret;
2379 struct qib_ibport *ibp = to_iport(ibdev, port);
2380 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2381 2065
2382 switch (in_mad->mad_hdr.mgmt_class) { 2066 switch (in_mad->mad_hdr.mgmt_class) {
2383 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE: 2067 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
@@ -2389,15 +2073,6 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port,
2389 ret = process_perf(ibdev, port, in_mad, out_mad); 2073 ret = process_perf(ibdev, port, in_mad, out_mad);
2390 goto bail; 2074 goto bail;
2391 2075
2392 case IB_MGMT_CLASS_CONG_MGMT:
2393 if (!ppd->congestion_entries_shadow ||
2394 !qib_cc_table_size) {
2395 ret = IB_MAD_RESULT_SUCCESS;
2396 goto bail;
2397 }
2398 ret = process_cc(ibdev, mad_flags, port, in_mad, out_mad);
2399 goto bail;
2400
2401 default: 2076 default:
2402 ret = IB_MAD_RESULT_SUCCESS; 2077 ret = IB_MAD_RESULT_SUCCESS;
2403 } 2078 }
diff --git a/drivers/infiniband/hw/qib/qib_mad.h b/drivers/infiniband/hw/qib/qib_mad.h
index 57bd3fa016b..ecc416cdbaa 100644
--- a/drivers/infiniband/hw/qib/qib_mad.h
+++ b/drivers/infiniband/hw/qib/qib_mad.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -31,8 +31,6 @@
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32 * SOFTWARE. 32 * SOFTWARE.
33 */ 33 */
34#ifndef _QIB_MAD_H
35#define _QIB_MAD_H
36 34
37#include <rdma/ib_pma.h> 35#include <rdma/ib_pma.h>
38 36
@@ -225,198 +223,6 @@ struct ib_pma_portcounters_cong {
225#define IB_PMA_SEL_CONG_ROUTING 0x08 223#define IB_PMA_SEL_CONG_ROUTING 0x08
226 224
227/* 225/*
228 * Congestion control class attributes
229 */
230#define IB_CC_ATTR_CLASSPORTINFO cpu_to_be16(0x0001)
231#define IB_CC_ATTR_NOTICE cpu_to_be16(0x0002)
232#define IB_CC_ATTR_CONGESTION_INFO cpu_to_be16(0x0011)
233#define IB_CC_ATTR_CONGESTION_KEY_INFO cpu_to_be16(0x0012)
234#define IB_CC_ATTR_CONGESTION_LOG cpu_to_be16(0x0013)
235#define IB_CC_ATTR_SWITCH_CONGESTION_SETTING cpu_to_be16(0x0014)
236#define IB_CC_ATTR_SWITCH_PORT_CONGESTION_SETTING cpu_to_be16(0x0015)
237#define IB_CC_ATTR_CA_CONGESTION_SETTING cpu_to_be16(0x0016)
238#define IB_CC_ATTR_CONGESTION_CONTROL_TABLE cpu_to_be16(0x0017)
239#define IB_CC_ATTR_TIME_STAMP cpu_to_be16(0x0018)
240
241/* generalizations for threshold values */
242#define IB_CC_THRESHOLD_NONE 0x0
243#define IB_CC_THRESHOLD_MIN 0x1
244#define IB_CC_THRESHOLD_MAX 0xf
245
246/* CCA MAD header constants */
247#define IB_CC_MAD_LOGDATA_LEN 32
248#define IB_CC_MAD_MGMTDATA_LEN 192
249
250struct ib_cc_mad {
251 u8 base_version;
252 u8 mgmt_class;
253 u8 class_version;
254 u8 method;
255 __be16 status;
256 __be16 class_specific;
257 __be64 tid;
258 __be16 attr_id;
259 __be16 resv;
260 __be32 attr_mod;
261 __be64 cckey;
262
263 /* For CongestionLog attribute only */
264 u8 log_data[IB_CC_MAD_LOGDATA_LEN];
265
266 u8 mgmt_data[IB_CC_MAD_MGMTDATA_LEN];
267} __packed;
268
269/*
270 * Congestion Control class portinfo capability mask bits
271 */
272#define IB_CC_CPI_CM_TRAP_GEN cpu_to_be16(1 << 0)
273#define IB_CC_CPI_CM_GET_SET_NOTICE cpu_to_be16(1 << 1)
274#define IB_CC_CPI_CM_CAP2 cpu_to_be16(1 << 2)
275#define IB_CC_CPI_CM_ENHANCEDPORT0_CC cpu_to_be16(1 << 8)
276
277struct ib_cc_classportinfo_attr {
278 u8 base_version;
279 u8 class_version;
280 __be16 cap_mask;
281 u8 reserved[3];
282 u8 resp_time_value; /* only lower 5 bits */
283 union ib_gid redirect_gid;
284 __be32 redirect_tc_sl_fl; /* 8, 4, 20 bits respectively */
285 __be16 redirect_lid;
286 __be16 redirect_pkey;
287 __be32 redirect_qp; /* only lower 24 bits */
288 __be32 redirect_qkey;
289 union ib_gid trap_gid;
290 __be32 trap_tc_sl_fl; /* 8, 4, 20 bits respectively */
291 __be16 trap_lid;
292 __be16 trap_pkey;
293 __be32 trap_hl_qp; /* 8, 24 bits respectively */
294 __be32 trap_qkey;
295} __packed;
296
297/* Congestion control traps */
298#define IB_CC_TRAP_KEY_VIOLATION 0x0000
299
300struct ib_cc_trap_key_violation_attr {
301 __be16 source_lid;
302 u8 method;
303 u8 reserved1;
304 __be16 attrib_id;
305 __be32 attrib_mod;
306 __be32 qp;
307 __be64 cckey;
308 u8 sgid[16];
309 u8 padding[24];
310} __packed;
311
312/* Congestion info flags */
313#define IB_CC_CI_FLAGS_CREDIT_STARVATION 0x1
314#define IB_CC_TABLE_CAP_DEFAULT 31
315
316struct ib_cc_info_attr {
317 __be16 congestion_info;
318 u8 control_table_cap; /* Multiple of 64 entry unit CCTs */
319} __packed;
320
321struct ib_cc_key_info_attr {
322 __be64 cckey;
323 u8 protect;
324 __be16 lease_period;
325 __be16 violations;
326} __packed;
327
328#define IB_CC_CL_CA_LOGEVENTS_LEN 208
329
330struct ib_cc_log_attr {
331 u8 log_type;
332 u8 congestion_flags;
333 __be16 threshold_event_counter;
334 __be16 threshold_congestion_event_map;
335 __be16 current_time_stamp;
336 u8 log_events[IB_CC_CL_CA_LOGEVENTS_LEN];
337} __packed;
338
339#define IB_CC_CLEC_SERVICETYPE_RC 0x0
340#define IB_CC_CLEC_SERVICETYPE_UC 0x1
341#define IB_CC_CLEC_SERVICETYPE_RD 0x2
342#define IB_CC_CLEC_SERVICETYPE_UD 0x3
343
344struct ib_cc_log_event {
345 u8 local_qp_cn_entry;
346 u8 remote_qp_number_cn_entry[3];
347 u8 sl_cn_entry:4;
348 u8 service_type_cn_entry:4;
349 __be32 remote_lid_cn_entry;
350 __be32 timestamp_cn_entry;
351} __packed;
352
353/* Sixteen congestion entries */
354#define IB_CC_CCS_ENTRIES 16
355
356/* Port control flags */
357#define IB_CC_CCS_PC_SL_BASED 0x01
358
359struct ib_cc_congestion_entry {
360 u8 ccti_increase;
361 __be16 ccti_timer;
362 u8 trigger_threshold;
363 u8 ccti_min; /* min CCTI for cc table */
364} __packed;
365
366struct ib_cc_congestion_entry_shadow {
367 u8 ccti_increase;
368 u16 ccti_timer;
369 u8 trigger_threshold;
370 u8 ccti_min; /* min CCTI for cc table */
371} __packed;
372
373struct ib_cc_congestion_setting_attr {
374 __be16 port_control;
375 __be16 control_map;
376 struct ib_cc_congestion_entry entries[IB_CC_CCS_ENTRIES];
377} __packed;
378
379struct ib_cc_congestion_setting_attr_shadow {
380 u16 port_control;
381 u16 control_map;
382 struct ib_cc_congestion_entry_shadow entries[IB_CC_CCS_ENTRIES];
383} __packed;
384
385#define IB_CC_TABLE_ENTRY_INCREASE_DEFAULT 1
386#define IB_CC_TABLE_ENTRY_TIMER_DEFAULT 1
387
388/* 64 Congestion Control table entries in a single MAD */
389#define IB_CCT_ENTRIES 64
390#define IB_CCT_MIN_ENTRIES (IB_CCT_ENTRIES * 2)
391
392struct ib_cc_table_entry {
393 __be16 entry; /* shift:2, multiplier:14 */
394};
395
396struct ib_cc_table_entry_shadow {
397 u16 entry; /* shift:2, multiplier:14 */
398};
399
400struct ib_cc_table_attr {
401 __be16 ccti_limit; /* max CCTI for cc table */
402 struct ib_cc_table_entry ccti_entries[IB_CCT_ENTRIES];
403} __packed;
404
405struct ib_cc_table_attr_shadow {
406 u16 ccti_limit; /* max CCTI for cc table */
407 struct ib_cc_table_entry_shadow ccti_entries[IB_CCT_ENTRIES];
408} __packed;
409
410#define CC_TABLE_SHADOW_MAX \
411 (IB_CC_TABLE_CAP_DEFAULT * IB_CCT_ENTRIES)
412
413struct cc_table_shadow {
414 u16 ccti_last_entry;
415 struct ib_cc_table_entry_shadow entries[CC_TABLE_SHADOW_MAX];
416} __packed;
417
418#endif /* _QIB_MAD_H */
419/*
420 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields 226 * The PortSamplesControl.CounterMasks field is an array of 3 bit fields
421 * which specify the N'th counter's capabilities. See ch. 16.1.3.2. 227 * which specify the N'th counter's capabilities. See ch. 16.1.3.2.
422 * We support 5 counters which only count the mandatory quantities. 228 * We support 5 counters which only count the mandatory quantities.
diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index e6687ded821..08944e2ee33 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c
@@ -47,43 +47,6 @@ static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
47 return container_of(ibfmr, struct qib_fmr, ibfmr); 47 return container_of(ibfmr, struct qib_fmr, ibfmr);
48} 48}
49 49
50static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
51 int count)
52{
53 int m, i = 0;
54 int rval = 0;
55
56 m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
57 for (; i < m; i++) {
58 mr->map[i] = kzalloc(sizeof *mr->map[0], GFP_KERNEL);
59 if (!mr->map[i])
60 goto bail;
61 }
62 mr->mapsz = m;
63 init_completion(&mr->comp);
64 /* count returning the ptr to user */
65 atomic_set(&mr->refcount, 1);
66 mr->pd = pd;
67 mr->max_segs = count;
68out:
69 return rval;
70bail:
71 while (i)
72 kfree(mr->map[--i]);
73 rval = -ENOMEM;
74 goto out;
75}
76
77static void deinit_qib_mregion(struct qib_mregion *mr)
78{
79 int i = mr->mapsz;
80
81 mr->mapsz = 0;
82 while (i)
83 kfree(mr->map[--i]);
84}
85
86
87/** 50/**
88 * qib_get_dma_mr - get a DMA memory region 51 * qib_get_dma_mr - get a DMA memory region
89 * @pd: protection domain for this memory region 52 * @pd: protection domain for this memory region
@@ -95,9 +58,10 @@ static void deinit_qib_mregion(struct qib_mregion *mr)
95 */ 58 */
96struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc) 59struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
97{ 60{
98 struct qib_mr *mr = NULL; 61 struct qib_ibdev *dev = to_idev(pd->device);
62 struct qib_mr *mr;
99 struct ib_mr *ret; 63 struct ib_mr *ret;
100 int rval; 64 unsigned long flags;
101 65
102 if (to_ipd(pd)->user) { 66 if (to_ipd(pd)->user) {
103 ret = ERR_PTR(-EPERM); 67 ret = ERR_PTR(-EPERM);
@@ -110,64 +74,61 @@ struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
110 goto bail; 74 goto bail;
111 } 75 }
112 76
113 rval = init_qib_mregion(&mr->mr, pd, 0); 77 mr->mr.access_flags = acc;
114 if (rval) { 78 atomic_set(&mr->mr.refcount, 0);
115 ret = ERR_PTR(rval);
116 goto bail;
117 }
118
119 79
120 rval = qib_alloc_lkey(&mr->mr, 1); 80 spin_lock_irqsave(&dev->lk_table.lock, flags);
121 if (rval) { 81 if (!dev->dma_mr)
122 ret = ERR_PTR(rval); 82 dev->dma_mr = &mr->mr;
123 goto bail_mregion; 83 spin_unlock_irqrestore(&dev->lk_table.lock, flags);
124 }
125 84
126 mr->mr.access_flags = acc;
127 ret = &mr->ibmr; 85 ret = &mr->ibmr;
128done:
129 return ret;
130 86
131bail_mregion:
132 deinit_qib_mregion(&mr->mr);
133bail: 87bail:
134 kfree(mr); 88 return ret;
135 goto done;
136} 89}
137 90
138static struct qib_mr *alloc_mr(int count, struct ib_pd *pd) 91static struct qib_mr *alloc_mr(int count, struct qib_lkey_table *lk_table)
139{ 92{
140 struct qib_mr *mr; 93 struct qib_mr *mr;
141 int rval = -ENOMEM; 94 int m, i = 0;
142 int m;
143 95
144 /* Allocate struct plus pointers to first level page tables. */ 96 /* Allocate struct plus pointers to first level page tables. */
145 m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ; 97 m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
146 mr = kzalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL); 98 mr = kmalloc(sizeof *mr + m * sizeof mr->mr.map[0], GFP_KERNEL);
147 if (!mr) 99 if (!mr)
148 goto bail; 100 goto done;
101
102 /* Allocate first level page tables. */
103 for (; i < m; i++) {
104 mr->mr.map[i] = kmalloc(sizeof *mr->mr.map[0], GFP_KERNEL);
105 if (!mr->mr.map[i])
106 goto bail;
107 }
108 mr->mr.mapsz = m;
109 mr->mr.page_shift = 0;
110 mr->mr.max_segs = count;
149 111
150 rval = init_qib_mregion(&mr->mr, pd, count);
151 if (rval)
152 goto bail;
153 /* 112 /*
154 * ib_reg_phys_mr() will initialize mr->ibmr except for 113 * ib_reg_phys_mr() will initialize mr->ibmr except for
155 * lkey and rkey. 114 * lkey and rkey.
156 */ 115 */
157 rval = qib_alloc_lkey(&mr->mr, 0); 116 if (!qib_alloc_lkey(lk_table, &mr->mr))
158 if (rval) 117 goto bail;
159 goto bail_mregion;
160 mr->ibmr.lkey = mr->mr.lkey; 118 mr->ibmr.lkey = mr->mr.lkey;
161 mr->ibmr.rkey = mr->mr.lkey; 119 mr->ibmr.rkey = mr->mr.lkey;
162done:
163 return mr;
164 120
165bail_mregion: 121 atomic_set(&mr->mr.refcount, 0);
166 deinit_qib_mregion(&mr->mr); 122 goto done;
123
167bail: 124bail:
125 while (i)
126 kfree(mr->mr.map[--i]);
168 kfree(mr); 127 kfree(mr);
169 mr = ERR_PTR(rval); 128 mr = NULL;
170 goto done; 129
130done:
131 return mr;
171} 132}
172 133
173/** 134/**
@@ -187,15 +148,19 @@ struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
187 int n, m, i; 148 int n, m, i;
188 struct ib_mr *ret; 149 struct ib_mr *ret;
189 150
190 mr = alloc_mr(num_phys_buf, pd); 151 mr = alloc_mr(num_phys_buf, &to_idev(pd->device)->lk_table);
191 if (IS_ERR(mr)) { 152 if (mr == NULL) {
192 ret = (struct ib_mr *)mr; 153 ret = ERR_PTR(-ENOMEM);
193 goto bail; 154 goto bail;
194 } 155 }
195 156
157 mr->mr.pd = pd;
196 mr->mr.user_base = *iova_start; 158 mr->mr.user_base = *iova_start;
197 mr->mr.iova = *iova_start; 159 mr->mr.iova = *iova_start;
160 mr->mr.length = 0;
161 mr->mr.offset = 0;
198 mr->mr.access_flags = acc; 162 mr->mr.access_flags = acc;
163 mr->umem = NULL;
199 164
200 m = 0; 165 m = 0;
201 n = 0; 166 n = 0;
@@ -221,6 +186,7 @@ bail:
221 * @pd: protection domain for this memory region 186 * @pd: protection domain for this memory region
222 * @start: starting userspace address 187 * @start: starting userspace address
223 * @length: length of region to register 188 * @length: length of region to register
189 * @virt_addr: virtual address to use (from HCA's point of view)
224 * @mr_access_flags: access flags for this memory region 190 * @mr_access_flags: access flags for this memory region
225 * @udata: unused by the QLogic_IB driver 191 * @udata: unused by the QLogic_IB driver
226 * 192 *
@@ -250,13 +216,14 @@ struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
250 list_for_each_entry(chunk, &umem->chunk_list, list) 216 list_for_each_entry(chunk, &umem->chunk_list, list)
251 n += chunk->nents; 217 n += chunk->nents;
252 218
253 mr = alloc_mr(n, pd); 219 mr = alloc_mr(n, &to_idev(pd->device)->lk_table);
254 if (IS_ERR(mr)) { 220 if (!mr) {
255 ret = (struct ib_mr *)mr; 221 ret = ERR_PTR(-ENOMEM);
256 ib_umem_release(umem); 222 ib_umem_release(umem);
257 goto bail; 223 goto bail;
258 } 224 }
259 225
226 mr->mr.pd = pd;
260 mr->mr.user_base = start; 227 mr->mr.user_base = start;
261 mr->mr.iova = virt_addr; 228 mr->mr.iova = virt_addr;
262 mr->mr.length = length; 229 mr->mr.length = length;
@@ -304,25 +271,21 @@ bail:
304int qib_dereg_mr(struct ib_mr *ibmr) 271int qib_dereg_mr(struct ib_mr *ibmr)
305{ 272{
306 struct qib_mr *mr = to_imr(ibmr); 273 struct qib_mr *mr = to_imr(ibmr);
307 int ret = 0; 274 struct qib_ibdev *dev = to_idev(ibmr->device);
308 unsigned long timeout; 275 int ret;
309 276 int i;
310 qib_free_lkey(&mr->mr); 277
311 278 ret = qib_free_lkey(dev, &mr->mr);
312 qib_put_mr(&mr->mr); /* will set completion if last */ 279 if (ret)
313 timeout = wait_for_completion_timeout(&mr->mr.comp, 280 return ret;
314 5 * HZ); 281
315 if (!timeout) { 282 i = mr->mr.mapsz;
316 qib_get_mr(&mr->mr); 283 while (i)
317 ret = -EBUSY; 284 kfree(mr->mr.map[--i]);
318 goto out;
319 }
320 deinit_qib_mregion(&mr->mr);
321 if (mr->umem) 285 if (mr->umem)
322 ib_umem_release(mr->umem); 286 ib_umem_release(mr->umem);
323 kfree(mr); 287 kfree(mr);
324out: 288 return 0;
325 return ret;
326} 289}
327 290
328/* 291/*
@@ -335,9 +298,17 @@ struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len)
335{ 298{
336 struct qib_mr *mr; 299 struct qib_mr *mr;
337 300
338 mr = alloc_mr(max_page_list_len, pd); 301 mr = alloc_mr(max_page_list_len, &to_idev(pd->device)->lk_table);
339 if (IS_ERR(mr)) 302 if (mr == NULL)
340 return (struct ib_mr *)mr; 303 return ERR_PTR(-ENOMEM);
304
305 mr->mr.pd = pd;
306 mr->mr.user_base = 0;
307 mr->mr.iova = 0;
308 mr->mr.length = 0;
309 mr->mr.offset = 0;
310 mr->mr.access_flags = 0;
311 mr->umem = NULL;
341 312
342 return &mr->ibmr; 313 return &mr->ibmr;
343} 314}
@@ -351,11 +322,11 @@ qib_alloc_fast_reg_page_list(struct ib_device *ibdev, int page_list_len)
351 if (size > PAGE_SIZE) 322 if (size > PAGE_SIZE)
352 return ERR_PTR(-EINVAL); 323 return ERR_PTR(-EINVAL);
353 324
354 pl = kzalloc(sizeof *pl, GFP_KERNEL); 325 pl = kmalloc(sizeof *pl, GFP_KERNEL);
355 if (!pl) 326 if (!pl)
356 return ERR_PTR(-ENOMEM); 327 return ERR_PTR(-ENOMEM);
357 328
358 pl->page_list = kzalloc(size, GFP_KERNEL); 329 pl->page_list = kmalloc(size, GFP_KERNEL);
359 if (!pl->page_list) 330 if (!pl->page_list)
360 goto err_free; 331 goto err_free;
361 332
@@ -384,47 +355,57 @@ struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
384 struct ib_fmr_attr *fmr_attr) 355 struct ib_fmr_attr *fmr_attr)
385{ 356{
386 struct qib_fmr *fmr; 357 struct qib_fmr *fmr;
387 int m; 358 int m, i = 0;
388 struct ib_fmr *ret; 359 struct ib_fmr *ret;
389 int rval = -ENOMEM;
390 360
391 /* Allocate struct plus pointers to first level page tables. */ 361 /* Allocate struct plus pointers to first level page tables. */
392 m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ; 362 m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
393 fmr = kzalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL); 363 fmr = kmalloc(sizeof *fmr + m * sizeof fmr->mr.map[0], GFP_KERNEL);
394 if (!fmr) 364 if (!fmr)
395 goto bail; 365 goto bail;
396 366
397 rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages); 367 /* Allocate first level page tables. */
398 if (rval) 368 for (; i < m; i++) {
399 goto bail; 369 fmr->mr.map[i] = kmalloc(sizeof *fmr->mr.map[0],
370 GFP_KERNEL);
371 if (!fmr->mr.map[i])
372 goto bail;
373 }
374 fmr->mr.mapsz = m;
400 375
401 /* 376 /*
402 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey & 377 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
403 * rkey. 378 * rkey.
404 */ 379 */
405 rval = qib_alloc_lkey(&fmr->mr, 0); 380 if (!qib_alloc_lkey(&to_idev(pd->device)->lk_table, &fmr->mr))
406 if (rval) 381 goto bail;
407 goto bail_mregion;
408 fmr->ibfmr.rkey = fmr->mr.lkey; 382 fmr->ibfmr.rkey = fmr->mr.lkey;
409 fmr->ibfmr.lkey = fmr->mr.lkey; 383 fmr->ibfmr.lkey = fmr->mr.lkey;
410 /* 384 /*
411 * Resources are allocated but no valid mapping (RKEY can't be 385 * Resources are allocated but no valid mapping (RKEY can't be
412 * used). 386 * used).
413 */ 387 */
388 fmr->mr.pd = pd;
389 fmr->mr.user_base = 0;
390 fmr->mr.iova = 0;
391 fmr->mr.length = 0;
392 fmr->mr.offset = 0;
414 fmr->mr.access_flags = mr_access_flags; 393 fmr->mr.access_flags = mr_access_flags;
415 fmr->mr.max_segs = fmr_attr->max_pages; 394 fmr->mr.max_segs = fmr_attr->max_pages;
416 fmr->mr.page_shift = fmr_attr->page_shift; 395 fmr->mr.page_shift = fmr_attr->page_shift;
417 396
397 atomic_set(&fmr->mr.refcount, 0);
418 ret = &fmr->ibfmr; 398 ret = &fmr->ibfmr;
419done: 399 goto done;
420 return ret;
421 400
422bail_mregion:
423 deinit_qib_mregion(&fmr->mr);
424bail: 401bail:
402 while (i)
403 kfree(fmr->mr.map[--i]);
425 kfree(fmr); 404 kfree(fmr);
426 ret = ERR_PTR(rval); 405 ret = ERR_PTR(-ENOMEM);
427 goto done; 406
407done:
408 return ret;
428} 409}
429 410
430/** 411/**
@@ -447,8 +428,7 @@ int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
447 u32 ps; 428 u32 ps;
448 int ret; 429 int ret;
449 430
450 i = atomic_read(&fmr->mr.refcount); 431 if (atomic_read(&fmr->mr.refcount))
451 if (i > 2)
452 return -EBUSY; 432 return -EBUSY;
453 433
454 if (list_len > fmr->mr.max_segs) { 434 if (list_len > fmr->mr.max_segs) {
@@ -510,27 +490,16 @@ int qib_unmap_fmr(struct list_head *fmr_list)
510int qib_dealloc_fmr(struct ib_fmr *ibfmr) 490int qib_dealloc_fmr(struct ib_fmr *ibfmr)
511{ 491{
512 struct qib_fmr *fmr = to_ifmr(ibfmr); 492 struct qib_fmr *fmr = to_ifmr(ibfmr);
513 int ret = 0; 493 int ret;
514 unsigned long timeout; 494 int i;
515
516 qib_free_lkey(&fmr->mr);
517 qib_put_mr(&fmr->mr); /* will set completion if last */
518 timeout = wait_for_completion_timeout(&fmr->mr.comp,
519 5 * HZ);
520 if (!timeout) {
521 qib_get_mr(&fmr->mr);
522 ret = -EBUSY;
523 goto out;
524 }
525 deinit_qib_mregion(&fmr->mr);
526 kfree(fmr);
527out:
528 return ret;
529}
530 495
531void mr_rcu_callback(struct rcu_head *list) 496 ret = qib_free_lkey(to_idev(ibfmr->device), &fmr->mr);
532{ 497 if (ret)
533 struct qib_mregion *mr = container_of(list, struct qib_mregion, list); 498 return ret;
534 499
535 complete(&mr->comp); 500 i = fmr->mr.mapsz;
501 while (i)
502 kfree(fmr->mr.map[--i]);
503 kfree(fmr);
504 return 0;
536} 505}
diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c
index c574ec7c85e..4426782ad28 100644
--- a/drivers/infiniband/hw/qib/qib_pcie.c
+++ b/drivers/infiniband/hw/qib/qib_pcie.c
@@ -35,7 +35,6 @@
35#include <linux/delay.h> 35#include <linux/delay.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/aer.h> 37#include <linux/aer.h>
38#include <linux/module.h>
39 38
40#include "qib.h" 39#include "qib.h"
41 40
@@ -194,24 +193,11 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd)
194} 193}
195 194
196static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, 195static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
197 struct qib_msix_entry *qib_msix_entry) 196 struct msix_entry *msix_entry)
198{ 197{
199 int ret; 198 int ret;
200 u32 tabsize = 0; 199 u32 tabsize = 0;
201 u16 msix_flags; 200 u16 msix_flags;
202 struct msix_entry *msix_entry;
203 int i;
204
205 /* We can't pass qib_msix_entry array to qib_msix_setup
206 * so use a dummy msix_entry array and copy the allocated
207 * irq back to the qib_msix_entry array. */
208 msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL);
209 if (!msix_entry) {
210 ret = -ENOMEM;
211 goto do_intx;
212 }
213 for (i = 0; i < *msixcnt; i++)
214 msix_entry[i] = qib_msix_entry[i].msix;
215 201
216 pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); 202 pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags);
217 tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); 203 tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE);
@@ -222,16 +208,11 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt,
222 tabsize = ret; 208 tabsize = ret;
223 ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); 209 ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize);
224 } 210 }
225do_intx:
226 if (ret) { 211 if (ret) {
227 qib_dev_err(dd, 212 qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, "
228 "pci_enable_msix %d vectors failed: %d, falling back to INTx\n", 213 "falling back to INTx\n", tabsize, ret);
229 tabsize, ret);
230 tabsize = 0; 214 tabsize = 0;
231 } 215 }
232 for (i = 0; i < tabsize; i++)
233 qib_msix_entry[i].msix = msix_entry[i];
234 kfree(msix_entry);
235 *msixcnt = tabsize; 216 *msixcnt = tabsize;
236 217
237 if (ret) 218 if (ret)
@@ -252,9 +233,8 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
252 233
253 ret = pci_enable_msi(pdev); 234 ret = pci_enable_msi(pdev);
254 if (ret) 235 if (ret)
255 qib_dev_err(dd, 236 qib_dev_err(dd, "pci_enable_msi failed: %d, "
256 "pci_enable_msi failed: %d, interrupts may not work\n", 237 "interrupts may not work\n", ret);
257 ret);
258 /* continue even if it fails, we may still be OK... */ 238 /* continue even if it fails, we may still be OK... */
259 239
260 pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO, 240 pci_read_config_dword(pdev, pos + PCI_MSI_ADDRESS_LO,
@@ -270,12 +250,13 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos)
270} 250}
271 251
272int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, 252int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
273 struct qib_msix_entry *entry) 253 struct msix_entry *entry)
274{ 254{
275 u16 linkstat, speed; 255 u16 linkstat, speed;
276 int pos = 0, ret = 1; 256 int pos = 0, pose, ret = 1;
277 257
278 if (!pci_is_pcie(dd->pcidev)) { 258 pose = pci_pcie_cap(dd->pcidev);
259 if (!pose) {
279 qib_dev_err(dd, "Can't find PCI Express capability!\n"); 260 qib_dev_err(dd, "Can't find PCI Express capability!\n");
280 /* set up something... */ 261 /* set up something... */
281 dd->lbus_width = 1; 262 dd->lbus_width = 1;
@@ -297,7 +278,7 @@ int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent,
297 if (!pos) 278 if (!pos)
298 qib_enable_intx(dd->pcidev); 279 qib_enable_intx(dd->pcidev);
299 280
300 pcie_capability_read_word(dd->pcidev, PCI_EXP_LNKSTA, &linkstat); 281 pci_read_config_word(dd->pcidev, pose + PCI_EXP_LNKSTA, &linkstat);
301 /* 282 /*
302 * speed is bits 0-3, linkwidth is bits 4-8 283 * speed is bits 0-3, linkwidth is bits 4-8
303 * no defines for them in headers 284 * no defines for them in headers
@@ -359,8 +340,8 @@ int qib_reinit_intr(struct qib_devdata *dd)
359 340
360 pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI); 341 pos = pci_find_capability(dd->pcidev, PCI_CAP_ID_MSI);
361 if (!pos) { 342 if (!pos) {
362 qib_dev_err(dd, 343 qib_dev_err(dd, "Can't find MSI capability, "
363 "Can't find MSI capability, can't restore MSI settings\n"); 344 "can't restore MSI settings\n");
364 ret = 0; 345 ret = 0;
365 /* nothing special for MSIx, just MSI */ 346 /* nothing special for MSIx, just MSI */
366 goto bail; 347 goto bail;
@@ -472,8 +453,8 @@ void qib_pcie_reenable(struct qib_devdata *dd, u16 cmd, u8 iline, u8 cline)
472 pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline); 453 pci_write_config_byte(dd->pcidev, PCI_CACHE_LINE_SIZE, cline);
473 r = pci_enable_device(dd->pcidev); 454 r = pci_enable_device(dd->pcidev);
474 if (r) 455 if (r)
475 qib_dev_err(dd, 456 qib_dev_err(dd, "pci_enable_device failed after "
476 "pci_enable_device failed after reset: %d\n", r); 457 "reset: %d\n", r);
477} 458}
478 459
479/* code to adjust PCIe capabilities. */ 460/* code to adjust PCIe capabilities. */
@@ -515,6 +496,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
515{ 496{
516 int r; 497 int r;
517 struct pci_dev *parent; 498 struct pci_dev *parent;
499 int ppos;
518 u16 devid; 500 u16 devid;
519 u32 mask, bits, val; 501 u32 mask, bits, val;
520 502
@@ -527,7 +509,8 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
527 qib_devinfo(dd->pcidev, "Parent not root\n"); 509 qib_devinfo(dd->pcidev, "Parent not root\n");
528 return 1; 510 return 1;
529 } 511 }
530 if (!pci_is_pcie(parent)) 512 ppos = pci_pcie_cap(parent);
513 if (!ppos)
531 return 1; 514 return 1;
532 if (parent->vendor != 0x8086) 515 if (parent->vendor != 0x8086)
533 return 1; 516 return 1;
@@ -578,12 +561,13 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd)
578 */ 561 */
579static int qib_pcie_caps; 562static int qib_pcie_caps;
580module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); 563module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO);
581MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); 564MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)");
582 565
583static int qib_tune_pcie_caps(struct qib_devdata *dd) 566static int qib_tune_pcie_caps(struct qib_devdata *dd)
584{ 567{
585 int ret = 1; /* Assume the worst */ 568 int ret = 1; /* Assume the worst */
586 struct pci_dev *parent; 569 struct pci_dev *parent;
570 int ppos, epos;
587 u16 pcaps, pctl, ecaps, ectl; 571 u16 pcaps, pctl, ecaps, ectl;
588 int rc_sup, ep_sup; 572 int rc_sup, ep_sup;
589 int rc_cur, ep_cur; 573 int rc_cur, ep_cur;
@@ -594,15 +578,19 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
594 qib_devinfo(dd->pcidev, "Parent not root\n"); 578 qib_devinfo(dd->pcidev, "Parent not root\n");
595 goto bail; 579 goto bail;
596 } 580 }
597 581 ppos = pci_pcie_cap(parent);
598 if (!pci_is_pcie(parent) || !pci_is_pcie(dd->pcidev)) 582 if (ppos) {
583 pci_read_config_word(parent, ppos + PCI_EXP_DEVCAP, &pcaps);
584 pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl);
585 } else
599 goto bail; 586 goto bail;
600 pcie_capability_read_word(parent, PCI_EXP_DEVCAP, &pcaps);
601 pcie_capability_read_word(parent, PCI_EXP_DEVCTL, &pctl);
602 /* Find out supported and configured values for endpoint (us) */ 587 /* Find out supported and configured values for endpoint (us) */
603 pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCAP, &ecaps); 588 epos = pci_pcie_cap(dd->pcidev);
604 pcie_capability_read_word(dd->pcidev, PCI_EXP_DEVCTL, &ectl); 589 if (epos) {
605 590 pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCAP, &ecaps);
591 pci_read_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, &ectl);
592 } else
593 goto bail;
606 ret = 0; 594 ret = 0;
607 /* Find max payload supported by root, endpoint */ 595 /* Find max payload supported by root, endpoint */
608 rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD); 596 rc_sup = fld2val(pcaps, PCI_EXP_DEVCAP_PAYLOAD);
@@ -621,14 +609,14 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
621 rc_cur = rc_sup; 609 rc_cur = rc_sup;
622 pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) | 610 pctl = (pctl & ~PCI_EXP_DEVCTL_PAYLOAD) |
623 val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD); 611 val2fld(rc_cur, PCI_EXP_DEVCTL_PAYLOAD);
624 pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); 612 pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
625 } 613 }
626 /* If less than (allowed, supported), bump endpoint payload */ 614 /* If less than (allowed, supported), bump endpoint payload */
627 if (rc_sup > ep_cur) { 615 if (rc_sup > ep_cur) {
628 ep_cur = rc_sup; 616 ep_cur = rc_sup;
629 ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) | 617 ectl = (ectl & ~PCI_EXP_DEVCTL_PAYLOAD) |
630 val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD); 618 val2fld(ep_cur, PCI_EXP_DEVCTL_PAYLOAD);
631 pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); 619 pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
632 } 620 }
633 621
634 /* 622 /*
@@ -646,13 +634,13 @@ static int qib_tune_pcie_caps(struct qib_devdata *dd)
646 rc_cur = rc_sup; 634 rc_cur = rc_sup;
647 pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) | 635 pctl = (pctl & ~PCI_EXP_DEVCTL_READRQ) |
648 val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ); 636 val2fld(rc_cur, PCI_EXP_DEVCTL_READRQ);
649 pcie_capability_write_word(parent, PCI_EXP_DEVCTL, pctl); 637 pci_write_config_word(parent, ppos + PCI_EXP_DEVCTL, pctl);
650 } 638 }
651 if (rc_sup > ep_cur) { 639 if (rc_sup > ep_cur) {
652 ep_cur = rc_sup; 640 ep_cur = rc_sup;
653 ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) | 641 ectl = (ectl & ~PCI_EXP_DEVCTL_READRQ) |
654 val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ); 642 val2fld(ep_cur, PCI_EXP_DEVCTL_READRQ);
655 pcie_capability_write_word(dd->pcidev, PCI_EXP_DEVCTL, ectl); 643 pci_write_config_word(dd->pcidev, epos + PCI_EXP_DEVCTL, ectl);
656 } 644 }
657bail: 645bail:
658 return ret; 646 return ret;
@@ -711,16 +699,15 @@ qib_pci_mmio_enabled(struct pci_dev *pdev)
711 if (words == ~0ULL) 699 if (words == ~0ULL)
712 ret = PCI_ERS_RESULT_NEED_RESET; 700 ret = PCI_ERS_RESULT_NEED_RESET;
713 } 701 }
714 qib_devinfo(pdev, 702 qib_devinfo(pdev, "QIB mmio_enabled function called, "
715 "QIB mmio_enabled function called, read wordscntr %Lx, returning %d\n", 703 "read wordscntr %Lx, returning %d\n", words, ret);
716 words, ret);
717 return ret; 704 return ret;
718} 705}
719 706
720static pci_ers_result_t 707static pci_ers_result_t
721qib_pci_slot_reset(struct pci_dev *pdev) 708qib_pci_slot_reset(struct pci_dev *pdev)
722{ 709{
723 qib_devinfo(pdev, "QIB slot_reset function called, ignored\n"); 710 qib_devinfo(pdev, "QIB link_reset function called, ignored\n");
724 return PCI_ERS_RESULT_CAN_RECOVER; 711 return PCI_ERS_RESULT_CAN_RECOVER;
725} 712}
726 713
@@ -745,7 +732,7 @@ qib_pci_resume(struct pci_dev *pdev)
745 qib_init(dd, 1); /* same as re-init after reset */ 732 qib_init(dd, 1); /* same as re-init after reset */
746} 733}
747 734
748const struct pci_error_handlers qib_pci_err_handler = { 735struct pci_error_handlers qib_pci_err_handler = {
749 .error_detected = qib_pci_error_detected, 736 .error_detected = qib_pci_error_detected,
750 .mmio_enabled = qib_pci_mmio_enabled, 737 .mmio_enabled = qib_pci_mmio_enabled,
751 .link_reset = qib_pci_link_reset, 738 .link_reset = qib_pci_link_reset,
diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c
index 4850d03870c..e16751f8639 100644
--- a/drivers/infiniband/hw/qib/qib_qp.c
+++ b/drivers/infiniband/hw/qib/qib_qp.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. * All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
34 34
35#include <linux/err.h> 35#include <linux/err.h>
36#include <linux/vmalloc.h> 36#include <linux/vmalloc.h>
37#include <linux/jhash.h>
38 37
39#include "qib.h" 38#include "qib.h"
40 39
@@ -205,13 +204,6 @@ static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
205 clear_bit(qpn & BITS_PER_PAGE_MASK, map->page); 204 clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
206} 205}
207 206
208static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
209{
210 return jhash_1word(qpn, dev->qp_rnd) &
211 (dev->qp_table_size - 1);
212}
213
214
215/* 207/*
216 * Put the QP into the hash table. 208 * Put the QP into the hash table.
217 * The hash table holds a reference to the QP. 209 * The hash table holds a reference to the QP.
@@ -219,23 +211,22 @@ static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
219static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp) 211static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
220{ 212{
221 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 213 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
214 unsigned n = qp->ibqp.qp_num % dev->qp_table_size;
222 unsigned long flags; 215 unsigned long flags;
223 unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
224 216
225 spin_lock_irqsave(&dev->qpt_lock, flags); 217 spin_lock_irqsave(&dev->qpt_lock, flags);
226 atomic_inc(&qp->refcount);
227 218
228 if (qp->ibqp.qp_num == 0) 219 if (qp->ibqp.qp_num == 0)
229 rcu_assign_pointer(ibp->qp0, qp); 220 ibp->qp0 = qp;
230 else if (qp->ibqp.qp_num == 1) 221 else if (qp->ibqp.qp_num == 1)
231 rcu_assign_pointer(ibp->qp1, qp); 222 ibp->qp1 = qp;
232 else { 223 else {
233 qp->next = dev->qp_table[n]; 224 qp->next = dev->qp_table[n];
234 rcu_assign_pointer(dev->qp_table[n], qp); 225 dev->qp_table[n] = qp;
235 } 226 }
227 atomic_inc(&qp->refcount);
236 228
237 spin_unlock_irqrestore(&dev->qpt_lock, flags); 229 spin_unlock_irqrestore(&dev->qpt_lock, flags);
238 synchronize_rcu();
239} 230}
240 231
241/* 232/*
@@ -245,42 +236,29 @@ static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
245static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp) 236static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
246{ 237{
247 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); 238 struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
248 unsigned n = qpn_hash(dev, qp->ibqp.qp_num); 239 struct qib_qp *q, **qpp;
249 unsigned long flags; 240 unsigned long flags;
250 241
242 qpp = &dev->qp_table[qp->ibqp.qp_num % dev->qp_table_size];
243
251 spin_lock_irqsave(&dev->qpt_lock, flags); 244 spin_lock_irqsave(&dev->qpt_lock, flags);
252 245
253 if (rcu_dereference_protected(ibp->qp0, 246 if (ibp->qp0 == qp) {
254 lockdep_is_held(&dev->qpt_lock)) == qp) { 247 ibp->qp0 = NULL;
255 atomic_dec(&qp->refcount); 248 atomic_dec(&qp->refcount);
256 rcu_assign_pointer(ibp->qp0, NULL); 249 } else if (ibp->qp1 == qp) {
257 } else if (rcu_dereference_protected(ibp->qp1, 250 ibp->qp1 = NULL;
258 lockdep_is_held(&dev->qpt_lock)) == qp) {
259 atomic_dec(&qp->refcount); 251 atomic_dec(&qp->refcount);
260 rcu_assign_pointer(ibp->qp1, NULL); 252 } else
261 } else { 253 for (; (q = *qpp) != NULL; qpp = &q->next)
262 struct qib_qp *q;
263 struct qib_qp __rcu **qpp;
264
265 qpp = &dev->qp_table[n];
266 q = rcu_dereference_protected(*qpp,
267 lockdep_is_held(&dev->qpt_lock));
268 for (; q; qpp = &q->next) {
269 if (q == qp) { 254 if (q == qp) {
270 atomic_dec(&qp->refcount);
271 *qpp = qp->next; 255 *qpp = qp->next;
272 rcu_assign_pointer(qp->next, NULL); 256 qp->next = NULL;
273 q = rcu_dereference_protected(*qpp, 257 atomic_dec(&qp->refcount);
274 lockdep_is_held(&dev->qpt_lock));
275 break; 258 break;
276 } 259 }
277 q = rcu_dereference_protected(*qpp,
278 lockdep_is_held(&dev->qpt_lock));
279 }
280 }
281 260
282 spin_unlock_irqrestore(&dev->qpt_lock, flags); 261 spin_unlock_irqrestore(&dev->qpt_lock, flags);
283 synchronize_rcu();
284} 262}
285 263
286/** 264/**
@@ -302,26 +280,21 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
302 280
303 if (!qib_mcast_tree_empty(ibp)) 281 if (!qib_mcast_tree_empty(ibp))
304 qp_inuse++; 282 qp_inuse++;
305 rcu_read_lock(); 283 if (ibp->qp0)
306 if (rcu_dereference(ibp->qp0))
307 qp_inuse++; 284 qp_inuse++;
308 if (rcu_dereference(ibp->qp1)) 285 if (ibp->qp1)
309 qp_inuse++; 286 qp_inuse++;
310 rcu_read_unlock();
311 } 287 }
312 288
313 spin_lock_irqsave(&dev->qpt_lock, flags); 289 spin_lock_irqsave(&dev->qpt_lock, flags);
314 for (n = 0; n < dev->qp_table_size; n++) { 290 for (n = 0; n < dev->qp_table_size; n++) {
315 qp = rcu_dereference_protected(dev->qp_table[n], 291 qp = dev->qp_table[n];
316 lockdep_is_held(&dev->qpt_lock)); 292 dev->qp_table[n] = NULL;
317 rcu_assign_pointer(dev->qp_table[n], NULL);
318 293
319 for (; qp; qp = rcu_dereference_protected(qp->next, 294 for (; qp; qp = qp->next)
320 lockdep_is_held(&dev->qpt_lock)))
321 qp_inuse++; 295 qp_inuse++;
322 } 296 }
323 spin_unlock_irqrestore(&dev->qpt_lock, flags); 297 spin_unlock_irqrestore(&dev->qpt_lock, flags);
324 synchronize_rcu();
325 298
326 return qp_inuse; 299 return qp_inuse;
327} 300}
@@ -336,29 +309,25 @@ unsigned qib_free_all_qps(struct qib_devdata *dd)
336 */ 309 */
337struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn) 310struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
338{ 311{
339 struct qib_qp *qp = NULL; 312 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
313 unsigned long flags;
314 struct qib_qp *qp;
340 315
341 if (unlikely(qpn <= 1)) { 316 spin_lock_irqsave(&dev->qpt_lock, flags);
342 rcu_read_lock();
343 if (qpn == 0)
344 qp = rcu_dereference(ibp->qp0);
345 else
346 qp = rcu_dereference(ibp->qp1);
347 } else {
348 struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
349 unsigned n = qpn_hash(dev, qpn);
350 317
351 rcu_read_lock(); 318 if (qpn == 0)
352 for (qp = rcu_dereference(dev->qp_table[n]); qp; 319 qp = ibp->qp0;
353 qp = rcu_dereference(qp->next)) 320 else if (qpn == 1)
321 qp = ibp->qp1;
322 else
323 for (qp = dev->qp_table[qpn % dev->qp_table_size]; qp;
324 qp = qp->next)
354 if (qp->ibqp.qp_num == qpn) 325 if (qp->ibqp.qp_num == qpn)
355 break; 326 break;
356 }
357 if (qp) 327 if (qp)
358 if (unlikely(!atomic_inc_not_zero(&qp->refcount))) 328 atomic_inc(&qp->refcount);
359 qp = NULL;
360 329
361 rcu_read_unlock(); 330 spin_unlock_irqrestore(&dev->qpt_lock, flags);
362 return qp; 331 return qp;
363} 332}
364 333
@@ -419,9 +388,18 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
419 unsigned n; 388 unsigned n;
420 389
421 if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) 390 if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
422 qib_put_ss(&qp->s_rdma_read_sge); 391 while (qp->s_rdma_read_sge.num_sge) {
392 atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
393 if (--qp->s_rdma_read_sge.num_sge)
394 qp->s_rdma_read_sge.sge =
395 *qp->s_rdma_read_sge.sg_list++;
396 }
423 397
424 qib_put_ss(&qp->r_sge); 398 while (qp->r_sge.num_sge) {
399 atomic_dec(&qp->r_sge.sge.mr->refcount);
400 if (--qp->r_sge.num_sge)
401 qp->r_sge.sge = *qp->r_sge.sg_list++;
402 }
425 403
426 if (clr_sends) { 404 if (clr_sends) {
427 while (qp->s_last != qp->s_head) { 405 while (qp->s_last != qp->s_head) {
@@ -431,7 +409,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
431 for (i = 0; i < wqe->wr.num_sge; i++) { 409 for (i = 0; i < wqe->wr.num_sge; i++) {
432 struct qib_sge *sge = &wqe->sg_list[i]; 410 struct qib_sge *sge = &wqe->sg_list[i];
433 411
434 qib_put_mr(sge->mr); 412 atomic_dec(&sge->mr->refcount);
435 } 413 }
436 if (qp->ibqp.qp_type == IB_QPT_UD || 414 if (qp->ibqp.qp_type == IB_QPT_UD ||
437 qp->ibqp.qp_type == IB_QPT_SMI || 415 qp->ibqp.qp_type == IB_QPT_SMI ||
@@ -441,7 +419,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
441 qp->s_last = 0; 419 qp->s_last = 0;
442 } 420 }
443 if (qp->s_rdma_mr) { 421 if (qp->s_rdma_mr) {
444 qib_put_mr(qp->s_rdma_mr); 422 atomic_dec(&qp->s_rdma_mr->refcount);
445 qp->s_rdma_mr = NULL; 423 qp->s_rdma_mr = NULL;
446 } 424 }
447 } 425 }
@@ -454,7 +432,7 @@ static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
454 432
455 if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST && 433 if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
456 e->rdma_sge.mr) { 434 e->rdma_sge.mr) {
457 qib_put_mr(e->rdma_sge.mr); 435 atomic_dec(&e->rdma_sge.mr->refcount);
458 e->rdma_sge.mr = NULL; 436 e->rdma_sge.mr = NULL;
459 } 437 }
460 } 438 }
@@ -499,7 +477,7 @@ int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
499 if (!(qp->s_flags & QIB_S_BUSY)) { 477 if (!(qp->s_flags & QIB_S_BUSY)) {
500 qp->s_hdrwords = 0; 478 qp->s_hdrwords = 0;
501 if (qp->s_rdma_mr) { 479 if (qp->s_rdma_mr) {
502 qib_put_mr(qp->s_rdma_mr); 480 atomic_dec(&qp->s_rdma_mr->refcount);
503 qp->s_rdma_mr = NULL; 481 qp->s_rdma_mr = NULL;
504 } 482 }
505 if (qp->s_tx) { 483 if (qp->s_tx) {
@@ -787,10 +765,8 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
787 } 765 }
788 } 766 }
789 767
790 if (attr_mask & IB_QP_PATH_MTU) { 768 if (attr_mask & IB_QP_PATH_MTU)
791 qp->path_mtu = pmtu; 769 qp->path_mtu = pmtu;
792 qp->pmtu = ib_mtu_enum_to_int(pmtu);
793 }
794 770
795 if (attr_mask & IB_QP_RETRY_CNT) { 771 if (attr_mask & IB_QP_RETRY_CNT) {
796 qp->s_retry_cnt = attr->retry_cnt; 772 qp->s_retry_cnt = attr->retry_cnt;
@@ -805,12 +781,8 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
805 if (attr_mask & IB_QP_MIN_RNR_TIMER) 781 if (attr_mask & IB_QP_MIN_RNR_TIMER)
806 qp->r_min_rnr_timer = attr->min_rnr_timer; 782 qp->r_min_rnr_timer = attr->min_rnr_timer;
807 783
808 if (attr_mask & IB_QP_TIMEOUT) { 784 if (attr_mask & IB_QP_TIMEOUT)
809 qp->timeout = attr->timeout; 785 qp->timeout = attr->timeout;
810 qp->timeout_jiffies =
811 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
812 1000UL);
813 }
814 786
815 if (attr_mask & IB_QP_QKEY) 787 if (attr_mask & IB_QP_QKEY)
816 qp->qkey = attr->qkey; 788 qp->qkey = attr->qkey;
@@ -1041,15 +1013,6 @@ struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
1041 ret = ERR_PTR(-ENOMEM); 1013 ret = ERR_PTR(-ENOMEM);
1042 goto bail_swq; 1014 goto bail_swq;
1043 } 1015 }
1044 RCU_INIT_POINTER(qp->next, NULL);
1045 qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), GFP_KERNEL);
1046 if (!qp->s_hdr) {
1047 ret = ERR_PTR(-ENOMEM);
1048 goto bail_qp;
1049 }
1050 qp->timeout_jiffies =
1051 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1052 1000UL);
1053 if (init_attr->srq) 1016 if (init_attr->srq)
1054 sz = 0; 1017 sz = 0;
1055 else { 1018 else {
@@ -1168,7 +1131,6 @@ bail_ip:
1168 vfree(qp->r_rq.wq); 1131 vfree(qp->r_rq.wq);
1169 free_qpn(&dev->qpn_table, qp->ibqp.qp_num); 1132 free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
1170bail_qp: 1133bail_qp:
1171 kfree(qp->s_hdr);
1172 kfree(qp); 1134 kfree(qp);
1173bail_swq: 1135bail_swq:
1174 vfree(swq); 1136 vfree(swq);
@@ -1224,7 +1186,6 @@ int qib_destroy_qp(struct ib_qp *ibqp)
1224 else 1186 else
1225 vfree(qp->r_rq.wq); 1187 vfree(qp->r_rq.wq);
1226 vfree(qp->s_wq); 1188 vfree(qp->s_wq);
1227 kfree(qp->s_hdr);
1228 kfree(qp); 1189 kfree(qp);
1229 return 0; 1190 return 0;
1230} 1191}
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
index fa71b1e666c..3374a52232c 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.c
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -273,12 +273,18 @@ int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
273 int ret; 273 int ret;
274 int idx; 274 int idx;
275 u16 cks; 275 u16 cks;
276 u32 mask;
276 u8 peek[4]; 277 u8 peek[4];
277 278
278 /* ensure sane contents on invalid reads, for cable swaps */ 279 /* ensure sane contents on invalid reads, for cable swaps */
279 memset(cp, 0, sizeof(*cp)); 280 memset(cp, 0, sizeof(*cp));
280 281
281 if (!qib_qsfp_mod_present(ppd)) { 282 mask = QSFP_GPIO_MOD_PRS_N;
283 if (ppd->hw_pidx)
284 mask <<= QSFP_GPIO_PORT2_SHIFT;
285
286 ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
287 if (ret & mask) {
282 ret = -ENODEV; 288 ret = -ENODEV;
283 goto bail; 289 goto bail;
284 } 290 }
@@ -438,19 +444,6 @@ const char * const qib_qsfp_devtech[16] = {
438 444
439static const char *pwr_codes = "1.5W2.0W2.5W3.5W"; 445static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
440 446
441int qib_qsfp_mod_present(struct qib_pportdata *ppd)
442{
443 u32 mask;
444 int ret;
445
446 mask = QSFP_GPIO_MOD_PRS_N <<
447 (ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT);
448 ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
449
450 return !((ret & mask) >>
451 ((ppd->hw_pidx * QSFP_GPIO_PORT2_SHIFT) + 3));
452}
453
454/* 447/*
455 * Initialize structures that control access to QSFP. Called once per port 448 * Initialize structures that control access to QSFP. Called once per port
456 * on cards that support QSFP. 449 * on cards that support QSFP.
@@ -459,6 +452,7 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
459 void (*fevent)(struct work_struct *)) 452 void (*fevent)(struct work_struct *))
460{ 453{
461 u32 mask, highs; 454 u32 mask, highs;
455 int pins;
462 456
463 struct qib_devdata *dd = qd->ppd->dd; 457 struct qib_devdata *dd = qd->ppd->dd;
464 458
@@ -480,6 +474,19 @@ void qib_qsfp_init(struct qib_qsfp_data *qd,
480 udelay(20); /* Generous RST dwell */ 474 udelay(20); /* Generous RST dwell */
481 475
482 dd->f_gpio_mod(dd, mask, mask, mask); 476 dd->f_gpio_mod(dd, mask, mask, mask);
477 /* Spec says module can take up to two seconds! */
478 mask = QSFP_GPIO_MOD_PRS_N;
479 if (qd->ppd->hw_pidx)
480 mask <<= QSFP_GPIO_PORT2_SHIFT;
481
482 /* Do not try to wait here. Better to let event handle it */
483 pins = dd->f_gpio_mod(dd, 0, 0, 0);
484 if (pins & mask)
485 goto bail;
486 /* We see a module, but it may be unwise to look yet. Just schedule */
487 qd->t_insert = get_jiffies_64();
488 queue_work(ib_wq, &qd->work);
489bail:
483 return; 490 return;
484} 491}
485 492
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
index 91908f533a2..c109bbdc90a 100644
--- a/drivers/infiniband/hw/qib/qib_qsfp.h
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -34,7 +34,6 @@
34 34
35#define QSFP_DEV 0xA0 35#define QSFP_DEV 0xA0
36#define QSFP_PWR_LAG_MSEC 2000 36#define QSFP_PWR_LAG_MSEC 2000
37#define QSFP_MODPRS_LAG_MSEC 20
38 37
39/* 38/*
40 * Below are masks for various QSFP signals, for Port 1. 39 * Below are masks for various QSFP signals, for Port 1.
@@ -177,13 +176,11 @@ struct qib_qsfp_data {
177 struct qib_pportdata *ppd; 176 struct qib_pportdata *ppd;
178 struct work_struct work; 177 struct work_struct work;
179 struct qib_qsfp_cache cache; 178 struct qib_qsfp_cache cache;
180 unsigned long t_insert; 179 u64 t_insert;
181 u8 modpresent;
182}; 180};
183 181
184extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, 182extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
185 struct qib_qsfp_cache *cp); 183 struct qib_qsfp_cache *cp);
186extern int qib_qsfp_mod_present(struct qib_pportdata *ppd);
187extern void qib_qsfp_init(struct qib_qsfp_data *qd, 184extern void qib_qsfp_init(struct qib_qsfp_data *qd,
188 void (*fevent)(struct work_struct *)); 185 void (*fevent)(struct work_struct *));
189extern void qib_qsfp_deinit(struct qib_qsfp_data *qd); 186extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
index 3ab341320ea..eca0c41f122 100644
--- a/drivers/infiniband/hw/qib/qib_rc.c
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -59,7 +59,8 @@ static void start_timer(struct qib_qp *qp)
59 qp->s_flags |= QIB_S_TIMER; 59 qp->s_flags |= QIB_S_TIMER;
60 qp->s_timer.function = rc_timeout; 60 qp->s_timer.function = rc_timeout;
61 /* 4.096 usec. * (1 << qp->timeout) */ 61 /* 4.096 usec. * (1 << qp->timeout) */
62 qp->s_timer.expires = jiffies + qp->timeout_jiffies; 62 qp->s_timer.expires = jiffies +
63 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
63 add_timer(&qp->s_timer); 64 add_timer(&qp->s_timer);
64} 65}
65 66
@@ -95,7 +96,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
95 case OP(RDMA_READ_RESPONSE_ONLY): 96 case OP(RDMA_READ_RESPONSE_ONLY):
96 e = &qp->s_ack_queue[qp->s_tail_ack_queue]; 97 e = &qp->s_ack_queue[qp->s_tail_ack_queue];
97 if (e->rdma_sge.mr) { 98 if (e->rdma_sge.mr) {
98 qib_put_mr(e->rdma_sge.mr); 99 atomic_dec(&e->rdma_sge.mr->refcount);
99 e->rdma_sge.mr = NULL; 100 e->rdma_sge.mr = NULL;
100 } 101 }
101 /* FALLTHROUGH */ 102 /* FALLTHROUGH */
@@ -133,7 +134,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
133 /* Copy SGE state in case we need to resend */ 134 /* Copy SGE state in case we need to resend */
134 qp->s_rdma_mr = e->rdma_sge.mr; 135 qp->s_rdma_mr = e->rdma_sge.mr;
135 if (qp->s_rdma_mr) 136 if (qp->s_rdma_mr)
136 qib_get_mr(qp->s_rdma_mr); 137 atomic_inc(&qp->s_rdma_mr->refcount);
137 qp->s_ack_rdma_sge.sge = e->rdma_sge; 138 qp->s_ack_rdma_sge.sge = e->rdma_sge;
138 qp->s_ack_rdma_sge.num_sge = 1; 139 qp->s_ack_rdma_sge.num_sge = 1;
139 qp->s_cur_sge = &qp->s_ack_rdma_sge; 140 qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +173,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
172 qp->s_cur_sge = &qp->s_ack_rdma_sge; 173 qp->s_cur_sge = &qp->s_ack_rdma_sge;
173 qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr; 174 qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
174 if (qp->s_rdma_mr) 175 if (qp->s_rdma_mr)
175 qib_get_mr(qp->s_rdma_mr); 176 atomic_inc(&qp->s_rdma_mr->refcount);
176 len = qp->s_ack_rdma_sge.sge.sge_length; 177 len = qp->s_ack_rdma_sge.sge.sge_length;
177 if (len > pmtu) 178 if (len > pmtu)
178 len = pmtu; 179 len = pmtu;
@@ -238,15 +239,15 @@ int qib_make_rc_req(struct qib_qp *qp)
238 u32 len; 239 u32 len;
239 u32 bth0; 240 u32 bth0;
240 u32 bth2; 241 u32 bth2;
241 u32 pmtu = qp->pmtu; 242 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
242 char newreq; 243 char newreq;
243 unsigned long flags; 244 unsigned long flags;
244 int ret = 0; 245 int ret = 0;
245 int delta; 246 int delta;
246 247
247 ohdr = &qp->s_hdr->u.oth; 248 ohdr = &qp->s_hdr.u.oth;
248 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 249 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
249 ohdr = &qp->s_hdr->u.l.oth; 250 ohdr = &qp->s_hdr.u.l.oth;
250 251
251 /* 252 /*
252 * The lock is needed to synchronize between the sending tasklet, 253 * The lock is needed to synchronize between the sending tasklet,
@@ -271,9 +272,13 @@ int qib_make_rc_req(struct qib_qp *qp)
271 goto bail; 272 goto bail;
272 } 273 }
273 wqe = get_swqe_ptr(qp, qp->s_last); 274 wqe = get_swqe_ptr(qp, qp->s_last);
274 qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? 275 while (qp->s_last != qp->s_acked) {
275 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); 276 qib_send_complete(qp, wqe, IB_WC_SUCCESS);
276 /* will get called again */ 277 if (++qp->s_last >= qp->s_size)
278 qp->s_last = 0;
279 wqe = get_swqe_ptr(qp, qp->s_last);
280 }
281 qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
277 goto done; 282 goto done;
278 } 283 }
279 284
@@ -1012,7 +1017,7 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
1012 for (i = 0; i < wqe->wr.num_sge; i++) { 1017 for (i = 0; i < wqe->wr.num_sge; i++) {
1013 struct qib_sge *sge = &wqe->sg_list[i]; 1018 struct qib_sge *sge = &wqe->sg_list[i];
1014 1019
1015 qib_put_mr(sge->mr); 1020 atomic_dec(&sge->mr->refcount);
1016 } 1021 }
1017 /* Post a send completion queue entry if requested. */ 1022 /* Post a send completion queue entry if requested. */
1018 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || 1023 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1068,7 +1073,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
1068 for (i = 0; i < wqe->wr.num_sge; i++) { 1073 for (i = 0; i < wqe->wr.num_sge; i++) {
1069 struct qib_sge *sge = &wqe->sg_list[i]; 1074 struct qib_sge *sge = &wqe->sg_list[i];
1070 1075
1071 qib_put_mr(sge->mr); 1076 atomic_dec(&sge->mr->refcount);
1072 } 1077 }
1073 /* Post a send completion queue entry if requested. */ 1078 /* Post a send completion queue entry if requested. */
1074 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) || 1079 if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
@@ -1514,7 +1519,9 @@ read_middle:
1514 * 4.096 usec. * (1 << qp->timeout) 1519 * 4.096 usec. * (1 << qp->timeout)
1515 */ 1520 */
1516 qp->s_flags |= QIB_S_TIMER; 1521 qp->s_flags |= QIB_S_TIMER;
1517 mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies); 1522 mod_timer(&qp->s_timer, jiffies +
1523 usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
1524 1000UL));
1518 if (qp->s_flags & QIB_S_WAIT_ACK) { 1525 if (qp->s_flags & QIB_S_WAIT_ACK) {
1519 qp->s_flags &= ~QIB_S_WAIT_ACK; 1526 qp->s_flags &= ~QIB_S_WAIT_ACK;
1520 qib_schedule_send(qp); 1527 qib_schedule_send(qp);
@@ -1725,12 +1732,12 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
1725 * same request. 1732 * same request.
1726 */ 1733 */
1727 offset = ((psn - e->psn) & QIB_PSN_MASK) * 1734 offset = ((psn - e->psn) & QIB_PSN_MASK) *
1728 qp->pmtu; 1735 ib_mtu_enum_to_int(qp->path_mtu);
1729 len = be32_to_cpu(reth->length); 1736 len = be32_to_cpu(reth->length);
1730 if (unlikely(offset + len != e->rdma_sge.sge_length)) 1737 if (unlikely(offset + len != e->rdma_sge.sge_length))
1731 goto unlock_done; 1738 goto unlock_done;
1732 if (e->rdma_sge.mr) { 1739 if (e->rdma_sge.mr) {
1733 qib_put_mr(e->rdma_sge.mr); 1740 atomic_dec(&e->rdma_sge.mr->refcount);
1734 e->rdma_sge.mr = NULL; 1741 e->rdma_sge.mr = NULL;
1735 } 1742 }
1736 if (len != 0) { 1743 if (len != 0) {
@@ -1869,7 +1876,7 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
1869 u32 psn; 1876 u32 psn;
1870 u32 pad; 1877 u32 pad;
1871 struct ib_wc wc; 1878 struct ib_wc wc;
1872 u32 pmtu = qp->pmtu; 1879 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
1873 int diff; 1880 int diff;
1874 struct ib_reth *reth; 1881 struct ib_reth *reth;
1875 unsigned long flags; 1882 unsigned long flags;
@@ -1885,8 +1892,10 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
1885 } 1892 }
1886 1893
1887 opcode = be32_to_cpu(ohdr->bth[0]); 1894 opcode = be32_to_cpu(ohdr->bth[0]);
1895 spin_lock_irqsave(&qp->s_lock, flags);
1888 if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) 1896 if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
1889 return; 1897 goto sunlock;
1898 spin_unlock_irqrestore(&qp->s_lock, flags);
1890 1899
1891 psn = be32_to_cpu(ohdr->bth[2]); 1900 psn = be32_to_cpu(ohdr->bth[2]);
1892 opcode >>= 24; 1901 opcode >>= 24;
@@ -1946,6 +1955,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
1946 break; 1955 break;
1947 } 1956 }
1948 1957
1958 memset(&wc, 0, sizeof wc);
1959
1949 if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) { 1960 if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
1950 qp->r_flags |= QIB_R_COMM_EST; 1961 qp->r_flags |= QIB_R_COMM_EST;
1951 if (qp->ibqp.event_handler) { 1962 if (qp->ibqp.event_handler) {
@@ -1998,19 +2009,16 @@ send_middle:
1998 goto rnr_nak; 2009 goto rnr_nak;
1999 qp->r_rcv_len = 0; 2010 qp->r_rcv_len = 0;
2000 if (opcode == OP(SEND_ONLY)) 2011 if (opcode == OP(SEND_ONLY))
2001 goto no_immediate_data; 2012 goto send_last;
2002 /* FALLTHROUGH for SEND_ONLY_WITH_IMMEDIATE */ 2013 /* FALLTHROUGH */
2003 case OP(SEND_LAST_WITH_IMMEDIATE): 2014 case OP(SEND_LAST_WITH_IMMEDIATE):
2004send_last_imm: 2015send_last_imm:
2005 wc.ex.imm_data = ohdr->u.imm_data; 2016 wc.ex.imm_data = ohdr->u.imm_data;
2006 hdrsize += 4; 2017 hdrsize += 4;
2007 wc.wc_flags = IB_WC_WITH_IMM; 2018 wc.wc_flags = IB_WC_WITH_IMM;
2008 goto send_last; 2019 /* FALLTHROUGH */
2009 case OP(SEND_LAST): 2020 case OP(SEND_LAST):
2010 case OP(RDMA_WRITE_LAST): 2021 case OP(RDMA_WRITE_LAST):
2011no_immediate_data:
2012 wc.wc_flags = 0;
2013 wc.ex.imm_data = 0;
2014send_last: 2022send_last:
2015 /* Get the number of bytes the message was padded by. */ 2023 /* Get the number of bytes the message was padded by. */
2016 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 2024 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -2024,7 +2032,11 @@ send_last:
2024 if (unlikely(wc.byte_len > qp->r_len)) 2032 if (unlikely(wc.byte_len > qp->r_len))
2025 goto nack_inv; 2033 goto nack_inv;
2026 qib_copy_sge(&qp->r_sge, data, tlen, 1); 2034 qib_copy_sge(&qp->r_sge, data, tlen, 1);
2027 qib_put_ss(&qp->r_sge); 2035 while (qp->r_sge.num_sge) {
2036 atomic_dec(&qp->r_sge.sge.mr->refcount);
2037 if (--qp->r_sge.num_sge)
2038 qp->r_sge.sge = *qp->r_sge.sg_list++;
2039 }
2028 qp->r_msn++; 2040 qp->r_msn++;
2029 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) 2041 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
2030 break; 2042 break;
@@ -2039,11 +2051,6 @@ send_last:
2039 wc.src_qp = qp->remote_qpn; 2051 wc.src_qp = qp->remote_qpn;
2040 wc.slid = qp->remote_ah_attr.dlid; 2052 wc.slid = qp->remote_ah_attr.dlid;
2041 wc.sl = qp->remote_ah_attr.sl; 2053 wc.sl = qp->remote_ah_attr.sl;
2042 /* zero fields that are N/A */
2043 wc.vendor_err = 0;
2044 wc.pkey_index = 0;
2045 wc.dlid_path_bits = 0;
2046 wc.port_num = 0;
2047 /* Signal completion event if the solicited bit is set. */ 2054 /* Signal completion event if the solicited bit is set. */
2048 qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 2055 qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
2049 (ohdr->bth[0] & 2056 (ohdr->bth[0] &
@@ -2082,7 +2089,7 @@ send_last:
2082 if (opcode == OP(RDMA_WRITE_FIRST)) 2089 if (opcode == OP(RDMA_WRITE_FIRST))
2083 goto send_middle; 2090 goto send_middle;
2084 else if (opcode == OP(RDMA_WRITE_ONLY)) 2091 else if (opcode == OP(RDMA_WRITE_ONLY))
2085 goto no_immediate_data; 2092 goto send_last;
2086 ret = qib_get_rwqe(qp, 1); 2093 ret = qib_get_rwqe(qp, 1);
2087 if (ret < 0) 2094 if (ret < 0)
2088 goto nack_op_err; 2095 goto nack_op_err;
@@ -2112,7 +2119,7 @@ send_last:
2112 } 2119 }
2113 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 2120 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2114 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { 2121 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2115 qib_put_mr(e->rdma_sge.mr); 2122 atomic_dec(&e->rdma_sge.mr->refcount);
2116 e->rdma_sge.mr = NULL; 2123 e->rdma_sge.mr = NULL;
2117 } 2124 }
2118 reth = &ohdr->u.rc.reth; 2125 reth = &ohdr->u.rc.reth;
@@ -2184,7 +2191,7 @@ send_last:
2184 } 2191 }
2185 e = &qp->s_ack_queue[qp->r_head_ack_queue]; 2192 e = &qp->s_ack_queue[qp->r_head_ack_queue];
2186 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) { 2193 if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
2187 qib_put_mr(e->rdma_sge.mr); 2194 atomic_dec(&e->rdma_sge.mr->refcount);
2188 e->rdma_sge.mr = NULL; 2195 e->rdma_sge.mr = NULL;
2189 } 2196 }
2190 ateth = &ohdr->u.atomic_eth; 2197 ateth = &ohdr->u.atomic_eth;
@@ -2206,7 +2213,7 @@ send_last:
2206 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 2213 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
2207 be64_to_cpu(ateth->compare_data), 2214 be64_to_cpu(ateth->compare_data),
2208 sdata); 2215 sdata);
2209 qib_put_mr(qp->r_sge.sge.mr); 2216 atomic_dec(&qp->r_sge.sge.mr->refcount);
2210 qp->r_sge.num_sge = 0; 2217 qp->r_sge.num_sge = 0;
2211 e->opcode = opcode; 2218 e->opcode = opcode;
2212 e->sent = 0; 2219 e->sent = 0;
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
index 357b6cfcd46..eb78d9367f0 100644
--- a/drivers/infiniband/hw/qib/qib_ruc.c
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -110,7 +110,7 @@ bad_lkey:
110 while (j) { 110 while (j) {
111 struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge; 111 struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
112 112
113 qib_put_mr(sge->mr); 113 atomic_dec(&sge->mr->refcount);
114 } 114 }
115 ss->num_sge = 0; 115 ss->num_sge = 0;
116 memset(&wc, 0, sizeof(wc)); 116 memset(&wc, 0, sizeof(wc));
@@ -260,15 +260,12 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
260 260
261/* 261/*
262 * 262 *
263 * This should be called with the QP r_lock held. 263 * This should be called with the QP s_lock held.
264 *
265 * The s_lock will be acquired around the qib_migrate_qp() call.
266 */ 264 */
267int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, 265int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
268 int has_grh, struct qib_qp *qp, u32 bth0) 266 int has_grh, struct qib_qp *qp, u32 bth0)
269{ 267{
270 __be64 guid; 268 __be64 guid;
271 unsigned long flags;
272 269
273 if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) { 270 if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
274 if (!has_grh) { 271 if (!has_grh) {
@@ -298,9 +295,7 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
298 if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid || 295 if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
299 ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num) 296 ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
300 goto err; 297 goto err;
301 spin_lock_irqsave(&qp->s_lock, flags);
302 qib_migrate_qp(qp); 298 qib_migrate_qp(qp);
303 spin_unlock_irqrestore(&qp->s_lock, flags);
304 } else { 299 } else {
305 if (!has_grh) { 300 if (!has_grh) {
306 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 301 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
@@ -501,7 +496,7 @@ again:
501 (u64) atomic64_add_return(sdata, maddr) - sdata : 496 (u64) atomic64_add_return(sdata, maddr) - sdata :
502 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, 497 (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
503 sdata, wqe->wr.wr.atomic.swap); 498 sdata, wqe->wr.wr.atomic.swap);
504 qib_put_mr(qp->r_sge.sge.mr); 499 atomic_dec(&qp->r_sge.sge.mr->refcount);
505 qp->r_sge.num_sge = 0; 500 qp->r_sge.num_sge = 0;
506 goto send_comp; 501 goto send_comp;
507 502
@@ -525,7 +520,7 @@ again:
525 sge->sge_length -= len; 520 sge->sge_length -= len;
526 if (sge->sge_length == 0) { 521 if (sge->sge_length == 0) {
527 if (!release) 522 if (!release)
528 qib_put_mr(sge->mr); 523 atomic_dec(&sge->mr->refcount);
529 if (--sqp->s_sge.num_sge) 524 if (--sqp->s_sge.num_sge)
530 *sge = *sqp->s_sge.sg_list++; 525 *sge = *sqp->s_sge.sg_list++;
531 } else if (sge->length == 0 && sge->mr->lkey) { 526 } else if (sge->length == 0 && sge->mr->lkey) {
@@ -542,7 +537,11 @@ again:
542 sqp->s_len -= len; 537 sqp->s_len -= len;
543 } 538 }
544 if (release) 539 if (release)
545 qib_put_ss(&qp->r_sge); 540 while (qp->r_sge.num_sge) {
541 atomic_dec(&qp->r_sge.sge.mr->refcount);
542 if (--qp->r_sge.num_sge)
543 qp->r_sge.sge = *qp->r_sge.sg_list++;
544 }
546 545
547 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) 546 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
548 goto send_comp; 547 goto send_comp;
@@ -684,17 +683,17 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
684 nwords = (qp->s_cur_size + extra_bytes) >> 2; 683 nwords = (qp->s_cur_size + extra_bytes) >> 2;
685 lrh0 = QIB_LRH_BTH; 684 lrh0 = QIB_LRH_BTH;
686 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) { 685 if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
687 qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, 686 qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
688 &qp->remote_ah_attr.grh, 687 &qp->remote_ah_attr.grh,
689 qp->s_hdrwords, nwords); 688 qp->s_hdrwords, nwords);
690 lrh0 = QIB_LRH_GRH; 689 lrh0 = QIB_LRH_GRH;
691 } 690 }
692 lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 | 691 lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
693 qp->remote_ah_attr.sl << 4; 692 qp->remote_ah_attr.sl << 4;
694 qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); 693 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
695 qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid); 694 qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
696 qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); 695 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
697 qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid | 696 qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
698 qp->remote_ah_attr.src_path_bits); 697 qp->remote_ah_attr.src_path_bits);
699 bth0 |= qib_get_pkey(ibp, qp->s_pkey_index); 698 bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
700 bth0 |= extra_bytes << 20; 699 bth0 |= extra_bytes << 20;
@@ -754,7 +753,7 @@ void qib_do_send(struct work_struct *work)
754 * If the packet cannot be sent now, return and 753 * If the packet cannot be sent now, return and
755 * the send tasklet will be woken up later. 754 * the send tasklet will be woken up later.
756 */ 755 */
757 if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords, 756 if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
758 qp->s_cur_sge, qp->s_cur_size)) 757 qp->s_cur_sge, qp->s_cur_size))
759 break; 758 break;
760 /* Record that s_hdr is empty. */ 759 /* Record that s_hdr is empty. */
@@ -778,7 +777,7 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
778 for (i = 0; i < wqe->wr.num_sge; i++) { 777 for (i = 0; i < wqe->wr.num_sge; i++) {
779 struct qib_sge *sge = &wqe->sg_list[i]; 778 struct qib_sge *sge = &wqe->sg_list[i];
780 779
781 qib_put_mr(sge->mr); 780 atomic_dec(&sge->mr->refcount);
782 } 781 }
783 if (qp->ibqp.qp_type == IB_QPT_UD || 782 if (qp->ibqp.qp_type == IB_QPT_UD ||
784 qp->ibqp.qp_type == IB_QPT_SMI || 783 qp->ibqp.qp_type == IB_QPT_SMI ||
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
index 50a8a0d4fe6..e9f9f8bc320 100644
--- a/drivers/infiniband/hw/qib/qib_sd7220.c
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -38,7 +38,6 @@
38 38
39#include <linux/pci.h> 39#include <linux/pci.h>
40#include <linux/delay.h> 40#include <linux/delay.h>
41#include <linux/module.h>
42#include <linux/firmware.h> 41#include <linux/firmware.h>
43 42
44#include "qib.h" 43#include "qib.h"
@@ -300,7 +299,7 @@ bail:
300} 299}
301 300
302static void qib_sd_trimdone_monitor(struct qib_devdata *dd, 301static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
303 const char *where) 302 const char *where)
304{ 303{
305 int ret, chn, baduns; 304 int ret, chn, baduns;
306 u64 val; 305 u64 val;
@@ -342,17 +341,15 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
342 ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, 341 ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
343 IB_CTRL2(chn), 0, 0); 342 IB_CTRL2(chn), 0, 0);
344 if (ret < 0) 343 if (ret < 0)
345 qib_dev_err(dd, 344 qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
346 "Failed checking TRIMDONE, chn %d (%s)\n", 345 " (%s)\n", chn, where);
347 chn, where);
348 346
349 if (!(ret & 0x10)) { 347 if (!(ret & 0x10)) {
350 int probe; 348 int probe;
351 349
352 baduns |= (1 << chn); 350 baduns |= (1 << chn);
353 qib_dev_err(dd, 351 qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
354 "TRIMDONE cleared on chn %d (%02X). (%s)\n", 352 " (%s)\n", chn, ret, where);
355 chn, ret, where);
356 probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES, 353 probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
357 IB_PGUDP(0), 0, 0); 354 IB_PGUDP(0), 0, 0);
358 qib_dev_err(dd, "probe is %d (%02X)\n", 355 qib_dev_err(dd, "probe is %d (%02X)\n",
@@ -372,13 +369,13 @@ static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
372 /* Read CTRL reg for each channel to check TRIMDONE */ 369 /* Read CTRL reg for each channel to check TRIMDONE */
373 if (baduns & (1 << chn)) { 370 if (baduns & (1 << chn)) {
374 qib_dev_err(dd, 371 qib_dev_err(dd,
375 "Resetting TRIMDONE on chn %d (%s)\n", 372 "Reseting TRIMDONE on chn %d (%s)\n",
376 chn, where); 373 chn, where);
377 ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, 374 ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
378 IB_CTRL2(chn), 0x10, 0x10); 375 IB_CTRL2(chn), 0x10, 0x10);
379 if (ret < 0) 376 if (ret < 0)
380 qib_dev_err(dd, 377 qib_dev_err(dd, "Failed re-setting "
381 "Failed re-setting TRIMDONE, chn %d (%s)\n", 378 "TRIMDONE, chn %d (%s)\n",
382 chn, where); 379 chn, where);
383 } 380 }
384 } 381 }
@@ -1146,10 +1143,10 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
1146 if (ret < 0) { 1143 if (ret < 0) {
1147 int sloc = loc >> EPB_ADDR_SHF; 1144 int sloc = loc >> EPB_ADDR_SHF;
1148 1145
1149 qib_dev_err(dd, 1146 qib_dev_err(dd, "pre-read failed: elt %d,"
1150 "pre-read failed: elt %d, addr 0x%X, chnl %d\n", 1147 " addr 0x%X, chnl %d\n",
1151 (sloc & 0xF), 1148 (sloc & 0xF),
1152 (sloc >> 9) & 0x3f, chnl); 1149 (sloc >> 9) & 0x3f, chnl);
1153 return ret; 1150 return ret;
1154 } 1151 }
1155 val = (ret & ~mask) | (val & mask); 1152 val = (ret & ~mask) | (val & mask);
@@ -1159,9 +1156,9 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
1159 if (ret < 0) { 1156 if (ret < 0) {
1160 int sloc = loc >> EPB_ADDR_SHF; 1157 int sloc = loc >> EPB_ADDR_SHF;
1161 1158
1162 qib_dev_err(dd, 1159 qib_dev_err(dd, "Global WR failed: elt %d,"
1163 "Global WR failed: elt %d, addr 0x%X, val %02X\n", 1160 " addr 0x%X, val %02X\n",
1164 (sloc & 0xF), (sloc >> 9) & 0x3f, val); 1161 (sloc & 0xF), (sloc >> 9) & 0x3f, val);
1165 } 1162 }
1166 return ret; 1163 return ret;
1167 } 1164 }
@@ -1175,10 +1172,11 @@ static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
1175 if (ret < 0) { 1172 if (ret < 0) {
1176 int sloc = loc >> EPB_ADDR_SHF; 1173 int sloc = loc >> EPB_ADDR_SHF;
1177 1174
1178 qib_dev_err(dd, 1175 qib_dev_err(dd, "Write failed: elt %d,"
1179 "Write failed: elt %d, addr 0x%X, chnl %d, val 0x%02X, mask 0x%02X\n", 1176 " addr 0x%X, chnl %d, val 0x%02X,"
1180 (sloc & 0xF), (sloc >> 9) & 0x3f, chnl, 1177 " mask 0x%02X\n",
1181 val & 0xFF, mask & 0xFF); 1178 (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
1179 val & 0xFF, mask & 0xFF);
1182 break; 1180 break;
1183 } 1181 }
1184 } 1182 }
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
index 3fc51443121..cad44491320 100644
--- a/drivers/infiniband/hw/qib/qib_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2007 - 2012 QLogic Corporation. All rights reserved.
4 * 3 *
5 * This software is available to you under a choice of one of two 4 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU 5 * licenses. You may choose to be licensed under the terms of the GNU
@@ -33,7 +32,6 @@
33 32
34#include <linux/spinlock.h> 33#include <linux/spinlock.h>
35#include <linux/netdevice.h> 34#include <linux/netdevice.h>
36#include <linux/moduleparam.h>
37 35
38#include "qib.h" 36#include "qib.h"
39#include "qib_common.h" 37#include "qib_common.h"
@@ -277,8 +275,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
277 GFP_KERNEL); 275 GFP_KERNEL);
278 276
279 if (!ppd->sdma_descq) { 277 if (!ppd->sdma_descq) {
280 qib_dev_err(ppd->dd, 278 qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor "
281 "failed to allocate SendDMA descriptor FIFO memory\n"); 279 "FIFO memory\n");
282 goto bail; 280 goto bail;
283 } 281 }
284 282
@@ -286,8 +284,8 @@ static int alloc_sdma(struct qib_pportdata *ppd)
286 ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev, 284 ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
287 PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL); 285 PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
288 if (!ppd->sdma_head_dma) { 286 if (!ppd->sdma_head_dma) {
289 qib_dev_err(ppd->dd, 287 qib_dev_err(ppd->dd, "failed to allocate SendDMA "
290 "failed to allocate SendDMA head memory\n"); 288 "head memory\n");
291 goto cleanup_descq; 289 goto cleanup_descq;
292 } 290 }
293 ppd->sdma_head_dma[0] = 0; 291 ppd->sdma_head_dma[0] = 0;
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
index d6235931a1b..c3ec8efc2ed 100644
--- a/drivers/infiniband/hw/qib/qib_srq.c
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -107,11 +107,6 @@ struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
107 u32 sz; 107 u32 sz;
108 struct ib_srq *ret; 108 struct ib_srq *ret;
109 109
110 if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
111 ret = ERR_PTR(-ENOSYS);
112 goto done;
113 }
114
115 if (srq_init_attr->attr.max_sge == 0 || 110 if (srq_init_attr->attr.max_sge == 0 ||
116 srq_init_attr->attr.max_sge > ib_qib_max_srq_sges || 111 srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
117 srq_init_attr->attr.max_wr == 0 || 112 srq_init_attr->attr.max_wr == 0 ||
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
index 034cc821de5..14d129de432 100644
--- a/drivers/infiniband/hw/qib/qib_sysfs.c
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2006 PathScale, Inc. All rights reserved.
5 * 4 *
6 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -34,7 +33,41 @@
34#include <linux/ctype.h> 33#include <linux/ctype.h>
35 34
36#include "qib.h" 35#include "qib.h"
37#include "qib_mad.h" 36
37/**
38 * qib_parse_ushort - parse an unsigned short value in an arbitrary base
39 * @str: the string containing the number
40 * @valp: where to put the result
41 *
42 * Returns the number of bytes consumed, or negative value on error.
43 */
44static int qib_parse_ushort(const char *str, unsigned short *valp)
45{
46 unsigned long val;
47 char *end;
48 int ret;
49
50 if (!isdigit(str[0])) {
51 ret = -EINVAL;
52 goto bail;
53 }
54
55 val = simple_strtoul(str, &end, 0);
56
57 if (val > 0xffff) {
58 ret = -EINVAL;
59 goto bail;
60 }
61
62 *valp = val;
63
64 ret = end + 1 - str;
65 if (ret == 0)
66 ret = -EINVAL;
67
68bail:
69 return ret;
70}
38 71
39/* start of per-port functions */ 72/* start of per-port functions */
40/* 73/*
@@ -57,11 +90,7 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
57 int ret; 90 int ret;
58 u16 val; 91 u16 val;
59 92
60 ret = kstrtou16(buf, 0, &val); 93 ret = qib_parse_ushort(buf, &val);
61 if (ret) {
62 qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
63 return ret;
64 }
65 94
66 /* 95 /*
67 * Set the "intentional" heartbeat enable per either of 96 * Set the "intentional" heartbeat enable per either of
@@ -70,7 +99,10 @@ static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
70 * because entering loopback mode overrides it and automatically 99 * because entering loopback mode overrides it and automatically
71 * disables heartbeat. 100 * disables heartbeat.
72 */ 101 */
73 ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val); 102 if (ret >= 0)
103 ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
104 if (ret < 0)
105 qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
74 return ret < 0 ? ret : count; 106 return ret < 0 ? ret : count;
75} 107}
76 108
@@ -94,14 +126,12 @@ static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
94 int ret; 126 int ret;
95 u16 val; 127 u16 val;
96 128
97 ret = kstrtou16(buf, 0, &val); 129 ret = qib_parse_ushort(buf, &val);
98 if (ret) { 130 if (ret > 0)
131 qib_set_led_override(ppd, val);
132 else
99 qib_dev_err(dd, "attempt to set invalid LED override\n"); 133 qib_dev_err(dd, "attempt to set invalid LED override\n");
100 return ret; 134 return ret < 0 ? ret : count;
101 }
102
103 qib_set_led_override(ppd, val);
104 return count;
105} 135}
106 136
107static ssize_t show_status(struct qib_pportdata *ppd, char *buf) 137static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
@@ -120,7 +150,7 @@ static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
120 * For userland compatibility, these offsets must remain fixed. 150 * For userland compatibility, these offsets must remain fixed.
121 * They are strings for QIB_STATUS_* 151 * They are strings for QIB_STATUS_*
122 */ 152 */
123static const char * const qib_status_str[] = { 153static const char *qib_status_str[] = {
124 "Initted", 154 "Initted",
125 "", 155 "",
126 "", 156 "",
@@ -201,98 +231,6 @@ static struct attribute *port_default_attributes[] = {
201 NULL 231 NULL
202}; 232};
203 233
204/*
205 * Start of per-port congestion control structures and support code
206 */
207
208/*
209 * Congestion control table size followed by table entries
210 */
211static ssize_t read_cc_table_bin(struct file *filp, struct kobject *kobj,
212 struct bin_attribute *bin_attr,
213 char *buf, loff_t pos, size_t count)
214{
215 int ret;
216 struct qib_pportdata *ppd =
217 container_of(kobj, struct qib_pportdata, pport_cc_kobj);
218
219 if (!qib_cc_table_size || !ppd->ccti_entries_shadow)
220 return -EINVAL;
221
222 ret = ppd->total_cct_entry * sizeof(struct ib_cc_table_entry_shadow)
223 + sizeof(__be16);
224
225 if (pos > ret)
226 return -EINVAL;
227
228 if (count > ret - pos)
229 count = ret - pos;
230
231 if (!count)
232 return count;
233
234 spin_lock(&ppd->cc_shadow_lock);
235 memcpy(buf, ppd->ccti_entries_shadow, count);
236 spin_unlock(&ppd->cc_shadow_lock);
237
238 return count;
239}
240
241static void qib_port_release(struct kobject *kobj)
242{
243 /* nothing to do since memory is freed by qib_free_devdata() */
244}
245
246static struct kobj_type qib_port_cc_ktype = {
247 .release = qib_port_release,
248};
249
250static struct bin_attribute cc_table_bin_attr = {
251 .attr = {.name = "cc_table_bin", .mode = 0444},
252 .read = read_cc_table_bin,
253 .size = PAGE_SIZE,
254};
255
256/*
257 * Congestion settings: port control, control map and an array of 16
258 * entries for the congestion entries - increase, timer, event log
259 * trigger threshold and the minimum injection rate delay.
260 */
261static ssize_t read_cc_setting_bin(struct file *filp, struct kobject *kobj,
262 struct bin_attribute *bin_attr,
263 char *buf, loff_t pos, size_t count)
264{
265 int ret;
266 struct qib_pportdata *ppd =
267 container_of(kobj, struct qib_pportdata, pport_cc_kobj);
268
269 if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
270 return -EINVAL;
271
272 ret = sizeof(struct ib_cc_congestion_setting_attr_shadow);
273
274 if (pos > ret)
275 return -EINVAL;
276 if (count > ret - pos)
277 count = ret - pos;
278
279 if (!count)
280 return count;
281
282 spin_lock(&ppd->cc_shadow_lock);
283 memcpy(buf, ppd->congestion_entries_shadow, count);
284 spin_unlock(&ppd->cc_shadow_lock);
285
286 return count;
287}
288
289static struct bin_attribute cc_setting_bin_attr = {
290 .attr = {.name = "cc_settings_bin", .mode = 0444},
291 .read = read_cc_setting_bin,
292 .size = PAGE_SIZE,
293};
294
295
296static ssize_t qib_portattr_show(struct kobject *kobj, 234static ssize_t qib_portattr_show(struct kobject *kobj,
297 struct attribute *attr, char *buf) 235 struct attribute *attr, char *buf)
298{ 236{
@@ -315,6 +253,10 @@ static ssize_t qib_portattr_store(struct kobject *kobj,
315 return pattr->store(ppd, buf, len); 253 return pattr->store(ppd, buf, len);
316} 254}
317 255
256static void qib_port_release(struct kobject *kobj)
257{
258 /* nothing to do since memory is freed by qib_free_devdata() */
259}
318 260
319static const struct sysfs_ops qib_port_ops = { 261static const struct sysfs_ops qib_port_ops = {
320 .show = qib_portattr_show, 262 .show = qib_portattr_show,
@@ -469,12 +411,12 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
469 struct qib_pportdata *ppd = 411 struct qib_pportdata *ppd =
470 container_of(kobj, struct qib_pportdata, diagc_kobj); 412 container_of(kobj, struct qib_pportdata, diagc_kobj);
471 struct qib_ibport *qibp = &ppd->ibport_data; 413 struct qib_ibport *qibp = &ppd->ibport_data;
472 u32 val; 414 char *endp;
473 int ret; 415 long val = simple_strtol(buf, &endp, 0);
416
417 if (val < 0 || endp == buf)
418 return -EINVAL;
474 419
475 ret = kstrtou32(buf, 0, &val);
476 if (ret)
477 return ret;
478 *(u32 *)((char *) qibp + dattr->counter) = val; 420 *(u32 *)((char *) qibp + dattr->counter) = val;
479 return size; 421 return size;
480} 422}
@@ -561,11 +503,8 @@ static ssize_t show_nctxts(struct device *device,
561 struct qib_devdata *dd = dd_from_dev(dev); 503 struct qib_devdata *dd = dd_from_dev(dev);
562 504
563 /* Return the number of user ports (contexts) available. */ 505 /* Return the number of user ports (contexts) available. */
564 /* The calculation below deals with a special case where 506 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
565 * cfgctxts is set to 1 on a single-port board. */ 507 dd->first_user_ctxt);
566 return scnprintf(buf, PAGE_SIZE, "%u\n",
567 (dd->first_user_ctxt > dd->cfgctxts) ? 0 :
568 (dd->cfgctxts - dd->first_user_ctxt));
569} 508}
570 509
571static ssize_t show_nfreectxts(struct device *device, 510static ssize_t show_nfreectxts(struct device *device,
@@ -576,7 +515,8 @@ static ssize_t show_nfreectxts(struct device *device,
576 struct qib_devdata *dd = dd_from_dev(dev); 515 struct qib_devdata *dd = dd_from_dev(dev);
577 516
578 /* Return the number of free user ports (contexts) available. */ 517 /* Return the number of free user ports (contexts) available. */
579 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->freectxts); 518 return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
519 dd->first_user_ctxt - (u32)qib_stats.sps_ctxts);
580} 520}
581 521
582static ssize_t show_serial(struct device *device, 522static ssize_t show_serial(struct device *device,
@@ -707,9 +647,8 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
707 int ret; 647 int ret;
708 648
709 if (!port_num || port_num > dd->num_pports) { 649 if (!port_num || port_num > dd->num_pports) {
710 qib_dev_err(dd, 650 qib_dev_err(dd, "Skipping infiniband class with "
711 "Skipping infiniband class with invalid port %u\n", 651 "invalid port %u\n", port_num);
712 port_num);
713 ret = -ENODEV; 652 ret = -ENODEV;
714 goto bail; 653 goto bail;
715 } 654 }
@@ -718,9 +657,8 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
718 ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj, 657 ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
719 "linkcontrol"); 658 "linkcontrol");
720 if (ret) { 659 if (ret) {
721 qib_dev_err(dd, 660 qib_dev_err(dd, "Skipping linkcontrol sysfs info, "
722 "Skipping linkcontrol sysfs info, (err %d) port %u\n", 661 "(err %d) port %u\n", ret, port_num);
723 ret, port_num);
724 goto bail; 662 goto bail;
725 } 663 }
726 kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); 664 kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
@@ -728,70 +666,26 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
728 ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj, 666 ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
729 "sl2vl"); 667 "sl2vl");
730 if (ret) { 668 if (ret) {
731 qib_dev_err(dd, 669 qib_dev_err(dd, "Skipping sl2vl sysfs info, "
732 "Skipping sl2vl sysfs info, (err %d) port %u\n", 670 "(err %d) port %u\n", ret, port_num);
733 ret, port_num); 671 goto bail_sl;
734 goto bail_link;
735 } 672 }
736 kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); 673 kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
737 674
738 ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj, 675 ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
739 "diag_counters"); 676 "diag_counters");
740 if (ret) { 677 if (ret) {
741 qib_dev_err(dd, 678 qib_dev_err(dd, "Skipping diag_counters sysfs info, "
742 "Skipping diag_counters sysfs info, (err %d) port %u\n", 679 "(err %d) port %u\n", ret, port_num);
743 ret, port_num);
744 goto bail_sl;
745 }
746 kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
747
748 if (!qib_cc_table_size || !ppd->congestion_entries_shadow)
749 return 0;
750
751 ret = kobject_init_and_add(&ppd->pport_cc_kobj, &qib_port_cc_ktype,
752 kobj, "CCMgtA");
753 if (ret) {
754 qib_dev_err(dd,
755 "Skipping Congestion Control sysfs info, (err %d) port %u\n",
756 ret, port_num);
757 goto bail_diagc; 680 goto bail_diagc;
758 } 681 }
759 682 kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
760 kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
761
762 ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
763 &cc_setting_bin_attr);
764 if (ret) {
765 qib_dev_err(dd,
766 "Skipping Congestion Control setting sysfs info, (err %d) port %u\n",
767 ret, port_num);
768 goto bail_cc;
769 }
770
771 ret = sysfs_create_bin_file(&ppd->pport_cc_kobj,
772 &cc_table_bin_attr);
773 if (ret) {
774 qib_dev_err(dd,
775 "Skipping Congestion Control table sysfs info, (err %d) port %u\n",
776 ret, port_num);
777 goto bail_cc_entry_bin;
778 }
779
780 qib_devinfo(dd->pcidev,
781 "IB%u: Congestion Control Agent enabled for port %d\n",
782 dd->unit, port_num);
783 683
784 return 0; 684 return 0;
785 685
786bail_cc_entry_bin:
787 sysfs_remove_bin_file(&ppd->pport_cc_kobj, &cc_setting_bin_attr);
788bail_cc:
789 kobject_put(&ppd->pport_cc_kobj);
790bail_diagc: 686bail_diagc:
791 kobject_put(&ppd->diagc_kobj);
792bail_sl:
793 kobject_put(&ppd->sl2vl_kobj); 687 kobject_put(&ppd->sl2vl_kobj);
794bail_link: 688bail_sl:
795 kobject_put(&ppd->pport_kobj); 689 kobject_put(&ppd->pport_kobj);
796bail: 690bail:
797 return ret; 691 return ret;
@@ -824,15 +718,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
824 718
825 for (i = 0; i < dd->num_pports; i++) { 719 for (i = 0; i < dd->num_pports; i++) {
826 ppd = &dd->pport[i]; 720 ppd = &dd->pport[i];
827 if (qib_cc_table_size &&
828 ppd->congestion_entries_shadow) {
829 sysfs_remove_bin_file(&ppd->pport_cc_kobj,
830 &cc_setting_bin_attr);
831 sysfs_remove_bin_file(&ppd->pport_cc_kobj,
832 &cc_table_bin_attr);
833 kobject_put(&ppd->pport_cc_kobj);
834 }
835 kobject_put(&ppd->sl2vl_kobj);
836 kobject_put(&ppd->pport_kobj); 721 kobject_put(&ppd->pport_kobj);
722 kobject_put(&ppd->sl2vl_kobj);
837 } 723 }
838} 724}
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
index 647f7beb1b0..ddde72e11ed 100644
--- a/drivers/infiniband/hw/qib/qib_twsi.c
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 4 *
6 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -450,9 +449,8 @@ int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
450 goto failed_write; 449 goto failed_write;
451 ret = qib_twsi_wr(dd, addr, 0); 450 ret = qib_twsi_wr(dd, addr, 0);
452 if (ret) { 451 if (ret) {
453 qib_dev_err(dd, 452 qib_dev_err(dd, "Failed to write interface"
454 "Failed to write interface write addr %02X\n", 453 " write addr %02X\n", addr);
455 addr);
456 goto failed_write; 454 goto failed_write;
457 } 455 }
458 } 456 }
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 31d3561400a..7f36454c225 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -36,7 +36,6 @@
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/netdevice.h> 37#include <linux/netdevice.h>
38#include <linux/vmalloc.h> 38#include <linux/vmalloc.h>
39#include <linux/moduleparam.h>
40 39
41#include "qib.h" 40#include "qib.h"
42 41
@@ -295,7 +294,6 @@ u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
295 294
296 nbufs = last - first + 1; /* number in range to check */ 295 nbufs = last - first + 1; /* number in range to check */
297 if (dd->upd_pio_shadow) { 296 if (dd->upd_pio_shadow) {
298update_shadow:
299 /* 297 /*
300 * Minor optimization. If we had no buffers on last call, 298 * Minor optimization. If we had no buffers on last call,
301 * start out by doing the update; continue and do scan even 299 * start out by doing the update; continue and do scan even
@@ -305,39 +303,37 @@ update_shadow:
305 updated++; 303 updated++;
306 } 304 }
307 i = first; 305 i = first;
306rescan:
308 /* 307 /*
309 * While test_and_set_bit() is atomic, we do that and then the 308 * While test_and_set_bit() is atomic, we do that and then the
310 * change_bit(), and the pair is not. See if this is the cause 309 * change_bit(), and the pair is not. See if this is the cause
311 * of the remaining armlaunch errors. 310 * of the remaining armlaunch errors.
312 */ 311 */
313 spin_lock_irqsave(&dd->pioavail_lock, flags); 312 spin_lock_irqsave(&dd->pioavail_lock, flags);
314 if (dd->last_pio >= first && dd->last_pio <= last)
315 i = dd->last_pio + 1;
316 if (!first)
317 /* adjust to min possible */
318 nbufs = last - dd->min_kernel_pio + 1;
319 for (j = 0; j < nbufs; j++, i++) { 313 for (j = 0; j < nbufs; j++, i++) {
320 if (i > last) 314 if (i > last)
321 i = !first ? dd->min_kernel_pio : first; 315 i = first;
322 if (__test_and_set_bit((2 * i) + 1, shadow)) 316 if (__test_and_set_bit((2 * i) + 1, shadow))
323 continue; 317 continue;
324 /* flip generation bit */ 318 /* flip generation bit */
325 __change_bit(2 * i, shadow); 319 __change_bit(2 * i, shadow);
326 /* remember that the buffer can be written to now */ 320 /* remember that the buffer can be written to now */
327 __set_bit(i, dd->pio_writing); 321 __set_bit(i, dd->pio_writing);
328 if (!first && first != last) /* first == last on VL15, avoid */
329 dd->last_pio = i;
330 break; 322 break;
331 } 323 }
332 spin_unlock_irqrestore(&dd->pioavail_lock, flags); 324 spin_unlock_irqrestore(&dd->pioavail_lock, flags);
333 325
334 if (j == nbufs) { 326 if (j == nbufs) {
335 if (!updated) 327 if (!updated) {
336 /* 328 /*
337 * First time through; shadow exhausted, but may be 329 * First time through; shadow exhausted, but may be
338 * buffers available, try an update and then rescan. 330 * buffers available, try an update and then rescan.
339 */ 331 */
340 goto update_shadow; 332 update_send_bufs(dd);
333 updated++;
334 i = first;
335 goto rescan;
336 }
341 no_send_bufs(dd); 337 no_send_bufs(dd);
342 buf = NULL; 338 buf = NULL;
343 } else { 339 } else {
@@ -425,20 +421,14 @@ void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
425 __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT 421 __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
426 + start, dd->pioavailshadow); 422 + start, dd->pioavailshadow);
427 __set_bit(start, dd->pioavailkernel); 423 __set_bit(start, dd->pioavailkernel);
428 if ((start >> 1) < dd->min_kernel_pio)
429 dd->min_kernel_pio = start >> 1;
430 } else { 424 } else {
431 __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT, 425 __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
432 dd->pioavailshadow); 426 dd->pioavailshadow);
433 __clear_bit(start, dd->pioavailkernel); 427 __clear_bit(start, dd->pioavailkernel);
434 if ((start >> 1) > dd->min_kernel_pio)
435 dd->min_kernel_pio = start >> 1;
436 } 428 }
437 start += 2; 429 start += 2;
438 } 430 }
439 431
440 if (dd->min_kernel_pio > 0 && dd->last_pio < dd->min_kernel_pio - 1)
441 dd->last_pio = dd->min_kernel_pio - 1;
442 spin_unlock_irqrestore(&dd->pioavail_lock, flags); 432 spin_unlock_irqrestore(&dd->pioavail_lock, flags);
443 433
444 dd->f_txchk_change(dd, ostart, len, avail, rcd); 434 dd->f_txchk_change(dd, ostart, len, avail, rcd);
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
index aa3a8035bb6..32ccf3c824c 100644
--- a/drivers/infiniband/hw/qib/qib_uc.c
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -51,7 +51,7 @@ int qib_make_uc_req(struct qib_qp *qp)
51 u32 hwords; 51 u32 hwords;
52 u32 bth0; 52 u32 bth0;
53 u32 len; 53 u32 len;
54 u32 pmtu = qp->pmtu; 54 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
55 int ret = 0; 55 int ret = 0;
56 56
57 spin_lock_irqsave(&qp->s_lock, flags); 57 spin_lock_irqsave(&qp->s_lock, flags);
@@ -72,9 +72,9 @@ int qib_make_uc_req(struct qib_qp *qp)
72 goto done; 72 goto done;
73 } 73 }
74 74
75 ohdr = &qp->s_hdr->u.oth; 75 ohdr = &qp->s_hdr.u.oth;
76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH) 76 if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
77 ohdr = &qp->s_hdr->u.l.oth; 77 ohdr = &qp->s_hdr.u.l.oth;
78 78
79 /* header size in 32-bit words LRH+BTH = (8+12)/4. */ 79 /* header size in 32-bit words LRH+BTH = (8+12)/4. */
80 hwords = 5; 80 hwords = 5;
@@ -243,12 +243,13 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
243 int has_grh, void *data, u32 tlen, struct qib_qp *qp) 243 int has_grh, void *data, u32 tlen, struct qib_qp *qp)
244{ 244{
245 struct qib_other_headers *ohdr; 245 struct qib_other_headers *ohdr;
246 unsigned long flags;
246 u32 opcode; 247 u32 opcode;
247 u32 hdrsize; 248 u32 hdrsize;
248 u32 psn; 249 u32 psn;
249 u32 pad; 250 u32 pad;
250 struct ib_wc wc; 251 struct ib_wc wc;
251 u32 pmtu = qp->pmtu; 252 u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
252 struct ib_reth *reth; 253 struct ib_reth *reth;
253 int ret; 254 int ret;
254 255
@@ -262,11 +263,14 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
262 } 263 }
263 264
264 opcode = be32_to_cpu(ohdr->bth[0]); 265 opcode = be32_to_cpu(ohdr->bth[0]);
266 spin_lock_irqsave(&qp->s_lock, flags);
265 if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode)) 267 if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
266 return; 268 goto sunlock;
269 spin_unlock_irqrestore(&qp->s_lock, flags);
267 270
268 psn = be32_to_cpu(ohdr->bth[2]); 271 psn = be32_to_cpu(ohdr->bth[2]);
269 opcode >>= 24; 272 opcode >>= 24;
273 memset(&wc, 0, sizeof wc);
270 274
271 /* Compare the PSN verses the expected PSN. */ 275 /* Compare the PSN verses the expected PSN. */
272 if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) { 276 if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
@@ -281,7 +285,11 @@ inv:
281 set_bit(QIB_R_REWIND_SGE, &qp->r_aflags); 285 set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
282 qp->r_sge.num_sge = 0; 286 qp->r_sge.num_sge = 0;
283 } else 287 } else
284 qib_put_ss(&qp->r_sge); 288 while (qp->r_sge.num_sge) {
289 atomic_dec(&qp->r_sge.sge.mr->refcount);
290 if (--qp->r_sge.num_sge)
291 qp->r_sge.sge = *qp->r_sge.sg_list++;
292 }
285 qp->r_state = OP(SEND_LAST); 293 qp->r_state = OP(SEND_LAST);
286 switch (opcode) { 294 switch (opcode) {
287 case OP(SEND_FIRST): 295 case OP(SEND_FIRST):
@@ -362,7 +370,7 @@ send_first:
362 } 370 }
363 qp->r_rcv_len = 0; 371 qp->r_rcv_len = 0;
364 if (opcode == OP(SEND_ONLY)) 372 if (opcode == OP(SEND_ONLY))
365 goto no_immediate_data; 373 goto send_last;
366 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE)) 374 else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
367 goto send_last_imm; 375 goto send_last_imm;
368 /* FALLTHROUGH */ 376 /* FALLTHROUGH */
@@ -381,11 +389,8 @@ send_last_imm:
381 wc.ex.imm_data = ohdr->u.imm_data; 389 wc.ex.imm_data = ohdr->u.imm_data;
382 hdrsize += 4; 390 hdrsize += 4;
383 wc.wc_flags = IB_WC_WITH_IMM; 391 wc.wc_flags = IB_WC_WITH_IMM;
384 goto send_last; 392 /* FALLTHROUGH */
385 case OP(SEND_LAST): 393 case OP(SEND_LAST):
386no_immediate_data:
387 wc.ex.imm_data = 0;
388 wc.wc_flags = 0;
389send_last: 394send_last:
390 /* Get the number of bytes the message was padded by. */ 395 /* Get the number of bytes the message was padded by. */
391 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3; 396 pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
@@ -399,20 +404,20 @@ send_last:
399 if (unlikely(wc.byte_len > qp->r_len)) 404 if (unlikely(wc.byte_len > qp->r_len))
400 goto rewind; 405 goto rewind;
401 wc.opcode = IB_WC_RECV; 406 wc.opcode = IB_WC_RECV;
402 qib_copy_sge(&qp->r_sge, data, tlen, 0);
403 qib_put_ss(&qp->s_rdma_read_sge);
404last_imm: 407last_imm:
408 qib_copy_sge(&qp->r_sge, data, tlen, 0);
409 while (qp->s_rdma_read_sge.num_sge) {
410 atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
411 if (--qp->s_rdma_read_sge.num_sge)
412 qp->s_rdma_read_sge.sge =
413 *qp->s_rdma_read_sge.sg_list++;
414 }
405 wc.wr_id = qp->r_wr_id; 415 wc.wr_id = qp->r_wr_id;
406 wc.status = IB_WC_SUCCESS; 416 wc.status = IB_WC_SUCCESS;
407 wc.qp = &qp->ibqp; 417 wc.qp = &qp->ibqp;
408 wc.src_qp = qp->remote_qpn; 418 wc.src_qp = qp->remote_qpn;
409 wc.slid = qp->remote_ah_attr.dlid; 419 wc.slid = qp->remote_ah_attr.dlid;
410 wc.sl = qp->remote_ah_attr.sl; 420 wc.sl = qp->remote_ah_attr.sl;
411 /* zero fields that are N/A */
412 wc.vendor_err = 0;
413 wc.pkey_index = 0;
414 wc.dlid_path_bits = 0;
415 wc.port_num = 0;
416 /* Signal completion event if the solicited bit is set. */ 421 /* Signal completion event if the solicited bit is set. */
417 qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 422 qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
418 (ohdr->bth[0] & 423 (ohdr->bth[0] &
@@ -484,7 +489,13 @@ rdma_last_imm:
484 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 489 if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
485 goto drop; 490 goto drop;
486 if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags)) 491 if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
487 qib_put_ss(&qp->s_rdma_read_sge); 492 while (qp->s_rdma_read_sge.num_sge) {
493 atomic_dec(&qp->s_rdma_read_sge.sge.mr->
494 refcount);
495 if (--qp->s_rdma_read_sge.num_sge)
496 qp->s_rdma_read_sge.sge =
497 *qp->s_rdma_read_sge.sg_list++;
498 }
488 else { 499 else {
489 ret = qib_get_rwqe(qp, 1); 500 ret = qib_get_rwqe(qp, 1);
490 if (ret < 0) 501 if (ret < 0)
@@ -494,8 +505,6 @@ rdma_last_imm:
494 } 505 }
495 wc.byte_len = qp->r_len; 506 wc.byte_len = qp->r_len;
496 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM; 507 wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
497 qib_copy_sge(&qp->r_sge, data, tlen, 1);
498 qib_put_ss(&qp->r_sge);
499 goto last_imm; 508 goto last_imm;
500 509
501 case OP(RDMA_WRITE_LAST): 510 case OP(RDMA_WRITE_LAST):
@@ -511,7 +520,11 @@ rdma_last:
511 if (unlikely(tlen + qp->r_rcv_len != qp->r_len)) 520 if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
512 goto drop; 521 goto drop;
513 qib_copy_sge(&qp->r_sge, data, tlen, 1); 522 qib_copy_sge(&qp->r_sge, data, tlen, 1);
514 qib_put_ss(&qp->r_sge); 523 while (qp->r_sge.num_sge) {
524 atomic_dec(&qp->r_sge.sge.mr->refcount);
525 if (--qp->r_sge.num_sge)
526 qp->r_sge.sge = *qp->r_sge.sg_list++;
527 }
515 break; 528 break;
516 529
517 default: 530 default:
@@ -533,4 +546,6 @@ op_err:
533 qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR); 546 qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
534 return; 547 return;
535 548
549sunlock:
550 spin_unlock_irqrestore(&qp->s_lock, flags);
536} 551}
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
index d6c7fe7f88d..828609fa4d2 100644
--- a/drivers/infiniband/hw/qib/qib_ud.c
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -194,7 +194,11 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
194 } 194 }
195 length -= len; 195 length -= len;
196 } 196 }
197 qib_put_ss(&qp->r_sge); 197 while (qp->r_sge.num_sge) {
198 atomic_dec(&qp->r_sge.sge.mr->refcount);
199 if (--qp->r_sge.num_sge)
200 qp->r_sge.sge = *qp->r_sge.sg_list++;
201 }
198 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) 202 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
199 goto bail_unlock; 203 goto bail_unlock;
200 wc.wr_id = qp->r_wr_id; 204 wc.wr_id = qp->r_wr_id;
@@ -317,11 +321,11 @@ int qib_make_ud_req(struct qib_qp *qp)
317 321
318 if (ah_attr->ah_flags & IB_AH_GRH) { 322 if (ah_attr->ah_flags & IB_AH_GRH) {
319 /* Header size in 32-bit words. */ 323 /* Header size in 32-bit words. */
320 qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh, 324 qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
321 &ah_attr->grh, 325 &ah_attr->grh,
322 qp->s_hdrwords, nwords); 326 qp->s_hdrwords, nwords);
323 lrh0 = QIB_LRH_GRH; 327 lrh0 = QIB_LRH_GRH;
324 ohdr = &qp->s_hdr->u.l.oth; 328 ohdr = &qp->s_hdr.u.l.oth;
325 /* 329 /*
326 * Don't worry about sending to locally attached multicast 330 * Don't worry about sending to locally attached multicast
327 * QPs. It is unspecified by the spec. what happens. 331 * QPs. It is unspecified by the spec. what happens.
@@ -329,7 +333,7 @@ int qib_make_ud_req(struct qib_qp *qp)
329 } else { 333 } else {
330 /* Header size in 32-bit words. */ 334 /* Header size in 32-bit words. */
331 lrh0 = QIB_LRH_BTH; 335 lrh0 = QIB_LRH_BTH;
332 ohdr = &qp->s_hdr->u.oth; 336 ohdr = &qp->s_hdr.u.oth;
333 } 337 }
334 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) { 338 if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
335 qp->s_hdrwords++; 339 qp->s_hdrwords++;
@@ -342,15 +346,15 @@ int qib_make_ud_req(struct qib_qp *qp)
342 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */ 346 lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
343 else 347 else
344 lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12; 348 lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
345 qp->s_hdr->lrh[0] = cpu_to_be16(lrh0); 349 qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
346 qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */ 350 qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
347 qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC); 351 qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
348 lid = ppd->lid; 352 lid = ppd->lid;
349 if (lid) { 353 if (lid) {
350 lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1); 354 lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
351 qp->s_hdr->lrh[3] = cpu_to_be16(lid); 355 qp->s_hdr.lrh[3] = cpu_to_be16(lid);
352 } else 356 } else
353 qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE; 357 qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
354 if (wqe->wr.send_flags & IB_SEND_SOLICITED) 358 if (wqe->wr.send_flags & IB_SEND_SOLICITED)
355 bth0 |= IB_BTH_SOLICITED; 359 bth0 |= IB_BTH_SOLICITED;
356 bth0 |= extra_bytes << 20; 360 bth0 |= extra_bytes << 20;
@@ -552,7 +556,11 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
552 } else 556 } else
553 qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1); 557 qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
554 qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1); 558 qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
555 qib_put_ss(&qp->r_sge); 559 while (qp->r_sge.num_sge) {
560 atomic_dec(&qp->r_sge.sge.mr->refcount);
561 if (--qp->r_sge.num_sge)
562 qp->r_sge.sge = *qp->r_sge.sg_list++;
563 }
556 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) 564 if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
557 return; 565 return;
558 wc.wr_id = qp->r_wr_id; 566 wc.wr_id = qp->r_wr_id;
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index 2bc1d2b9629..7689e49c13c 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -74,7 +74,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
74 goto bail_release; 74 goto bail_release;
75 } 75 }
76 76
77 current->mm->pinned_vm += num_pages; 77 current->mm->locked_vm += num_pages;
78 78
79 ret = 0; 79 ret = 0;
80 goto bail; 80 goto bail;
@@ -151,7 +151,7 @@ void qib_release_user_pages(struct page **p, size_t num_pages)
151 __qib_release_user_pages(p, num_pages, 1); 151 __qib_release_user_pages(p, num_pages, 1);
152 152
153 if (current->mm) { 153 if (current->mm) {
154 current->mm->pinned_vm -= num_pages; 154 current->mm->locked_vm -= num_pages;
155 up_write(&current->mm->mmap_sem); 155 up_write(&current->mm->mmap_sem);
156 } 156 }
157} 157}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index ba51a4715a1..9fab4048885 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -35,16 +35,14 @@
35#include <rdma/ib_mad.h> 35#include <rdma/ib_mad.h>
36#include <rdma/ib_user_verbs.h> 36#include <rdma/ib_user_verbs.h>
37#include <linux/io.h> 37#include <linux/io.h>
38#include <linux/module.h>
39#include <linux/utsname.h> 38#include <linux/utsname.h>
40#include <linux/rculist.h> 39#include <linux/rculist.h>
41#include <linux/mm.h> 40#include <linux/mm.h>
42#include <linux/random.h>
43 41
44#include "qib.h" 42#include "qib.h"
45#include "qib_common.h" 43#include "qib_common.h"
46 44
47static unsigned int ib_qib_qp_table_size = 256; 45static unsigned int ib_qib_qp_table_size = 251;
48module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO); 46module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
49MODULE_PARM_DESC(qp_table_size, "QP table size"); 47MODULE_PARM_DESC(qp_table_size, "QP table size");
50 48
@@ -183,7 +181,7 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
183 sge->sge_length -= len; 181 sge->sge_length -= len;
184 if (sge->sge_length == 0) { 182 if (sge->sge_length == 0) {
185 if (release) 183 if (release)
186 qib_put_mr(sge->mr); 184 atomic_dec(&sge->mr->refcount);
187 if (--ss->num_sge) 185 if (--ss->num_sge)
188 *sge = *ss->sg_list++; 186 *sge = *ss->sg_list++;
189 } else if (sge->length == 0 && sge->mr->lkey) { 187 } else if (sge->length == 0 && sge->mr->lkey) {
@@ -224,7 +222,7 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
224 sge->sge_length -= len; 222 sge->sge_length -= len;
225 if (sge->sge_length == 0) { 223 if (sge->sge_length == 0) {
226 if (release) 224 if (release)
227 qib_put_mr(sge->mr); 225 atomic_dec(&sge->mr->refcount);
228 if (--ss->num_sge) 226 if (--ss->num_sge)
229 *sge = *ss->sg_list++; 227 *sge = *ss->sg_list++;
230 } else if (sge->length == 0 && sge->mr->lkey) { 228 } else if (sge->length == 0 && sge->mr->lkey) {
@@ -333,8 +331,7 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
333 * @qp: the QP to post on 331 * @qp: the QP to post on
334 * @wr: the work request to send 332 * @wr: the work request to send
335 */ 333 */
336static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr, 334static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
337 int *scheduled)
338{ 335{
339 struct qib_swqe *wqe; 336 struct qib_swqe *wqe;
340 u32 next; 337 u32 next;
@@ -436,17 +433,11 @@ bail_inval_free:
436 while (j) { 433 while (j) {
437 struct qib_sge *sge = &wqe->sg_list[--j]; 434 struct qib_sge *sge = &wqe->sg_list[--j];
438 435
439 qib_put_mr(sge->mr); 436 atomic_dec(&sge->mr->refcount);
440 } 437 }
441bail_inval: 438bail_inval:
442 ret = -EINVAL; 439 ret = -EINVAL;
443bail: 440bail:
444 if (!ret && !wr->next &&
445 !qib_sdma_empty(
446 dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
447 qib_schedule_send(qp);
448 *scheduled = 1;
449 }
450 spin_unlock_irqrestore(&qp->s_lock, flags); 441 spin_unlock_irqrestore(&qp->s_lock, flags);
451 return ret; 442 return ret;
452} 443}
@@ -464,10 +455,9 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
464{ 455{
465 struct qib_qp *qp = to_iqp(ibqp); 456 struct qib_qp *qp = to_iqp(ibqp);
466 int err = 0; 457 int err = 0;
467 int scheduled = 0;
468 458
469 for (; wr; wr = wr->next) { 459 for (; wr; wr = wr->next) {
470 err = qib_post_one_send(qp, wr, &scheduled); 460 err = qib_post_one_send(qp, wr);
471 if (err) { 461 if (err) {
472 *bad_wr = wr; 462 *bad_wr = wr;
473 goto bail; 463 goto bail;
@@ -475,8 +465,7 @@ static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
475 } 465 }
476 466
477 /* Try to do the send work in the caller's context. */ 467 /* Try to do the send work in the caller's context. */
478 if (!scheduled) 468 qib_do_send(&qp->s_work);
479 qib_do_send(&qp->s_work);
480 469
481bail: 470bail:
482 return err; 471 return err;
@@ -670,25 +659,17 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
670 if (atomic_dec_return(&mcast->refcount) <= 1) 659 if (atomic_dec_return(&mcast->refcount) <= 1)
671 wake_up(&mcast->wait); 660 wake_up(&mcast->wait);
672 } else { 661 } else {
673 if (rcd->lookaside_qp) { 662 qp = qib_lookup_qpn(ibp, qp_num);
674 if (rcd->lookaside_qpn != qp_num) { 663 if (!qp)
675 if (atomic_dec_and_test( 664 goto drop;
676 &rcd->lookaside_qp->refcount))
677 wake_up(
678 &rcd->lookaside_qp->wait);
679 rcd->lookaside_qp = NULL;
680 }
681 }
682 if (!rcd->lookaside_qp) {
683 qp = qib_lookup_qpn(ibp, qp_num);
684 if (!qp)
685 goto drop;
686 rcd->lookaside_qp = qp;
687 rcd->lookaside_qpn = qp_num;
688 } else
689 qp = rcd->lookaside_qp;
690 ibp->n_unicast_rcv++; 665 ibp->n_unicast_rcv++;
691 qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp); 666 qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
667 /*
668 * Notify qib_destroy_qp() if it is waiting
669 * for us to finish.
670 */
671 if (atomic_dec_and_test(&qp->refcount))
672 wake_up(&qp->wait);
692 } 673 }
693 return; 674 return;
694 675
@@ -922,8 +903,8 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
922 __raw_writel(last, piobuf); 903 __raw_writel(last, piobuf);
923} 904}
924 905
925static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, 906static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
926 struct qib_qp *qp) 907 struct qib_qp *qp, int *retp)
927{ 908{
928 struct qib_verbs_txreq *tx; 909 struct qib_verbs_txreq *tx;
929 unsigned long flags; 910 unsigned long flags;
@@ -935,9 +916,8 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
935 struct list_head *l = dev->txreq_free.next; 916 struct list_head *l = dev->txreq_free.next;
936 917
937 list_del(l); 918 list_del(l);
938 spin_unlock(&dev->pending_lock);
939 spin_unlock_irqrestore(&qp->s_lock, flags);
940 tx = list_entry(l, struct qib_verbs_txreq, txreq.list); 919 tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
920 *retp = 0;
941 } else { 921 } else {
942 if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && 922 if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
943 list_empty(&qp->iowait)) { 923 list_empty(&qp->iowait)) {
@@ -945,33 +925,14 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
945 qp->s_flags |= QIB_S_WAIT_TX; 925 qp->s_flags |= QIB_S_WAIT_TX;
946 list_add_tail(&qp->iowait, &dev->txwait); 926 list_add_tail(&qp->iowait, &dev->txwait);
947 } 927 }
928 tx = NULL;
948 qp->s_flags &= ~QIB_S_BUSY; 929 qp->s_flags &= ~QIB_S_BUSY;
949 spin_unlock(&dev->pending_lock); 930 *retp = -EBUSY;
950 spin_unlock_irqrestore(&qp->s_lock, flags);
951 tx = ERR_PTR(-EBUSY);
952 } 931 }
953 return tx;
954}
955
956static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
957 struct qib_qp *qp)
958{
959 struct qib_verbs_txreq *tx;
960 unsigned long flags;
961 932
962 spin_lock_irqsave(&dev->pending_lock, flags); 933 spin_unlock(&dev->pending_lock);
963 /* assume the list non empty */ 934 spin_unlock_irqrestore(&qp->s_lock, flags);
964 if (likely(!list_empty(&dev->txreq_free))) {
965 struct list_head *l = dev->txreq_free.next;
966 935
967 list_del(l);
968 spin_unlock_irqrestore(&dev->pending_lock, flags);
969 tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
970 } else {
971 /* call slow path to get the extra lock */
972 spin_unlock_irqrestore(&dev->pending_lock, flags);
973 tx = __get_txreq(dev, qp);
974 }
975 return tx; 936 return tx;
976} 937}
977 938
@@ -987,7 +948,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
987 if (atomic_dec_and_test(&qp->refcount)) 948 if (atomic_dec_and_test(&qp->refcount))
988 wake_up(&qp->wait); 949 wake_up(&qp->wait);
989 if (tx->mr) { 950 if (tx->mr) {
990 qib_put_mr(tx->mr); 951 atomic_dec(&tx->mr->refcount);
991 tx->mr = NULL; 952 tx->mr = NULL;
992 } 953 }
993 if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) { 954 if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1151,9 +1112,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
1151 goto bail; 1112 goto bail;
1152 } 1113 }
1153 1114
1154 tx = get_txreq(dev, qp); 1115 tx = get_txreq(dev, qp, &ret);
1155 if (IS_ERR(tx)) 1116 if (!tx)
1156 goto bail_tx; 1117 goto bail;
1157 1118
1158 control = dd->f_setpbc_control(ppd, plen, qp->s_srate, 1119 control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
1159 be16_to_cpu(hdr->lrh[0]) >> 12); 1120 be16_to_cpu(hdr->lrh[0]) >> 12);
@@ -1224,9 +1185,6 @@ unaligned:
1224 ibp->n_unaligned++; 1185 ibp->n_unaligned++;
1225bail: 1186bail:
1226 return ret; 1187 return ret;
1227bail_tx:
1228 ret = PTR_ERR(tx);
1229 goto bail;
1230} 1188}
1231 1189
1232/* 1190/*
@@ -1345,7 +1303,7 @@ done:
1345 } 1303 }
1346 qib_sendbuf_done(dd, pbufn); 1304 qib_sendbuf_done(dd, pbufn);
1347 if (qp->s_rdma_mr) { 1305 if (qp->s_rdma_mr) {
1348 qib_put_mr(qp->s_rdma_mr); 1306 atomic_dec(&qp->s_rdma_mr->refcount);
1349 qp->s_rdma_mr = NULL; 1307 qp->s_rdma_mr = NULL;
1350 } 1308 }
1351 if (qp->s_wqe) { 1309 if (qp->s_wqe) {
@@ -1854,23 +1812,6 @@ bail:
1854 return ret; 1812 return ret;
1855} 1813}
1856 1814
1857struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
1858{
1859 struct ib_ah_attr attr;
1860 struct ib_ah *ah = ERR_PTR(-EINVAL);
1861 struct qib_qp *qp0;
1862
1863 memset(&attr, 0, sizeof attr);
1864 attr.dlid = dlid;
1865 attr.port_num = ppd_from_ibp(ibp)->port;
1866 rcu_read_lock();
1867 qp0 = rcu_dereference(ibp->qp0);
1868 if (qp0)
1869 ah = ib_create_ah(qp0->ibqp.pd, &attr);
1870 rcu_read_unlock();
1871 return ah;
1872}
1873
1874/** 1815/**
1875 * qib_destroy_ah - destroy an address handle 1816 * qib_destroy_ah - destroy an address handle
1876 * @ibah: the AH to destroy 1817 * @ibah: the AH to destroy
@@ -2033,8 +1974,6 @@ static void init_ibport(struct qib_pportdata *ppd)
2033 ibp->z_excessive_buffer_overrun_errors = 1974 ibp->z_excessive_buffer_overrun_errors =
2034 cntrs.excessive_buffer_overrun_errors; 1975 cntrs.excessive_buffer_overrun_errors;
2035 ibp->z_vl15_dropped = cntrs.vl15_dropped; 1976 ibp->z_vl15_dropped = cntrs.vl15_dropped;
2036 RCU_INIT_POINTER(ibp->qp0, NULL);
2037 RCU_INIT_POINTER(ibp->qp1, NULL);
2038} 1977}
2039 1978
2040/** 1979/**
@@ -2051,15 +1990,12 @@ int qib_register_ib_device(struct qib_devdata *dd)
2051 int ret; 1990 int ret;
2052 1991
2053 dev->qp_table_size = ib_qib_qp_table_size; 1992 dev->qp_table_size = ib_qib_qp_table_size;
2054 get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd)); 1993 dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
2055 dev->qp_table = kmalloc(dev->qp_table_size * sizeof *dev->qp_table,
2056 GFP_KERNEL); 1994 GFP_KERNEL);
2057 if (!dev->qp_table) { 1995 if (!dev->qp_table) {
2058 ret = -ENOMEM; 1996 ret = -ENOMEM;
2059 goto err_qpt; 1997 goto err_qpt;
2060 } 1998 }
2061 for (i = 0; i < dev->qp_table_size; i++)
2062 RCU_INIT_POINTER(dev->qp_table[i], NULL);
2063 1999
2064 for (i = 0; i < dd->num_pports; i++) 2000 for (i = 0; i < dd->num_pports; i++)
2065 init_ibport(ppd + i); 2001 init_ibport(ppd + i);
@@ -2086,15 +2022,13 @@ int qib_register_ib_device(struct qib_devdata *dd)
2086 spin_lock_init(&dev->lk_table.lock); 2022 spin_lock_init(&dev->lk_table.lock);
2087 dev->lk_table.max = 1 << ib_qib_lkey_table_size; 2023 dev->lk_table.max = 1 << ib_qib_lkey_table_size;
2088 lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table); 2024 lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
2089 dev->lk_table.table = (struct qib_mregion __rcu **) 2025 dev->lk_table.table = (struct qib_mregion **)
2090 __get_free_pages(GFP_KERNEL, get_order(lk_tab_size)); 2026 __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
2091 if (dev->lk_table.table == NULL) { 2027 if (dev->lk_table.table == NULL) {
2092 ret = -ENOMEM; 2028 ret = -ENOMEM;
2093 goto err_lk; 2029 goto err_lk;
2094 } 2030 }
2095 RCU_INIT_POINTER(dev->dma_mr, NULL); 2031 memset(dev->lk_table.table, 0, lk_tab_size);
2096 for (i = 0; i < dev->lk_table.max; i++)
2097 RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
2098 INIT_LIST_HEAD(&dev->pending_mmaps); 2032 INIT_LIST_HEAD(&dev->pending_mmaps);
2099 spin_lock_init(&dev->pending_lock); 2033 spin_lock_init(&dev->pending_lock);
2100 dev->mmap_offset = PAGE_SIZE; 2034 dev->mmap_offset = PAGE_SIZE;
@@ -2224,7 +2158,7 @@ int qib_register_ib_device(struct qib_devdata *dd)
2224 ibdev->dma_ops = &qib_dma_mapping_ops; 2158 ibdev->dma_ops = &qib_dma_mapping_ops;
2225 2159
2226 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc), 2160 snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
2227 "QLogic Infiniband HCA %s", init_utsname()->nodename); 2161 QIB_IDSTR " %s", init_utsname()->nodename);
2228 2162
2229 ret = ib_register_device(ibdev, qib_create_port_files); 2163 ret = ib_register_device(ibdev, qib_create_port_files);
2230 if (ret) 2164 if (ret)
@@ -2317,17 +2251,3 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
2317 get_order(lk_tab_size)); 2251 get_order(lk_tab_size));
2318 kfree(dev->qp_table); 2252 kfree(dev->qp_table);
2319} 2253}
2320
2321/*
2322 * This must be called with s_lock held.
2323 */
2324void qib_schedule_send(struct qib_qp *qp)
2325{
2326 if (qib_send_ok(qp)) {
2327 struct qib_ibport *ibp =
2328 to_iport(qp->ibqp.device, qp->port_num);
2329 struct qib_pportdata *ppd = ppd_from_ibp(ibp);
2330
2331 queue_work(ppd->qib_wq, &qp->s_work);
2332 }
2333}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index aff8b2c1788..95e5b47223b 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved. 3 * All rights reserved.
4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved. 4 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
5 * 5 *
6 * This software is available to you under a choice of one of two 6 * This software is available to you under a choice of one of two
@@ -41,7 +41,6 @@
41#include <linux/interrupt.h> 41#include <linux/interrupt.h>
42#include <linux/kref.h> 42#include <linux/kref.h>
43#include <linux/workqueue.h> 43#include <linux/workqueue.h>
44#include <linux/completion.h>
45#include <rdma/ib_pack.h> 44#include <rdma/ib_pack.h>
46#include <rdma/ib_user_verbs.h> 45#include <rdma/ib_user_verbs.h>
47 46
@@ -303,9 +302,6 @@ struct qib_mregion {
303 u32 max_segs; /* number of qib_segs in all the arrays */ 302 u32 max_segs; /* number of qib_segs in all the arrays */
304 u32 mapsz; /* size of the map array */ 303 u32 mapsz; /* size of the map array */
305 u8 page_shift; /* 0 - non unform/non powerof2 sizes */ 304 u8 page_shift; /* 0 - non unform/non powerof2 sizes */
306 u8 lkey_published; /* in global table */
307 struct completion comp; /* complete when refcount goes to zero */
308 struct rcu_head list;
309 atomic_t refcount; 305 atomic_t refcount;
310 struct qib_segarray *map[0]; /* the segments */ 306 struct qib_segarray *map[0]; /* the segments */
311}; 307};
@@ -371,10 +367,9 @@ struct qib_rwq {
371 367
372struct qib_rq { 368struct qib_rq {
373 struct qib_rwq *wq; 369 struct qib_rwq *wq;
370 spinlock_t lock; /* protect changes in this struct */
374 u32 size; /* size of RWQE array */ 371 u32 size; /* size of RWQE array */
375 u8 max_sge; 372 u8 max_sge;
376 spinlock_t lock /* protect changes in this struct */
377 ____cacheline_aligned_in_smp;
378}; 373};
379 374
380struct qib_srq { 375struct qib_srq {
@@ -417,75 +412,31 @@ struct qib_ack_entry {
417 */ 412 */
418struct qib_qp { 413struct qib_qp {
419 struct ib_qp ibqp; 414 struct ib_qp ibqp;
420 /* read mostly fields above and below */ 415 struct qib_qp *next; /* link list for QPN hash table */
416 struct qib_qp *timer_next; /* link list for qib_ib_timer() */
417 struct list_head iowait; /* link for wait PIO buf */
418 struct list_head rspwait; /* link for waititing to respond */
421 struct ib_ah_attr remote_ah_attr; 419 struct ib_ah_attr remote_ah_attr;
422 struct ib_ah_attr alt_ah_attr; 420 struct ib_ah_attr alt_ah_attr;
423 struct qib_qp __rcu *next; /* link list for QPN hash table */ 421 struct qib_ib_header s_hdr; /* next packet header to send */
424 struct qib_swqe *s_wq; /* send work queue */ 422 atomic_t refcount;
425 struct qib_mmap_info *ip;
426 struct qib_ib_header *s_hdr; /* next packet header to send */
427 unsigned long timeout_jiffies; /* computed from timeout */
428
429 enum ib_mtu path_mtu;
430 u32 remote_qpn;
431 u32 pmtu; /* decoded from path_mtu */
432 u32 qkey; /* QKEY for this QP (for UD or RD) */
433 u32 s_size; /* send work queue size */
434 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
435
436 u8 state; /* QP state */
437 u8 qp_access_flags;
438 u8 alt_timeout; /* Alternate path timeout for this QP */
439 u8 timeout; /* Timeout for this QP */
440 u8 s_srate;
441 u8 s_mig_state;
442 u8 port_num;
443 u8 s_pkey_index; /* PKEY index to use */
444 u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
445 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
446 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
447 u8 s_retry_cnt; /* number of times to retry */
448 u8 s_rnr_retry_cnt;
449 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
450 u8 s_max_sge; /* size of s_wq->sg_list */
451 u8 s_draining;
452
453 /* start of read/write fields */
454
455 atomic_t refcount ____cacheline_aligned_in_smp;
456 wait_queue_head_t wait; 423 wait_queue_head_t wait;
457 424 wait_queue_head_t wait_dma;
458 425 struct timer_list s_timer;
459 struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1] 426 struct work_struct s_work;
460 ____cacheline_aligned_in_smp; 427 struct qib_mmap_info *ip;
461 struct qib_sge_state s_rdma_read_sge;
462
463 spinlock_t r_lock ____cacheline_aligned_in_smp; /* used for APM */
464 unsigned long r_aflags;
465 u64 r_wr_id; /* ID for current receive WQE */
466 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
467 u32 r_len; /* total length of r_sge */
468 u32 r_rcv_len; /* receive data len processed */
469 u32 r_psn; /* expected rcv packet sequence number */
470 u32 r_msn; /* message sequence number */
471
472 u8 r_state; /* opcode of last packet received */
473 u8 r_flags;
474 u8 r_head_ack_queue; /* index into s_ack_queue[] */
475
476 struct list_head rspwait; /* link for waititing to respond */
477
478 struct qib_sge_state r_sge; /* current receive data */
479 struct qib_rq r_rq; /* receive work queue */
480
481 spinlock_t s_lock ____cacheline_aligned_in_smp;
482 struct qib_sge_state *s_cur_sge; 428 struct qib_sge_state *s_cur_sge;
483 u32 s_flags;
484 struct qib_verbs_txreq *s_tx; 429 struct qib_verbs_txreq *s_tx;
485 struct qib_swqe *s_wqe;
486 struct qib_sge_state s_sge; /* current send request data */
487 struct qib_mregion *s_rdma_mr; 430 struct qib_mregion *s_rdma_mr;
431 struct qib_sge_state s_sge; /* current send request data */
432 struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
433 struct qib_sge_state s_ack_rdma_sge;
434 struct qib_sge_state s_rdma_read_sge;
435 struct qib_sge_state r_sge; /* current receive data */
436 spinlock_t r_lock; /* used for APM */
437 spinlock_t s_lock;
488 atomic_t s_dma_busy; 438 atomic_t s_dma_busy;
439 u32 s_flags;
489 u32 s_cur_size; /* size of send packet in bytes */ 440 u32 s_cur_size; /* size of send packet in bytes */
490 u32 s_len; /* total length of s_sge */ 441 u32 s_len; /* total length of s_sge */
491 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */ 442 u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
@@ -496,34 +447,58 @@ struct qib_qp {
496 u32 s_psn; /* current packet sequence number */ 447 u32 s_psn; /* current packet sequence number */
497 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */ 448 u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
498 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */ 449 u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
499 u32 s_head; /* new entries added here */ 450 u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
500 u32 s_tail; /* next entry to process */ 451 u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
501 u32 s_cur; /* current work queue entry */ 452 u64 r_wr_id; /* ID for current receive WQE */
502 u32 s_acked; /* last un-ACK'ed entry */ 453 unsigned long r_aflags;
503 u32 s_last; /* last completed entry */ 454 u32 r_len; /* total length of r_sge */
504 u32 s_ssn; /* SSN of tail entry */ 455 u32 r_rcv_len; /* receive data len processed */
505 u32 s_lsn; /* limit sequence number (credit) */ 456 u32 r_psn; /* expected rcv packet sequence number */
457 u32 r_msn; /* message sequence number */
506 u16 s_hdrwords; /* size of s_hdr in 32 bit words */ 458 u16 s_hdrwords; /* size of s_hdr in 32 bit words */
507 u16 s_rdma_ack_cnt; 459 u16 s_rdma_ack_cnt;
460 u8 state; /* QP state */
508 u8 s_state; /* opcode of last packet sent */ 461 u8 s_state; /* opcode of last packet sent */
509 u8 s_ack_state; /* opcode of packet to ACK */ 462 u8 s_ack_state; /* opcode of packet to ACK */
510 u8 s_nak_state; /* non-zero if NAK is pending */ 463 u8 s_nak_state; /* non-zero if NAK is pending */
464 u8 r_state; /* opcode of last packet received */
511 u8 r_nak_state; /* non-zero if NAK is pending */ 465 u8 r_nak_state; /* non-zero if NAK is pending */
466 u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
467 u8 r_flags;
468 u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
469 u8 r_head_ack_queue; /* index into s_ack_queue[] */
470 u8 qp_access_flags;
471 u8 s_max_sge; /* size of s_wq->sg_list */
472 u8 s_retry_cnt; /* number of times to retry */
473 u8 s_rnr_retry_cnt;
512 u8 s_retry; /* requester retry counter */ 474 u8 s_retry; /* requester retry counter */
513 u8 s_rnr_retry; /* requester RNR retry counter */ 475 u8 s_rnr_retry; /* requester RNR retry counter */
476 u8 s_pkey_index; /* PKEY index to use */
477 u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
478 u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
514 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */ 479 u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
515 u8 s_tail_ack_queue; /* index into s_ack_queue[] */ 480 u8 s_tail_ack_queue; /* index into s_ack_queue[] */
516 481 u8 s_srate;
517 struct qib_sge_state s_ack_rdma_sge; 482 u8 s_draining;
518 struct timer_list s_timer; 483 u8 s_mig_state;
519 struct list_head iowait; /* link for wait PIO buf */ 484 u8 timeout; /* Timeout for this QP */
520 485 u8 alt_timeout; /* Alternate path timeout for this QP */
521 struct work_struct s_work; 486 u8 port_num;
522 487 enum ib_mtu path_mtu;
523 wait_queue_head_t wait_dma; 488 u32 remote_qpn;
524 489 u32 qkey; /* QKEY for this QP (for UD or RD) */
525 struct qib_sge r_sg_list[0] /* verified SGEs */ 490 u32 s_size; /* send work queue size */
526 ____cacheline_aligned_in_smp; 491 u32 s_head; /* new entries added here */
492 u32 s_tail; /* next entry to process */
493 u32 s_cur; /* current work queue entry */
494 u32 s_acked; /* last un-ACK'ed entry */
495 u32 s_last; /* last completed entry */
496 u32 s_ssn; /* SSN of tail entry */
497 u32 s_lsn; /* limit sequence number (credit) */
498 struct qib_swqe *s_wq; /* send work queue */
499 struct qib_swqe *s_wqe;
500 struct qib_rq r_rq; /* receive work queue */
501 struct qib_sge r_sg_list[0]; /* verified SGEs */
527}; 502};
528 503
529/* 504/*
@@ -650,7 +625,7 @@ struct qib_lkey_table {
650 u32 next; /* next unused index (speeds search) */ 625 u32 next; /* next unused index (speeds search) */
651 u32 gen; /* generation count */ 626 u32 gen; /* generation count */
652 u32 max; /* size of the table */ 627 u32 max; /* size of the table */
653 struct qib_mregion __rcu **table; 628 struct qib_mregion **table;
654}; 629};
655 630
656struct qib_opcode_stats { 631struct qib_opcode_stats {
@@ -659,8 +634,8 @@ struct qib_opcode_stats {
659}; 634};
660 635
661struct qib_ibport { 636struct qib_ibport {
662 struct qib_qp __rcu *qp0; 637 struct qib_qp *qp0;
663 struct qib_qp __rcu *qp1; 638 struct qib_qp *qp1;
664 struct ib_mad_agent *send_agent; /* agent for SMI (traps) */ 639 struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
665 struct qib_ah *sm_ah; 640 struct qib_ah *sm_ah;
666 struct qib_ah *smi_ah; 641 struct qib_ah *smi_ah;
@@ -727,13 +702,12 @@ struct qib_ibport {
727 struct qib_opcode_stats opstats[128]; 702 struct qib_opcode_stats opstats[128];
728}; 703};
729 704
730
731struct qib_ibdev { 705struct qib_ibdev {
732 struct ib_device ibdev; 706 struct ib_device ibdev;
733 struct list_head pending_mmaps; 707 struct list_head pending_mmaps;
734 spinlock_t mmap_offset_lock; /* protect mmap_offset */ 708 spinlock_t mmap_offset_lock; /* protect mmap_offset */
735 u32 mmap_offset; 709 u32 mmap_offset;
736 struct qib_mregion __rcu *dma_mr; 710 struct qib_mregion *dma_mr;
737 711
738 /* QP numbers are shared by all IB ports */ 712 /* QP numbers are shared by all IB ports */
739 struct qib_qpn_table qpn_table; 713 struct qib_qpn_table qpn_table;
@@ -744,13 +718,12 @@ struct qib_ibdev {
744 struct list_head memwait; /* list for wait kernel memory */ 718 struct list_head memwait; /* list for wait kernel memory */
745 struct list_head txreq_free; 719 struct list_head txreq_free;
746 struct timer_list mem_timer; 720 struct timer_list mem_timer;
747 struct qib_qp __rcu **qp_table; 721 struct qib_qp **qp_table;
748 struct qib_pio_header *pio_hdrs; 722 struct qib_pio_header *pio_hdrs;
749 dma_addr_t pio_hdrs_phys; 723 dma_addr_t pio_hdrs_phys;
750 /* list of QPs waiting for RNR timer */ 724 /* list of QPs waiting for RNR timer */
751 spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */ 725 spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
752 u32 qp_table_size; /* size of the hash table */ 726 unsigned qp_table_size; /* size of the hash table */
753 u32 qp_rnd; /* random bytes for hash */
754 spinlock_t qpt_lock; 727 spinlock_t qpt_lock;
755 728
756 u32 n_piowait; 729 u32 n_piowait;
@@ -837,7 +810,11 @@ extern struct workqueue_struct *qib_cq_wq;
837/* 810/*
838 * This must be called with s_lock held. 811 * This must be called with s_lock held.
839 */ 812 */
840void qib_schedule_send(struct qib_qp *qp); 813static inline void qib_schedule_send(struct qib_qp *qp)
814{
815 if (qib_send_ok(qp))
816 queue_work(ib_wq, &qp->s_work);
817}
841 818
842static inline int qib_pkey_ok(u16 pkey1, u16 pkey2) 819static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
843{ 820{
@@ -934,8 +911,6 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
934 911
935int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); 912int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
936 913
937struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
938
939void qib_rc_rnr_retry(unsigned long arg); 914void qib_rc_rnr_retry(unsigned long arg);
940 915
941void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr); 916void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
@@ -947,9 +922,9 @@ int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
947void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, 922void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
948 int has_grh, void *data, u32 tlen, struct qib_qp *qp); 923 int has_grh, void *data, u32 tlen, struct qib_qp *qp);
949 924
950int qib_alloc_lkey(struct qib_mregion *mr, int dma_region); 925int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
951 926
952void qib_free_lkey(struct qib_mregion *mr); 927int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
953 928
954int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd, 929int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
955 struct qib_sge *isge, struct ib_sge *sge, int acc); 930 struct qib_sge *isge, struct ib_sge *sge, int acc);
@@ -1017,29 +992,6 @@ int qib_unmap_fmr(struct list_head *fmr_list);
1017 992
1018int qib_dealloc_fmr(struct ib_fmr *ibfmr); 993int qib_dealloc_fmr(struct ib_fmr *ibfmr);
1019 994
1020static inline void qib_get_mr(struct qib_mregion *mr)
1021{
1022 atomic_inc(&mr->refcount);
1023}
1024
1025void mr_rcu_callback(struct rcu_head *list);
1026
1027static inline void qib_put_mr(struct qib_mregion *mr)
1028{
1029 if (unlikely(atomic_dec_and_test(&mr->refcount)))
1030 call_rcu(&mr->list, mr_rcu_callback);
1031}
1032
1033static inline void qib_put_ss(struct qib_sge_state *ss)
1034{
1035 while (ss->num_sge) {
1036 qib_put_mr(ss->sge.mr);
1037 if (--ss->num_sge)
1038 ss->sge = *ss->sg_list++;
1039 }
1040}
1041
1042
1043void qib_release_mmap_info(struct kref *ref); 995void qib_release_mmap_info(struct kref *ref);
1044 996
1045struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size, 997struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
index 1d7281c5a02..561b8bca406 100644
--- a/drivers/infiniband/hw/qib/qib_wc_x86_64.c
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -1,6 +1,5 @@
1/* 1/*
2 * Copyright (c) 2012 Intel Corporation. All rights reserved. 2 * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2006 - 2012 QLogic Corporation. All rights reserved.
4 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. 3 * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
5 * 4 *
6 * This software is available to you under a choice of one of two 5 * This software is available to you under a choice of one of two
@@ -103,10 +102,10 @@ int qib_enable_wc(struct qib_devdata *dd)
103 u64 atmp; 102 u64 atmp;
104 atmp = pioaddr & ~(piolen - 1); 103 atmp = pioaddr & ~(piolen - 1);
105 if (atmp < addr || (atmp + piolen) > (addr + len)) { 104 if (atmp < addr || (atmp + piolen) > (addr + len)) {
106 qib_dev_err(dd, 105 qib_dev_err(dd, "No way to align address/size "
107 "No way to align address/size (%llx/%llx), no WC mtrr\n", 106 "(%llx/%llx), no WC mtrr\n",
108 (unsigned long long) atmp, 107 (unsigned long long) atmp,
109 (unsigned long long) piolen << 1); 108 (unsigned long long) piolen << 1);
110 ret = -ENODEV; 109 ret = -ENODEV;
111 } else { 110 } else {
112 pioaddr = atmp; 111 pioaddr = atmp;
@@ -121,7 +120,8 @@ int qib_enable_wc(struct qib_devdata *dd)
121 if (cookie < 0) { 120 if (cookie < 0) {
122 { 121 {
123 qib_devinfo(dd->pcidev, 122 qib_devinfo(dd->pcidev,
124 "mtrr_add() WC for PIO bufs failed (%d)\n", 123 "mtrr_add() WC for PIO bufs "
124 "failed (%d)\n",
125 cookie); 125 cookie);
126 ret = -EINVAL; 126 ret = -EINVAL;
127 } 127 }
diff --git a/drivers/infiniband/ulp/ipoib/Makefile b/drivers/infiniband/ulp/ipoib/Makefile
index e5430dd5076..3090100f0de 100644
--- a/drivers/infiniband/ulp/ipoib/Makefile
+++ b/drivers/infiniband/ulp/ipoib/Makefile
@@ -5,8 +5,7 @@ ib_ipoib-y := ipoib_main.o \
5 ipoib_multicast.o \ 5 ipoib_multicast.o \
6 ipoib_verbs.o \ 6 ipoib_verbs.o \
7 ipoib_vlan.o \ 7 ipoib_vlan.o \
8 ipoib_ethtool.o \ 8 ipoib_ethtool.o
9 ipoib_netlink.o
10ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o 9ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_CM) += ipoib_cm.o
11ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o 10ib_ipoib-$(CONFIG_INFINIBAND_IPOIB_DEBUG) += ipoib_fs.o
12 11
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 07ca6fd5546..b3cc1e062b1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -44,7 +44,6 @@
44#include <linux/mutex.h> 44#include <linux/mutex.h>
45 45
46#include <net/neighbour.h> 46#include <net/neighbour.h>
47#include <net/sch_generic.h>
48 47
49#include <linux/atomic.h> 48#include <linux/atomic.h>
50 49
@@ -92,8 +91,6 @@ enum {
92 IPOIB_STOP_REAPER = 7, 91 IPOIB_STOP_REAPER = 7,
93 IPOIB_FLAG_ADMIN_CM = 9, 92 IPOIB_FLAG_ADMIN_CM = 9,
94 IPOIB_FLAG_UMCAST = 10, 93 IPOIB_FLAG_UMCAST = 10,
95 IPOIB_STOP_NEIGH_GC = 11,
96 IPOIB_NEIGH_TBL_FLUSH = 12,
97 94
98 IPOIB_MAX_BACKOFF_SECONDS = 16, 95 IPOIB_MAX_BACKOFF_SECONDS = 16,
99 96
@@ -104,10 +101,6 @@ enum {
104 101
105 MAX_SEND_CQE = 16, 102 MAX_SEND_CQE = 16,
106 IPOIB_CM_COPYBREAK = 256, 103 IPOIB_CM_COPYBREAK = 256,
107
108 IPOIB_NON_CHILD = 0,
109 IPOIB_LEGACY_CHILD = 1,
110 IPOIB_RTNL_CHILD = 2,
111}; 104};
112 105
113#define IPOIB_OP_RECV (1ul << 31) 106#define IPOIB_OP_RECV (1ul << 31)
@@ -124,9 +117,8 @@ struct ipoib_header {
124 u16 reserved; 117 u16 reserved;
125}; 118};
126 119
127struct ipoib_cb { 120struct ipoib_pseudoheader {
128 struct qdisc_skb_cb qdisc_cb; 121 u8 hwaddr[INFINIBAND_ALEN];
129 u8 hwaddr[INFINIBAND_ALEN];
130}; 122};
131 123
132/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ 124/* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */
@@ -266,23 +258,6 @@ struct ipoib_ethtool_st {
266 u16 max_coalesced_frames; 258 u16 max_coalesced_frames;
267}; 259};
268 260
269struct ipoib_neigh_table;
270
271struct ipoib_neigh_hash {
272 struct ipoib_neigh_table *ntbl;
273 struct ipoib_neigh __rcu **buckets;
274 struct rcu_head rcu;
275 u32 mask;
276 u32 size;
277};
278
279struct ipoib_neigh_table {
280 struct ipoib_neigh_hash __rcu *htbl;
281 atomic_t entries;
282 struct completion flushed;
283 struct completion deleted;
284};
285
286/* 261/*
287 * Device private locking: network stack tx_lock protects members used 262 * Device private locking: network stack tx_lock protects members used
288 * in TX fast path, lock protects everything else. lock nests inside 263 * in TX fast path, lock protects everything else. lock nests inside
@@ -302,8 +277,6 @@ struct ipoib_dev_priv {
302 struct rb_root path_tree; 277 struct rb_root path_tree;
303 struct list_head path_list; 278 struct list_head path_list;
304 279
305 struct ipoib_neigh_table ntbl;
306
307 struct ipoib_mcast *broadcast; 280 struct ipoib_mcast *broadcast;
308 struct list_head multicast_list; 281 struct list_head multicast_list;
309 struct rb_root multicast_tree; 282 struct rb_root multicast_tree;
@@ -316,7 +289,7 @@ struct ipoib_dev_priv {
316 struct work_struct flush_heavy; 289 struct work_struct flush_heavy;
317 struct work_struct restart_task; 290 struct work_struct restart_task;
318 struct delayed_work ah_reap_task; 291 struct delayed_work ah_reap_task;
319 struct delayed_work neigh_reap_task; 292
320 struct ib_device *ca; 293 struct ib_device *ca;
321 u8 port; 294 u8 port;
322 u16 pkey; 295 u16 pkey;
@@ -357,7 +330,6 @@ struct ipoib_dev_priv {
357 struct net_device *parent; 330 struct net_device *parent;
358 struct list_head child_intfs; 331 struct list_head child_intfs;
359 struct list_head list; 332 struct list_head list;
360 int child_type;
361 333
362#ifdef CONFIG_INFINIBAND_IPOIB_CM 334#ifdef CONFIG_INFINIBAND_IPOIB_CM
363 struct ipoib_cm_dev_priv cm; 335 struct ipoib_cm_dev_priv cm;
@@ -403,16 +375,13 @@ struct ipoib_neigh {
403#ifdef CONFIG_INFINIBAND_IPOIB_CM 375#ifdef CONFIG_INFINIBAND_IPOIB_CM
404 struct ipoib_cm_tx *cm; 376 struct ipoib_cm_tx *cm;
405#endif 377#endif
406 u8 daddr[INFINIBAND_ALEN]; 378 union ib_gid dgid;
407 struct sk_buff_head queue; 379 struct sk_buff_head queue;
408 380
381 struct neighbour *neighbour;
409 struct net_device *dev; 382 struct net_device *dev;
410 383
411 struct list_head list; 384 struct list_head list;
412 struct ipoib_neigh __rcu *hnext;
413 struct rcu_head rcu;
414 atomic_t refcnt;
415 unsigned long alive;
416}; 385};
417 386
418#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN) 387#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
@@ -423,17 +392,21 @@ static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
423 return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE; 392 return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
424} 393}
425 394
426void ipoib_neigh_dtor(struct ipoib_neigh *neigh); 395/*
427static inline void ipoib_neigh_put(struct ipoib_neigh *neigh) 396 * We stash a pointer to our private neighbour information after our
397 * hardware address in neigh->ha. The ALIGN() expression here makes
398 * sure that this pointer is stored aligned so that an unaligned
399 * load is not needed to dereference it.
400 */
401static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh)
428{ 402{
429 if (atomic_dec_and_test(&neigh->refcnt)) 403 return (void*) neigh + ALIGN(offsetof(struct neighbour, ha) +
430 ipoib_neigh_dtor(neigh); 404 INFINIBAND_ALEN, sizeof(void *));
431} 405}
432struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr); 406
433struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, 407struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh,
434 struct net_device *dev); 408 struct net_device *dev);
435void ipoib_neigh_free(struct ipoib_neigh *neigh); 409void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh);
436void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid);
437 410
438extern struct workqueue_struct *ipoib_workqueue; 411extern struct workqueue_struct *ipoib_workqueue;
439 412
@@ -450,6 +423,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
450{ 423{
451 kref_put(&ah->ref, ipoib_free_ah); 424 kref_put(&ah->ref, ipoib_free_ah);
452} 425}
426
453int ipoib_open(struct net_device *dev); 427int ipoib_open(struct net_device *dev);
454int ipoib_add_pkey_attr(struct net_device *dev); 428int ipoib_add_pkey_attr(struct net_device *dev);
455int ipoib_add_umcast_attr(struct net_device *dev); 429int ipoib_add_umcast_attr(struct net_device *dev);
@@ -479,7 +453,7 @@ void ipoib_dev_cleanup(struct net_device *dev);
479 453
480void ipoib_mcast_join_task(struct work_struct *work); 454void ipoib_mcast_join_task(struct work_struct *work);
481void ipoib_mcast_carrier_on_task(struct work_struct *work); 455void ipoib_mcast_carrier_on_task(struct work_struct *work);
482void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); 456void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb);
483 457
484void ipoib_mcast_restart_task(struct work_struct *work); 458void ipoib_mcast_restart_task(struct work_struct *work);
485int ipoib_mcast_start_thread(struct net_device *dev); 459int ipoib_mcast_start_thread(struct net_device *dev);
@@ -517,17 +491,6 @@ void ipoib_event(struct ib_event_handler *handler,
517int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey); 491int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey);
518int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey); 492int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey);
519 493
520int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
521 u16 pkey, int child_type);
522
523int __init ipoib_netlink_init(void);
524void __exit ipoib_netlink_fini(void);
525
526void ipoib_set_umcast(struct net_device *ndev, int umcast_val);
527int ipoib_set_mode(struct net_device *dev, const char *buf);
528
529void ipoib_setup(struct net_device *dev);
530
531void ipoib_pkey_poll(struct work_struct *work); 494void ipoib_pkey_poll(struct work_struct *work);
532int ipoib_pkey_dev_delay_open(struct net_device *dev); 495int ipoib_pkey_dev_delay_open(struct net_device *dev);
533void ipoib_drain_cq(struct net_device *dev); 496void ipoib_drain_cq(struct net_device *dev);
@@ -535,14 +498,14 @@ void ipoib_drain_cq(struct net_device *dev);
535void ipoib_set_ethtool_ops(struct net_device *dev); 498void ipoib_set_ethtool_ops(struct net_device *dev);
536int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); 499int ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
537 500
501#ifdef CONFIG_INFINIBAND_IPOIB_CM
502
538#define IPOIB_FLAGS_RC 0x80 503#define IPOIB_FLAGS_RC 0x80
539#define IPOIB_FLAGS_UC 0x40 504#define IPOIB_FLAGS_UC 0x40
540 505
541/* We don't support UC connections at the moment */ 506/* We don't support UC connections at the moment */
542#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC)) 507#define IPOIB_CM_SUPPORTED(ha) (ha[0] & (IPOIB_FLAGS_RC))
543 508
544#ifdef CONFIG_INFINIBAND_IPOIB_CM
545
546extern int ipoib_max_conn_qp; 509extern int ipoib_max_conn_qp;
547 510
548static inline int ipoib_cm_admin_enabled(struct net_device *dev) 511static inline int ipoib_cm_admin_enabled(struct net_device *dev)
@@ -552,10 +515,10 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
552 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 515 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
553} 516}
554 517
555static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) 518static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
556{ 519{
557 struct ipoib_dev_priv *priv = netdev_priv(dev); 520 struct ipoib_dev_priv *priv = netdev_priv(dev);
558 return IPOIB_CM_SUPPORTED(hwaddr) && 521 return IPOIB_CM_SUPPORTED(n->ha) &&
559 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags); 522 test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
560} 523}
561 524
@@ -610,7 +573,7 @@ static inline int ipoib_cm_admin_enabled(struct net_device *dev)
610{ 573{
611 return 0; 574 return 0;
612} 575}
613static inline int ipoib_cm_enabled(struct net_device *dev, u8 *hwaddr) 576static inline int ipoib_cm_enabled(struct net_device *dev, struct neighbour *n)
614 577
615{ 578{
616 return 0; 579 return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 03103d2bd64..39913a065f9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -37,7 +37,6 @@
37#include <linux/delay.h> 37#include <linux/delay.h>
38#include <linux/slab.h> 38#include <linux/slab.h>
39#include <linux/vmalloc.h> 39#include <linux/vmalloc.h>
40#include <linux/moduleparam.h>
41 40
42#include "ipoib.h" 41#include "ipoib.h"
43 42
@@ -85,7 +84,7 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
85 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); 84 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
86 85
87 for (i = 0; i < frags; ++i) 86 for (i = 0; i < frags; ++i)
88 ib_dma_unmap_page(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE); 87 ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
89} 88}
90 89
91static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) 90static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
@@ -170,7 +169,7 @@ static struct sk_buff *ipoib_cm_alloc_rx_skb(struct net_device *dev,
170 goto partial_error; 169 goto partial_error;
171 skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE); 170 skb_fill_page_desc(skb, i, page, 0, PAGE_SIZE);
172 171
173 mapping[i + 1] = ib_dma_map_page(priv->ca, page, 172 mapping[i + 1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[i].page,
174 0, PAGE_SIZE, DMA_FROM_DEVICE); 173 0, PAGE_SIZE, DMA_FROM_DEVICE);
175 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1]))) 174 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[i + 1])))
176 goto partial_error; 175 goto partial_error;
@@ -184,7 +183,7 @@ partial_error:
184 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE); 183 ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_CM_HEAD_SIZE, DMA_FROM_DEVICE);
185 184
186 for (; i > 0; --i) 185 for (; i > 0; --i)
187 ib_dma_unmap_page(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE); 186 ib_dma_unmap_single(priv->ca, mapping[i], PAGE_SIZE, DMA_FROM_DEVICE);
188 187
189 dev_kfree_skb_any(skb); 188 dev_kfree_skb_any(skb);
190 return NULL; 189 return NULL;
@@ -538,13 +537,12 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
538 537
539 if (length == 0) { 538 if (length == 0) {
540 /* don't need this page */ 539 /* don't need this page */
541 skb_fill_page_desc(toskb, i, skb_frag_page(frag), 540 skb_fill_page_desc(toskb, i, frag->page, 0, PAGE_SIZE);
542 0, PAGE_SIZE);
543 --skb_shinfo(skb)->nr_frags; 541 --skb_shinfo(skb)->nr_frags;
544 } else { 542 } else {
545 size = min(length, (unsigned) PAGE_SIZE); 543 size = min(length, (unsigned) PAGE_SIZE);
546 544
547 skb_frag_size_set(frag, size); 545 frag->size = size;
548 skb->data_len += size; 546 skb->data_len += size;
549 skb->truesize += size; 547 skb->truesize += size;
550 skb->len += size; 548 skb->len += size;
@@ -752,9 +750,6 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
752 dev->trans_start = jiffies; 750 dev->trans_start = jiffies;
753 ++tx->tx_head; 751 ++tx->tx_head;
754 752
755 skb_orphan(skb);
756 skb_dst_drop(skb);
757
758 if (++priv->tx_outstanding == ipoib_sendq_size) { 753 if (++priv->tx_outstanding == ipoib_sendq_size) {
759 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", 754 ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
760 tx->qp->qp_num); 755 tx->qp->qp_num);
@@ -814,7 +809,9 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
814 if (neigh) { 809 if (neigh) {
815 neigh->cm = NULL; 810 neigh->cm = NULL;
816 list_del(&neigh->list); 811 list_del(&neigh->list);
817 ipoib_neigh_free(neigh); 812 if (neigh->ah)
813 ipoib_put_ah(neigh->ah);
814 ipoib_neigh_free(dev, neigh);
818 815
819 tx->neigh = NULL; 816 tx->neigh = NULL;
820 } 817 }
@@ -1231,7 +1228,9 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
1231 if (neigh) { 1228 if (neigh) {
1232 neigh->cm = NULL; 1229 neigh->cm = NULL;
1233 list_del(&neigh->list); 1230 list_del(&neigh->list);
1234 ipoib_neigh_free(neigh); 1231 if (neigh->ah)
1232 ipoib_put_ah(neigh->ah);
1233 ipoib_neigh_free(dev, neigh);
1235 1234
1236 tx->neigh = NULL; 1235 tx->neigh = NULL;
1237 } 1236 }
@@ -1274,15 +1273,12 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
1274void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) 1273void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
1275{ 1274{
1276 struct ipoib_dev_priv *priv = netdev_priv(tx->dev); 1275 struct ipoib_dev_priv *priv = netdev_priv(tx->dev);
1277 unsigned long flags;
1278 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) { 1276 if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
1279 spin_lock_irqsave(&priv->lock, flags);
1280 list_move(&tx->list, &priv->cm.reap_list); 1277 list_move(&tx->list, &priv->cm.reap_list);
1281 queue_work(ipoib_workqueue, &priv->cm.reap_task); 1278 queue_work(ipoib_workqueue, &priv->cm.reap_task);
1282 ipoib_dbg(priv, "Reap connection for gid %pI6\n", 1279 ipoib_dbg(priv, "Reap connection for gid %pI6\n",
1283 tx->neigh->daddr + 4); 1280 tx->neigh->dgid.raw);
1284 tx->neigh = NULL; 1281 tx->neigh = NULL;
1285 spin_unlock_irqrestore(&priv->lock, flags);
1286 } 1282 }
1287} 1283}
1288 1284
@@ -1306,7 +1302,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
1306 p = list_entry(priv->cm.start_list.next, typeof(*p), list); 1302 p = list_entry(priv->cm.start_list.next, typeof(*p), list);
1307 list_del_init(&p->list); 1303 list_del_init(&p->list);
1308 neigh = p->neigh; 1304 neigh = p->neigh;
1309 qpn = IPOIB_QPN(neigh->daddr); 1305 qpn = IPOIB_QPN(neigh->neighbour->ha);
1310 memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); 1306 memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
1311 1307
1312 spin_unlock_irqrestore(&priv->lock, flags); 1308 spin_unlock_irqrestore(&priv->lock, flags);
@@ -1322,7 +1318,9 @@ static void ipoib_cm_tx_start(struct work_struct *work)
1322 if (neigh) { 1318 if (neigh) {
1323 neigh->cm = NULL; 1319 neigh->cm = NULL;
1324 list_del(&neigh->list); 1320 list_del(&neigh->list);
1325 ipoib_neigh_free(neigh); 1321 if (neigh->ah)
1322 ipoib_put_ah(neigh->ah);
1323 ipoib_neigh_free(dev, neigh);
1326 } 1324 }
1327 list_del(&p->list); 1325 list_del(&p->list);
1328 kfree(p); 1326 kfree(p);
@@ -1376,7 +1374,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
1376 1374
1377 if (skb->protocol == htons(ETH_P_IP)) 1375 if (skb->protocol == htons(ETH_P_IP))
1378 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); 1376 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
1379#if IS_ENABLED(CONFIG_IPV6) 1377#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1380 else if (skb->protocol == htons(ETH_P_IPV6)) 1378 else if (skb->protocol == htons(ETH_P_IPV6))
1381 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1379 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1382#endif 1380#endif
@@ -1397,7 +1395,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
1397 int e = skb_queue_empty(&priv->cm.skb_queue); 1395 int e = skb_queue_empty(&priv->cm.skb_queue);
1398 1396
1399 if (skb_dst(skb)) 1397 if (skb_dst(skb))
1400 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); 1398 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
1401 1399
1402 skb_queue_tail(&priv->cm.skb_queue, skb); 1400 skb_queue_tail(&priv->cm.skb_queue, skb);
1403 if (e) 1401 if (e)
@@ -1455,19 +1453,36 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
1455 const char *buf, size_t count) 1453 const char *buf, size_t count)
1456{ 1454{
1457 struct net_device *dev = to_net_dev(d); 1455 struct net_device *dev = to_net_dev(d);
1458 int ret; 1456 struct ipoib_dev_priv *priv = netdev_priv(dev);
1459 1457
1460 if (!rtnl_trylock()) 1458 if (!rtnl_trylock())
1461 return restart_syscall(); 1459 return restart_syscall();
1462 1460
1463 ret = ipoib_set_mode(dev, buf); 1461 /* flush paths if we switch modes so that connections are restarted */
1462 if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
1463 set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1464 ipoib_warn(priv, "enabling connected mode "
1465 "will cause multicast packet drops\n");
1466 netdev_update_features(dev);
1467 rtnl_unlock();
1468 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
1464 1469
1465 rtnl_unlock(); 1470 ipoib_flush_paths(dev);
1471 return count;
1472 }
1473
1474 if (!strcmp(buf, "datagram\n")) {
1475 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
1476 netdev_update_features(dev);
1477 dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
1478 rtnl_unlock();
1479 ipoib_flush_paths(dev);
1466 1480
1467 if (!ret)
1468 return count; 1481 return count;
1482 }
1483 rtnl_unlock();
1469 1484
1470 return ret; 1485 return -EINVAL;
1471} 1486}
1472 1487
1473static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode); 1488static DEVICE_ATTR(mode, S_IWUSR | S_IRUGO, show_mode, set_mode);
@@ -1481,7 +1496,6 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
1481{ 1496{
1482 struct ipoib_dev_priv *priv = netdev_priv(dev); 1497 struct ipoib_dev_priv *priv = netdev_priv(dev);
1483 struct ib_srq_init_attr srq_init_attr = { 1498 struct ib_srq_init_attr srq_init_attr = {
1484 .srq_type = IB_SRQT_BASIC,
1485 .attr = { 1499 .attr = {
1486 .max_wr = ipoib_recvq_size, 1500 .max_wr = ipoib_recvq_size,
1487 .max_sge = max_sge 1501 .max_sge = max_sge
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616..86eae229dc4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -37,7 +37,6 @@
37struct file_operations; 37struct file_operations;
38 38
39#include <linux/debugfs.h> 39#include <linux/debugfs.h>
40#include <linux/export.h>
41 40
42#include "ipoib.h" 41#include "ipoib.h"
43 42
@@ -213,15 +212,16 @@ static int ipoib_path_seq_show(struct seq_file *file, void *iter_ptr)
213 gid_buf, path.pathrec.dlid ? "yes" : "no"); 212 gid_buf, path.pathrec.dlid ? "yes" : "no");
214 213
215 if (path.pathrec.dlid) { 214 if (path.pathrec.dlid) {
216 rate = ib_rate_to_mbps(path.pathrec.rate); 215 rate = ib_rate_to_mult(path.pathrec.rate) * 25;
217 216
218 seq_printf(file, 217 seq_printf(file,
219 " DLID: 0x%04x\n" 218 " DLID: 0x%04x\n"
220 " SL: %12d\n" 219 " SL: %12d\n"
221 " rate: %8d.%d Gb/sec\n", 220 " rate: %*d%s Gb/sec\n",
222 be16_to_cpu(path.pathrec.dlid), 221 be16_to_cpu(path.pathrec.dlid),
223 path.pathrec.sl, 222 path.pathrec.sl,
224 rate / 1000, rate % 1000); 223 10 - ((rate % 10) ? 2 : 0),
224 rate / 10, rate % 10 ? ".5" : "");
225 } 225 }
226 226
227 seq_putc(file, '\n'); 227 seq_putc(file, '\n');
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index a1bca70e20a..81ae61d68a2 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -34,7 +34,6 @@
34 */ 34 */
35 35
36#include <linux/delay.h> 36#include <linux/delay.h>
37#include <linux/moduleparam.h>
38#include <linux/dma-mapping.h> 37#include <linux/dma-mapping.h>
39#include <linux/slab.h> 38#include <linux/slab.h>
40 39
@@ -57,24 +56,21 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
57 struct ib_pd *pd, struct ib_ah_attr *attr) 56 struct ib_pd *pd, struct ib_ah_attr *attr)
58{ 57{
59 struct ipoib_ah *ah; 58 struct ipoib_ah *ah;
60 struct ib_ah *vah;
61 59
62 ah = kmalloc(sizeof *ah, GFP_KERNEL); 60 ah = kmalloc(sizeof *ah, GFP_KERNEL);
63 if (!ah) 61 if (!ah)
64 return ERR_PTR(-ENOMEM); 62 return NULL;
65 63
66 ah->dev = dev; 64 ah->dev = dev;
67 ah->last_send = 0; 65 ah->last_send = 0;
68 kref_init(&ah->ref); 66 kref_init(&ah->ref);
69 67
70 vah = ib_create_ah(pd, attr); 68 ah->ah = ib_create_ah(pd, attr);
71 if (IS_ERR(vah)) { 69 if (IS_ERR(ah->ah)) {
72 kfree(ah); 70 kfree(ah);
73 ah = (struct ipoib_ah *)vah; 71 ah = NULL;
74 } else { 72 } else
75 ah->ah = vah;
76 ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah); 73 ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah);
77 }
78 74
79 return ah; 75 return ah;
80} 76}
@@ -121,9 +117,9 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
121 117
122 size = length - IPOIB_UD_HEAD_SIZE; 118 size = length - IPOIB_UD_HEAD_SIZE;
123 119
124 skb_frag_size_set(frag, size); 120 frag->size = size;
125 skb->data_len += size; 121 skb->data_len += size;
126 skb->truesize += PAGE_SIZE; 122 skb->truesize += size;
127 } else 123 } else
128 skb_put(skb, length); 124 skb_put(skb, length);
129 125
@@ -156,18 +152,14 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
156 struct ipoib_dev_priv *priv = netdev_priv(dev); 152 struct ipoib_dev_priv *priv = netdev_priv(dev);
157 struct sk_buff *skb; 153 struct sk_buff *skb;
158 int buf_size; 154 int buf_size;
159 int tailroom;
160 u64 *mapping; 155 u64 *mapping;
161 156
162 if (ipoib_ud_need_sg(priv->max_ib_mtu)) { 157 if (ipoib_ud_need_sg(priv->max_ib_mtu))
163 buf_size = IPOIB_UD_HEAD_SIZE; 158 buf_size = IPOIB_UD_HEAD_SIZE;
164 tailroom = 128; /* reserve some tailroom for IP/TCP headers */ 159 else
165 } else {
166 buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu); 160 buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
167 tailroom = 0;
168 }
169 161
170 skb = dev_alloc_skb(buf_size + tailroom + 4); 162 skb = dev_alloc_skb(buf_size + 4);
171 if (unlikely(!skb)) 163 if (unlikely(!skb))
172 return NULL; 164 return NULL;
173 165
@@ -190,7 +182,7 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
190 goto partial_error; 182 goto partial_error;
191 skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE); 183 skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
192 mapping[1] = 184 mapping[1] =
193 ib_dma_map_page(priv->ca, page, 185 ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page,
194 0, PAGE_SIZE, DMA_FROM_DEVICE); 186 0, PAGE_SIZE, DMA_FROM_DEVICE);
195 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1]))) 187 if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
196 goto partial_error; 188 goto partial_error;
@@ -300,8 +292,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
300 dev->stats.rx_bytes += skb->len; 292 dev->stats.rx_bytes += skb->len;
301 293
302 skb->dev = dev; 294 skb->dev = dev;
303 if ((dev->features & NETIF_F_RXCSUM) && 295 if ((dev->features & NETIF_F_RXCSUM) && likely(wc->csum_ok))
304 likely(wc->wc_flags & IB_WC_IP_CSUM_OK))
305 skb->ip_summed = CHECKSUM_UNNECESSARY; 296 skb->ip_summed = CHECKSUM_UNNECESSARY;
306 297
307 napi_gro_receive(&priv->napi, skb); 298 napi_gro_receive(&priv->napi, skb);
@@ -331,10 +322,9 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
331 off = 0; 322 off = 0;
332 323
333 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { 324 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
334 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 325 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
335 mapping[i + off] = ib_dma_map_page(ca, 326 mapping[i + off] = ib_dma_map_page(ca, frag->page,
336 skb_frag_page(frag), 327 frag->page_offset, frag->size,
337 frag->page_offset, skb_frag_size(frag),
338 DMA_TO_DEVICE); 328 DMA_TO_DEVICE);
339 if (unlikely(ib_dma_mapping_error(ca, mapping[i + off]))) 329 if (unlikely(ib_dma_mapping_error(ca, mapping[i + off])))
340 goto partial_error; 330 goto partial_error;
@@ -343,9 +333,8 @@ static int ipoib_dma_map_tx(struct ib_device *ca,
343 333
344partial_error: 334partial_error:
345 for (; i > 0; --i) { 335 for (; i > 0; --i) {
346 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; 336 skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
347 337 ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE);
348 ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE);
349 } 338 }
350 339
351 if (off) 340 if (off)
@@ -369,9 +358,8 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
369 off = 0; 358 off = 0;
370 359
371 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { 360 for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
372 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; 361 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
373 362 ib_dma_unmap_page(ca, mapping[i + off], frag->size,
374 ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag),
375 DMA_TO_DEVICE); 363 DMA_TO_DEVICE);
376 } 364 }
377} 365}
@@ -521,7 +509,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
521 509
522 for (i = 0; i < nr_frags; ++i) { 510 for (i = 0; i < nr_frags; ++i) {
523 priv->tx_sge[i + off].addr = mapping[i + off]; 511 priv->tx_sge[i + off].addr = mapping[i + off];
524 priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); 512 priv->tx_sge[i + off].length = frags[i].size;
525 } 513 }
526 priv->tx_wr.num_sge = nr_frags + off; 514 priv->tx_wr.num_sge = nr_frags + off;
527 priv->tx_wr.wr_id = wr_id; 515 priv->tx_wr.wr_id = wr_id;
@@ -615,9 +603,8 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
615 603
616 address->last_send = priv->tx_head; 604 address->last_send = priv->tx_head;
617 ++priv->tx_head; 605 ++priv->tx_head;
618
619 skb_orphan(skb); 606 skb_orphan(skb);
620 skb_dst_drop(skb); 607
621 } 608 }
622 609
623 if (unlikely(priv->tx_outstanding > MAX_SEND_CQE)) 610 if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 6fdc9e78da0..a98c414978e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -46,8 +46,7 @@
46#include <linux/ip.h> 46#include <linux/ip.h>
47#include <linux/in.h> 47#include <linux/in.h>
48 48
49#include <linux/jhash.h> 49#include <net/dst.h>
50#include <net/arp.h>
51 50
52MODULE_AUTHOR("Roland Dreier"); 51MODULE_AUTHOR("Roland Dreier");
53MODULE_DESCRIPTION("IP-over-InfiniBand net driver"); 52MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
@@ -85,7 +84,6 @@ struct ib_sa_client ipoib_sa_client;
85 84
86static void ipoib_add_one(struct ib_device *device); 85static void ipoib_add_one(struct ib_device *device);
87static void ipoib_remove_one(struct ib_device *device); 86static void ipoib_remove_one(struct ib_device *device);
88static void ipoib_neigh_reclaim(struct rcu_head *rp);
89 87
90static struct ib_client ipoib_client = { 88static struct ib_client ipoib_client = {
91 .name = "ipoib", 89 .name = "ipoib",
@@ -150,7 +148,7 @@ static int ipoib_stop(struct net_device *dev)
150 148
151 netif_stop_queue(dev); 149 netif_stop_queue(dev);
152 150
153 ipoib_ib_dev_down(dev, 1); 151 ipoib_ib_dev_down(dev, 0);
154 ipoib_ib_dev_stop(dev, 0); 152 ipoib_ib_dev_stop(dev, 0);
155 153
156 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 154 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
@@ -173,12 +171,7 @@ static int ipoib_stop(struct net_device *dev)
173 return 0; 171 return 0;
174} 172}
175 173
176static void ipoib_uninit(struct net_device *dev) 174static u32 ipoib_fix_features(struct net_device *dev, u32 features)
177{
178 ipoib_dev_cleanup(dev);
179}
180
181static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
182{ 175{
183 struct ipoib_dev_priv *priv = netdev_priv(dev); 176 struct ipoib_dev_priv *priv = netdev_priv(dev);
184 177
@@ -215,37 +208,6 @@ static int ipoib_change_mtu(struct net_device *dev, int new_mtu)
215 return 0; 208 return 0;
216} 209}
217 210
218int ipoib_set_mode(struct net_device *dev, const char *buf)
219{
220 struct ipoib_dev_priv *priv = netdev_priv(dev);
221
222 /* flush paths if we switch modes so that connections are restarted */
223 if (IPOIB_CM_SUPPORTED(dev->dev_addr) && !strcmp(buf, "connected\n")) {
224 set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
225 ipoib_warn(priv, "enabling connected mode "
226 "will cause multicast packet drops\n");
227 netdev_update_features(dev);
228 rtnl_unlock();
229 priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
230
231 ipoib_flush_paths(dev);
232 rtnl_lock();
233 return 0;
234 }
235
236 if (!strcmp(buf, "datagram\n")) {
237 clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
238 netdev_update_features(dev);
239 dev_set_mtu(dev, min(priv->mcast_mtu, dev->mtu));
240 rtnl_unlock();
241 ipoib_flush_paths(dev);
242 rtnl_lock();
243 return 0;
244 }
245
246 return -EINVAL;
247}
248
249static struct ipoib_path *__path_find(struct net_device *dev, void *gid) 211static struct ipoib_path *__path_find(struct net_device *dev, void *gid)
250{ 212{
251 struct ipoib_dev_priv *priv = netdev_priv(dev); 213 struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -302,15 +264,30 @@ static int __path_add(struct net_device *dev, struct ipoib_path *path)
302 264
303static void path_free(struct net_device *dev, struct ipoib_path *path) 265static void path_free(struct net_device *dev, struct ipoib_path *path)
304{ 266{
267 struct ipoib_dev_priv *priv = netdev_priv(dev);
268 struct ipoib_neigh *neigh, *tn;
305 struct sk_buff *skb; 269 struct sk_buff *skb;
270 unsigned long flags;
306 271
307 while ((skb = __skb_dequeue(&path->queue))) 272 while ((skb = __skb_dequeue(&path->queue)))
308 dev_kfree_skb_irq(skb); 273 dev_kfree_skb_irq(skb);
309 274
310 ipoib_dbg(netdev_priv(dev), "path_free\n"); 275 spin_lock_irqsave(&priv->lock, flags);
276
277 list_for_each_entry_safe(neigh, tn, &path->neigh_list, list) {
278 /*
279 * It's safe to call ipoib_put_ah() inside priv->lock
280 * here, because we know that path->ah will always
281 * hold one more reference, so ipoib_put_ah() will
282 * never do more than decrement the ref count.
283 */
284 if (neigh->ah)
285 ipoib_put_ah(neigh->ah);
286
287 ipoib_neigh_free(dev, neigh);
288 }
311 289
312 /* remove all neigh connected to this path */ 290 spin_unlock_irqrestore(&priv->lock, flags);
313 ipoib_del_neighs_by_gid(dev, path->pathrec.dgid.raw);
314 291
315 if (path->ah) 292 if (path->ah)
316 ipoib_put_ah(path->ah); 293 ipoib_put_ah(path->ah);
@@ -455,7 +432,7 @@ static void path_rec_completion(int status,
455 432
456 spin_lock_irqsave(&priv->lock, flags); 433 spin_lock_irqsave(&priv->lock, flags);
457 434
458 if (!IS_ERR_OR_NULL(ah)) { 435 if (ah) {
459 path->pathrec = *pathrec; 436 path->pathrec = *pathrec;
460 437
461 old_ah = path->ah; 438 old_ah = path->ah;
@@ -481,15 +458,19 @@ static void path_rec_completion(int status,
481 } 458 }
482 kref_get(&path->ah->ref); 459 kref_get(&path->ah->ref);
483 neigh->ah = path->ah; 460 neigh->ah = path->ah;
461 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
462 sizeof(union ib_gid));
484 463
485 if (ipoib_cm_enabled(dev, neigh->daddr)) { 464 if (ipoib_cm_enabled(dev, neigh->neighbour)) {
486 if (!ipoib_cm_get(neigh)) 465 if (!ipoib_cm_get(neigh))
487 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, 466 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev,
488 path, 467 path,
489 neigh)); 468 neigh));
490 if (!ipoib_cm_get(neigh)) { 469 if (!ipoib_cm_get(neigh)) {
491 list_del(&neigh->list); 470 list_del(&neigh->list);
492 ipoib_neigh_free(neigh); 471 if (neigh->ah)
472 ipoib_put_ah(neigh->ah);
473 ipoib_neigh_free(dev, neigh);
493 continue; 474 continue;
494 } 475 }
495 } 476 }
@@ -574,26 +555,28 @@ static int path_rec_start(struct net_device *dev,
574 return 0; 555 return 0;
575} 556}
576 557
577static void neigh_add_path(struct sk_buff *skb, u8 *daddr, 558/* called with rcu_read_lock */
578 struct net_device *dev) 559static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
579{ 560{
580 struct ipoib_dev_priv *priv = netdev_priv(dev); 561 struct ipoib_dev_priv *priv = netdev_priv(dev);
581 struct ipoib_path *path; 562 struct ipoib_path *path;
582 struct ipoib_neigh *neigh; 563 struct ipoib_neigh *neigh;
564 struct neighbour *n;
583 unsigned long flags; 565 unsigned long flags;
584 566
585 spin_lock_irqsave(&priv->lock, flags); 567 n = dst_get_neighbour(skb_dst(skb));
586 neigh = ipoib_neigh_alloc(daddr, dev); 568 neigh = ipoib_neigh_alloc(n, skb->dev);
587 if (!neigh) { 569 if (!neigh) {
588 spin_unlock_irqrestore(&priv->lock, flags);
589 ++dev->stats.tx_dropped; 570 ++dev->stats.tx_dropped;
590 dev_kfree_skb_any(skb); 571 dev_kfree_skb_any(skb);
591 return; 572 return;
592 } 573 }
593 574
594 path = __path_find(dev, daddr + 4); 575 spin_lock_irqsave(&priv->lock, flags);
576
577 path = __path_find(dev, n->ha + 4);
595 if (!path) { 578 if (!path) {
596 path = path_rec_create(dev, daddr + 4); 579 path = path_rec_create(dev, n->ha + 4);
597 if (!path) 580 if (!path)
598 goto err_path; 581 goto err_path;
599 582
@@ -605,13 +588,17 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
605 if (path->ah) { 588 if (path->ah) {
606 kref_get(&path->ah->ref); 589 kref_get(&path->ah->ref);
607 neigh->ah = path->ah; 590 neigh->ah = path->ah;
591 memcpy(&neigh->dgid.raw, &path->pathrec.dgid.raw,
592 sizeof(union ib_gid));
608 593
609 if (ipoib_cm_enabled(dev, neigh->daddr)) { 594 if (ipoib_cm_enabled(dev, neigh->neighbour)) {
610 if (!ipoib_cm_get(neigh)) 595 if (!ipoib_cm_get(neigh))
611 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh)); 596 ipoib_cm_set(neigh, ipoib_cm_create_tx(dev, path, neigh));
612 if (!ipoib_cm_get(neigh)) { 597 if (!ipoib_cm_get(neigh)) {
613 list_del(&neigh->list); 598 list_del(&neigh->list);
614 ipoib_neigh_free(neigh); 599 if (neigh->ah)
600 ipoib_put_ah(neigh->ah);
601 ipoib_neigh_free(dev, neigh);
615 goto err_drop; 602 goto err_drop;
616 } 603 }
617 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) 604 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
@@ -623,8 +610,7 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
623 } 610 }
624 } else { 611 } else {
625 spin_unlock_irqrestore(&priv->lock, flags); 612 spin_unlock_irqrestore(&priv->lock, flags);
626 ipoib_send(dev, skb, path->ah, IPOIB_QPN(daddr)); 613 ipoib_send(dev, skb, path->ah, IPOIB_QPN(n->ha));
627 ipoib_neigh_put(neigh);
628 return; 614 return;
629 } 615 }
630 } else { 616 } else {
@@ -637,24 +623,42 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
637 } 623 }
638 624
639 spin_unlock_irqrestore(&priv->lock, flags); 625 spin_unlock_irqrestore(&priv->lock, flags);
640 ipoib_neigh_put(neigh);
641 return; 626 return;
642 627
643err_list: 628err_list:
644 list_del(&neigh->list); 629 list_del(&neigh->list);
645 630
646err_path: 631err_path:
647 ipoib_neigh_free(neigh); 632 ipoib_neigh_free(dev, neigh);
648err_drop: 633err_drop:
649 ++dev->stats.tx_dropped; 634 ++dev->stats.tx_dropped;
650 dev_kfree_skb_any(skb); 635 dev_kfree_skb_any(skb);
651 636
652 spin_unlock_irqrestore(&priv->lock, flags); 637 spin_unlock_irqrestore(&priv->lock, flags);
653 ipoib_neigh_put(neigh); 638}
639
640/* called with rcu_read_lock */
641static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev)
642{
643 struct ipoib_dev_priv *priv = netdev_priv(skb->dev);
644 struct dst_entry *dst = skb_dst(skb);
645 struct neighbour *n;
646
647 /* Look up path record for unicasts */
648 n = dst_get_neighbour(dst);
649 if (n->ha[4] != 0xff) {
650 neigh_add_path(skb, dev);
651 return;
652 }
653
654 /* Add in the P_Key for multicasts */
655 n->ha[8] = (priv->pkey >> 8) & 0xff;
656 n->ha[9] = priv->pkey & 0xff;
657 ipoib_mcast_send(dev, n->ha + 4, skb);
654} 658}
655 659
656static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, 660static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
657 struct ipoib_cb *cb) 661 struct ipoib_pseudoheader *phdr)
658{ 662{
659 struct ipoib_dev_priv *priv = netdev_priv(dev); 663 struct ipoib_dev_priv *priv = netdev_priv(dev);
660 struct ipoib_path *path; 664 struct ipoib_path *path;
@@ -662,15 +666,17 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
662 666
663 spin_lock_irqsave(&priv->lock, flags); 667 spin_lock_irqsave(&priv->lock, flags);
664 668
665 path = __path_find(dev, cb->hwaddr + 4); 669 path = __path_find(dev, phdr->hwaddr + 4);
666 if (!path || !path->valid) { 670 if (!path || !path->valid) {
667 int new_path = 0; 671 int new_path = 0;
668 672
669 if (!path) { 673 if (!path) {
670 path = path_rec_create(dev, cb->hwaddr + 4); 674 path = path_rec_create(dev, phdr->hwaddr + 4);
671 new_path = 1; 675 new_path = 1;
672 } 676 }
673 if (path) { 677 if (path) {
678 /* put pseudoheader back on for next time */
679 skb_push(skb, sizeof *phdr);
674 __skb_queue_tail(&path->queue, skb); 680 __skb_queue_tail(&path->queue, skb);
675 681
676 if (!path->query && path_rec_start(dev, path)) { 682 if (!path->query && path_rec_start(dev, path)) {
@@ -694,10 +700,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
694 be16_to_cpu(path->pathrec.dlid)); 700 be16_to_cpu(path->pathrec.dlid));
695 701
696 spin_unlock_irqrestore(&priv->lock, flags); 702 spin_unlock_irqrestore(&priv->lock, flags);
697 ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); 703 ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
698 return; 704 return;
699 } else if ((path->query || !path_rec_start(dev, path)) && 705 } else if ((path->query || !path_rec_start(dev, path)) &&
700 skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 706 skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
707 /* put pseudoheader back on for next time */
708 skb_push(skb, sizeof *phdr);
701 __skb_queue_tail(&path->queue, skb); 709 __skb_queue_tail(&path->queue, skb);
702 } else { 710 } else {
703 ++dev->stats.tx_dropped; 711 ++dev->stats.tx_dropped;
@@ -711,80 +719,91 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
711{ 719{
712 struct ipoib_dev_priv *priv = netdev_priv(dev); 720 struct ipoib_dev_priv *priv = netdev_priv(dev);
713 struct ipoib_neigh *neigh; 721 struct ipoib_neigh *neigh;
714 struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; 722 struct neighbour *n = NULL;
715 struct ipoib_header *header;
716 unsigned long flags; 723 unsigned long flags;
717 724
718 header = (struct ipoib_header *) skb->data; 725 rcu_read_lock();
726 if (likely(skb_dst(skb)))
727 n = dst_get_neighbour(skb_dst(skb));
719 728
720 if (unlikely(cb->hwaddr[4] == 0xff)) { 729 if (likely(n)) {
721 /* multicast, arrange "if" according to probability */ 730 if (unlikely(!*to_ipoib_neigh(n))) {
722 if ((header->proto != htons(ETH_P_IP)) && 731 ipoib_path_lookup(skb, dev);
723 (header->proto != htons(ETH_P_IPV6)) && 732 goto unlock;
724 (header->proto != htons(ETH_P_ARP)) &&
725 (header->proto != htons(ETH_P_RARP))) {
726 /* ethertype not supported by IPoIB */
727 ++dev->stats.tx_dropped;
728 dev_kfree_skb_any(skb);
729 return NETDEV_TX_OK;
730 } 733 }
731 /* Add in the P_Key for multicast*/
732 cb->hwaddr[8] = (priv->pkey >> 8) & 0xff;
733 cb->hwaddr[9] = priv->pkey & 0xff;
734
735 neigh = ipoib_neigh_get(dev, cb->hwaddr);
736 if (likely(neigh))
737 goto send_using_neigh;
738 ipoib_mcast_send(dev, cb->hwaddr, skb);
739 return NETDEV_TX_OK;
740 }
741 734
742 /* unicast, arrange "switch" according to probability */ 735 neigh = *to_ipoib_neigh(n);
743 switch (header->proto) { 736
744 case htons(ETH_P_IP): 737 if (unlikely((memcmp(&neigh->dgid.raw,
745 case htons(ETH_P_IPV6): 738 n->ha + 4,
746 neigh = ipoib_neigh_get(dev, cb->hwaddr); 739 sizeof(union ib_gid))) ||
747 if (unlikely(!neigh)) { 740 (neigh->dev != dev))) {
748 neigh_add_path(skb, cb->hwaddr, dev); 741 spin_lock_irqsave(&priv->lock, flags);
749 return NETDEV_TX_OK; 742 /*
743 * It's safe to call ipoib_put_ah() inside
744 * priv->lock here, because we know that
745 * path->ah will always hold one more reference,
746 * so ipoib_put_ah() will never do more than
747 * decrement the ref count.
748 */
749 if (neigh->ah)
750 ipoib_put_ah(neigh->ah);
751 list_del(&neigh->list);
752 ipoib_neigh_free(dev, neigh);
753 spin_unlock_irqrestore(&priv->lock, flags);
754 ipoib_path_lookup(skb, dev);
755 goto unlock;
750 } 756 }
751 break;
752 case htons(ETH_P_ARP):
753 case htons(ETH_P_RARP):
754 /* for unicast ARP and RARP should always perform path find */
755 unicast_arp_send(skb, dev, cb);
756 return NETDEV_TX_OK;
757 default:
758 /* ethertype not supported by IPoIB */
759 ++dev->stats.tx_dropped;
760 dev_kfree_skb_any(skb);
761 return NETDEV_TX_OK;
762 }
763 757
764send_using_neigh: 758 if (ipoib_cm_get(neigh)) {
765 /* note we now hold a ref to neigh */ 759 if (ipoib_cm_up(neigh)) {
766 if (ipoib_cm_get(neigh)) { 760 ipoib_cm_send(dev, skb, ipoib_cm_get(neigh));
767 if (ipoib_cm_up(neigh)) { 761 goto unlock;
768 ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); 762 }
769 goto unref; 763 } else if (neigh->ah) {
764 ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha));
765 goto unlock;
770 } 766 }
771 } else if (neigh->ah) {
772 ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(cb->hwaddr));
773 goto unref;
774 }
775 767
776 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { 768 if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
777 spin_lock_irqsave(&priv->lock, flags); 769 spin_lock_irqsave(&priv->lock, flags);
778 __skb_queue_tail(&neigh->queue, skb); 770 __skb_queue_tail(&neigh->queue, skb);
779 spin_unlock_irqrestore(&priv->lock, flags); 771 spin_unlock_irqrestore(&priv->lock, flags);
772 } else {
773 ++dev->stats.tx_dropped;
774 dev_kfree_skb_any(skb);
775 }
780 } else { 776 } else {
781 ++dev->stats.tx_dropped; 777 struct ipoib_pseudoheader *phdr =
782 dev_kfree_skb_any(skb); 778 (struct ipoib_pseudoheader *) skb->data;
783 } 779 skb_pull(skb, sizeof *phdr);
784 780
785unref: 781 if (phdr->hwaddr[4] == 0xff) {
786 ipoib_neigh_put(neigh); 782 /* Add in the P_Key for multicast*/
783 phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
784 phdr->hwaddr[9] = priv->pkey & 0xff;
785
786 ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
787 } else {
788 /* unicast GID -- should be ARP or RARP reply */
789
790 if ((be16_to_cpup((__be16 *) skb->data) != ETH_P_ARP) &&
791 (be16_to_cpup((__be16 *) skb->data) != ETH_P_RARP)) {
792 ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n",
793 skb_dst(skb) ? "neigh" : "dst",
794 be16_to_cpup((__be16 *) skb->data),
795 IPOIB_QPN(phdr->hwaddr),
796 phdr->hwaddr + 4);
797 dev_kfree_skb_any(skb);
798 ++dev->stats.tx_dropped;
799 goto unlock;
800 }
787 801
802 unicast_arp_send(skb, dev, phdr);
803 }
804 }
805unlock:
806 rcu_read_unlock();
788 return NETDEV_TX_OK; 807 return NETDEV_TX_OK;
789} 808}
790 809
@@ -806,7 +825,8 @@ static int ipoib_hard_header(struct sk_buff *skb,
806 const void *daddr, const void *saddr, unsigned len) 825 const void *daddr, const void *saddr, unsigned len)
807{ 826{
808 struct ipoib_header *header; 827 struct ipoib_header *header;
809 struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; 828 struct dst_entry *dst;
829 struct neighbour *n;
810 830
811 header = (struct ipoib_header *) skb_push(skb, sizeof *header); 831 header = (struct ipoib_header *) skb_push(skb, sizeof *header);
812 832
@@ -814,11 +834,19 @@ static int ipoib_hard_header(struct sk_buff *skb,
814 header->reserved = 0; 834 header->reserved = 0;
815 835
816 /* 836 /*
817 * we don't rely on dst_entry structure, always stuff the 837 * If we don't have a neighbour structure, stuff the
818 * destination address into skb->cb so we can figure out where 838 * destination address onto the front of the skb so we can
819 * to send the packet later. 839 * figure out where to send the packet later.
820 */ 840 */
821 memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); 841 dst = skb_dst(skb);
842 n = NULL;
843 if (dst)
844 n = dst_get_neighbour_raw(dst);
845 if ((!dst || !n) && daddr) {
846 struct ipoib_pseudoheader *phdr =
847 (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr);
848 memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN);
849 }
822 850
823 return 0; 851 return 0;
824} 852}
@@ -835,433 +863,86 @@ static void ipoib_set_mcast_list(struct net_device *dev)
835 queue_work(ipoib_workqueue, &priv->restart_task); 863 queue_work(ipoib_workqueue, &priv->restart_task);
836} 864}
837 865
838static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr) 866static void ipoib_neigh_cleanup(struct neighbour *n)
839{
840 /*
841 * Use only the address parts that contributes to spreading
842 * The subnet prefix is not used as one can not connect to
843 * same remote port (GUID) using the same remote QPN via two
844 * different subnets.
845 */
846 /* qpn octets[1:4) & port GUID octets[12:20) */
847 u32 *daddr_32 = (u32 *) daddr;
848 u32 hv;
849
850 hv = jhash_3words(daddr_32[3], daddr_32[4], 0xFFFFFF & daddr_32[0], 0);
851 return hv & htbl->mask;
852}
853
854struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
855{
856 struct ipoib_dev_priv *priv = netdev_priv(dev);
857 struct ipoib_neigh_table *ntbl = &priv->ntbl;
858 struct ipoib_neigh_hash *htbl;
859 struct ipoib_neigh *neigh = NULL;
860 u32 hash_val;
861
862 rcu_read_lock_bh();
863
864 htbl = rcu_dereference_bh(ntbl->htbl);
865
866 if (!htbl)
867 goto out_unlock;
868
869 hash_val = ipoib_addr_hash(htbl, daddr);
870 for (neigh = rcu_dereference_bh(htbl->buckets[hash_val]);
871 neigh != NULL;
872 neigh = rcu_dereference_bh(neigh->hnext)) {
873 if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
874 /* found, take one ref on behalf of the caller */
875 if (!atomic_inc_not_zero(&neigh->refcnt)) {
876 /* deleted */
877 neigh = NULL;
878 goto out_unlock;
879 }
880 neigh->alive = jiffies;
881 goto out_unlock;
882 }
883 }
884
885out_unlock:
886 rcu_read_unlock_bh();
887 return neigh;
888}
889
890static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
891{ 867{
892 struct ipoib_neigh_table *ntbl = &priv->ntbl; 868 struct ipoib_neigh *neigh;
893 struct ipoib_neigh_hash *htbl; 869 struct ipoib_dev_priv *priv = netdev_priv(n->dev);
894 unsigned long neigh_obsolete;
895 unsigned long dt;
896 unsigned long flags; 870 unsigned long flags;
897 int i; 871 struct ipoib_ah *ah = NULL;
898 872
899 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) 873 neigh = *to_ipoib_neigh(n);
874 if (neigh)
875 priv = netdev_priv(neigh->dev);
876 else
900 return; 877 return;
878 ipoib_dbg(priv,
879 "neigh_cleanup for %06x %pI6\n",
880 IPOIB_QPN(n->ha),
881 n->ha + 4);
901 882
902 spin_lock_irqsave(&priv->lock, flags); 883 spin_lock_irqsave(&priv->lock, flags);
903 884
904 htbl = rcu_dereference_protected(ntbl->htbl, 885 if (neigh->ah)
905 lockdep_is_held(&priv->lock)); 886 ah = neigh->ah;
906 887 list_del(&neigh->list);
907 if (!htbl) 888 ipoib_neigh_free(n->dev, neigh);
908 goto out_unlock;
909
910 /* neigh is obsolete if it was idle for two GC periods */
911 dt = 2 * arp_tbl.gc_interval;
912 neigh_obsolete = jiffies - dt;
913 /* handle possible race condition */
914 if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
915 goto out_unlock;
916
917 for (i = 0; i < htbl->size; i++) {
918 struct ipoib_neigh *neigh;
919 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
920
921 while ((neigh = rcu_dereference_protected(*np,
922 lockdep_is_held(&priv->lock))) != NULL) {
923 /* was the neigh idle for two GC periods */
924 if (time_after(neigh_obsolete, neigh->alive)) {
925 rcu_assign_pointer(*np,
926 rcu_dereference_protected(neigh->hnext,
927 lockdep_is_held(&priv->lock)));
928 /* remove from path/mc list */
929 list_del(&neigh->list);
930 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
931 } else {
932 np = &neigh->hnext;
933 }
934
935 }
936 }
937 889
938out_unlock:
939 spin_unlock_irqrestore(&priv->lock, flags); 890 spin_unlock_irqrestore(&priv->lock, flags);
940}
941
942static void ipoib_reap_neigh(struct work_struct *work)
943{
944 struct ipoib_dev_priv *priv =
945 container_of(work, struct ipoib_dev_priv, neigh_reap_task.work);
946 891
947 __ipoib_reap_neigh(priv); 892 if (ah)
948 893 ipoib_put_ah(ah);
949 if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
950 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
951 arp_tbl.gc_interval);
952} 894}
953 895
954 896struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour,
955static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
956 struct net_device *dev) 897 struct net_device *dev)
957{ 898{
958 struct ipoib_neigh *neigh; 899 struct ipoib_neigh *neigh;
959 900
960 neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); 901 neigh = kmalloc(sizeof *neigh, GFP_ATOMIC);
961 if (!neigh) 902 if (!neigh)
962 return NULL; 903 return NULL;
963 904
905 neigh->neighbour = neighbour;
964 neigh->dev = dev; 906 neigh->dev = dev;
965 memcpy(&neigh->daddr, daddr, sizeof(neigh->daddr)); 907 memset(&neigh->dgid.raw, 0, sizeof (union ib_gid));
908 *to_ipoib_neigh(neighbour) = neigh;
966 skb_queue_head_init(&neigh->queue); 909 skb_queue_head_init(&neigh->queue);
967 INIT_LIST_HEAD(&neigh->list);
968 ipoib_cm_set(neigh, NULL); 910 ipoib_cm_set(neigh, NULL);
969 /* one ref on behalf of the caller */
970 atomic_set(&neigh->refcnt, 1);
971 911
972 return neigh; 912 return neigh;
973} 913}
974 914
975struct ipoib_neigh *ipoib_neigh_alloc(u8 *daddr, 915void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh)
976 struct net_device *dev)
977{ 916{
978 struct ipoib_dev_priv *priv = netdev_priv(dev);
979 struct ipoib_neigh_table *ntbl = &priv->ntbl;
980 struct ipoib_neigh_hash *htbl;
981 struct ipoib_neigh *neigh;
982 u32 hash_val;
983
984 htbl = rcu_dereference_protected(ntbl->htbl,
985 lockdep_is_held(&priv->lock));
986 if (!htbl) {
987 neigh = NULL;
988 goto out_unlock;
989 }
990
991 /* need to add a new neigh, but maybe some other thread succeeded?
992 * recalc hash, maybe hash resize took place so we do a search
993 */
994 hash_val = ipoib_addr_hash(htbl, daddr);
995 for (neigh = rcu_dereference_protected(htbl->buckets[hash_val],
996 lockdep_is_held(&priv->lock));
997 neigh != NULL;
998 neigh = rcu_dereference_protected(neigh->hnext,
999 lockdep_is_held(&priv->lock))) {
1000 if (memcmp(daddr, neigh->daddr, INFINIBAND_ALEN) == 0) {
1001 /* found, take one ref on behalf of the caller */
1002 if (!atomic_inc_not_zero(&neigh->refcnt)) {
1003 /* deleted */
1004 neigh = NULL;
1005 break;
1006 }
1007 neigh->alive = jiffies;
1008 goto out_unlock;
1009 }
1010 }
1011
1012 neigh = ipoib_neigh_ctor(daddr, dev);
1013 if (!neigh)
1014 goto out_unlock;
1015
1016 /* one ref on behalf of the hash table */
1017 atomic_inc(&neigh->refcnt);
1018 neigh->alive = jiffies;
1019 /* put in hash */
1020 rcu_assign_pointer(neigh->hnext,
1021 rcu_dereference_protected(htbl->buckets[hash_val],
1022 lockdep_is_held(&priv->lock)));
1023 rcu_assign_pointer(htbl->buckets[hash_val], neigh);
1024 atomic_inc(&ntbl->entries);
1025
1026out_unlock:
1027
1028 return neigh;
1029}
1030
1031void ipoib_neigh_dtor(struct ipoib_neigh *neigh)
1032{
1033 /* neigh reference count was dropprd to zero */
1034 struct net_device *dev = neigh->dev;
1035 struct ipoib_dev_priv *priv = netdev_priv(dev);
1036 struct sk_buff *skb; 917 struct sk_buff *skb;
1037 if (neigh->ah) 918 *to_ipoib_neigh(neigh->neighbour) = NULL;
1038 ipoib_put_ah(neigh->ah);
1039 while ((skb = __skb_dequeue(&neigh->queue))) { 919 while ((skb = __skb_dequeue(&neigh->queue))) {
1040 ++dev->stats.tx_dropped; 920 ++dev->stats.tx_dropped;
1041 dev_kfree_skb_any(skb); 921 dev_kfree_skb_any(skb);
1042 } 922 }
1043 if (ipoib_cm_get(neigh)) 923 if (ipoib_cm_get(neigh))
1044 ipoib_cm_destroy_tx(ipoib_cm_get(neigh)); 924 ipoib_cm_destroy_tx(ipoib_cm_get(neigh));
1045 ipoib_dbg(netdev_priv(dev),
1046 "neigh free for %06x %pI6\n",
1047 IPOIB_QPN(neigh->daddr),
1048 neigh->daddr + 4);
1049 kfree(neigh); 925 kfree(neigh);
1050 if (atomic_dec_and_test(&priv->ntbl.entries)) {
1051 if (test_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags))
1052 complete(&priv->ntbl.flushed);
1053 }
1054}
1055
1056static void ipoib_neigh_reclaim(struct rcu_head *rp)
1057{
1058 /* Called as a result of removal from hash table */
1059 struct ipoib_neigh *neigh = container_of(rp, struct ipoib_neigh, rcu);
1060 /* note TX context may hold another ref */
1061 ipoib_neigh_put(neigh);
1062}
1063
1064void ipoib_neigh_free(struct ipoib_neigh *neigh)
1065{
1066 struct net_device *dev = neigh->dev;
1067 struct ipoib_dev_priv *priv = netdev_priv(dev);
1068 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1069 struct ipoib_neigh_hash *htbl;
1070 struct ipoib_neigh __rcu **np;
1071 struct ipoib_neigh *n;
1072 u32 hash_val;
1073
1074 htbl = rcu_dereference_protected(ntbl->htbl,
1075 lockdep_is_held(&priv->lock));
1076 if (!htbl)
1077 return;
1078
1079 hash_val = ipoib_addr_hash(htbl, neigh->daddr);
1080 np = &htbl->buckets[hash_val];
1081 for (n = rcu_dereference_protected(*np,
1082 lockdep_is_held(&priv->lock));
1083 n != NULL;
1084 n = rcu_dereference_protected(*np,
1085 lockdep_is_held(&priv->lock))) {
1086 if (n == neigh) {
1087 /* found */
1088 rcu_assign_pointer(*np,
1089 rcu_dereference_protected(neigh->hnext,
1090 lockdep_is_held(&priv->lock)));
1091 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1092 return;
1093 } else {
1094 np = &n->hnext;
1095 }
1096 }
1097} 926}
1098 927
1099static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) 928static int ipoib_neigh_setup_dev(struct net_device *dev, struct neigh_parms *parms)
1100{ 929{
1101 struct ipoib_neigh_table *ntbl = &priv->ntbl; 930 parms->neigh_cleanup = ipoib_neigh_cleanup;
1102 struct ipoib_neigh_hash *htbl;
1103 struct ipoib_neigh **buckets;
1104 u32 size;
1105
1106 clear_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
1107 ntbl->htbl = NULL;
1108 htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
1109 if (!htbl)
1110 return -ENOMEM;
1111 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1112 size = roundup_pow_of_two(arp_tbl.gc_thresh3);
1113 buckets = kzalloc(size * sizeof(*buckets), GFP_KERNEL);
1114 if (!buckets) {
1115 kfree(htbl);
1116 return -ENOMEM;
1117 }
1118 htbl->size = size;
1119 htbl->mask = (size - 1);
1120 htbl->buckets = buckets;
1121 ntbl->htbl = htbl;
1122 htbl->ntbl = ntbl;
1123 atomic_set(&ntbl->entries, 0);
1124
1125 /* start garbage collection */
1126 clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1127 queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
1128 arp_tbl.gc_interval);
1129 931
1130 return 0; 932 return 0;
1131} 933}
1132 934
1133static void neigh_hash_free_rcu(struct rcu_head *head)
1134{
1135 struct ipoib_neigh_hash *htbl = container_of(head,
1136 struct ipoib_neigh_hash,
1137 rcu);
1138 struct ipoib_neigh __rcu **buckets = htbl->buckets;
1139 struct ipoib_neigh_table *ntbl = htbl->ntbl;
1140
1141 kfree(buckets);
1142 kfree(htbl);
1143 complete(&ntbl->deleted);
1144}
1145
1146void ipoib_del_neighs_by_gid(struct net_device *dev, u8 *gid)
1147{
1148 struct ipoib_dev_priv *priv = netdev_priv(dev);
1149 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1150 struct ipoib_neigh_hash *htbl;
1151 unsigned long flags;
1152 int i;
1153
1154 /* remove all neigh connected to a given path or mcast */
1155 spin_lock_irqsave(&priv->lock, flags);
1156
1157 htbl = rcu_dereference_protected(ntbl->htbl,
1158 lockdep_is_held(&priv->lock));
1159
1160 if (!htbl)
1161 goto out_unlock;
1162
1163 for (i = 0; i < htbl->size; i++) {
1164 struct ipoib_neigh *neigh;
1165 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
1166
1167 while ((neigh = rcu_dereference_protected(*np,
1168 lockdep_is_held(&priv->lock))) != NULL) {
1169 /* delete neighs belong to this parent */
1170 if (!memcmp(gid, neigh->daddr + 4, sizeof (union ib_gid))) {
1171 rcu_assign_pointer(*np,
1172 rcu_dereference_protected(neigh->hnext,
1173 lockdep_is_held(&priv->lock)));
1174 /* remove from parent list */
1175 list_del(&neigh->list);
1176 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1177 } else {
1178 np = &neigh->hnext;
1179 }
1180
1181 }
1182 }
1183out_unlock:
1184 spin_unlock_irqrestore(&priv->lock, flags);
1185}
1186
1187static void ipoib_flush_neighs(struct ipoib_dev_priv *priv)
1188{
1189 struct ipoib_neigh_table *ntbl = &priv->ntbl;
1190 struct ipoib_neigh_hash *htbl;
1191 unsigned long flags;
1192 int i, wait_flushed = 0;
1193
1194 init_completion(&priv->ntbl.flushed);
1195
1196 spin_lock_irqsave(&priv->lock, flags);
1197
1198 htbl = rcu_dereference_protected(ntbl->htbl,
1199 lockdep_is_held(&priv->lock));
1200 if (!htbl)
1201 goto out_unlock;
1202
1203 wait_flushed = atomic_read(&priv->ntbl.entries);
1204 if (!wait_flushed)
1205 goto free_htbl;
1206
1207 for (i = 0; i < htbl->size; i++) {
1208 struct ipoib_neigh *neigh;
1209 struct ipoib_neigh __rcu **np = &htbl->buckets[i];
1210
1211 while ((neigh = rcu_dereference_protected(*np,
1212 lockdep_is_held(&priv->lock))) != NULL) {
1213 rcu_assign_pointer(*np,
1214 rcu_dereference_protected(neigh->hnext,
1215 lockdep_is_held(&priv->lock)));
1216 /* remove from path/mc list */
1217 list_del(&neigh->list);
1218 call_rcu(&neigh->rcu, ipoib_neigh_reclaim);
1219 }
1220 }
1221
1222free_htbl:
1223 rcu_assign_pointer(ntbl->htbl, NULL);
1224 call_rcu(&htbl->rcu, neigh_hash_free_rcu);
1225
1226out_unlock:
1227 spin_unlock_irqrestore(&priv->lock, flags);
1228 if (wait_flushed)
1229 wait_for_completion(&priv->ntbl.flushed);
1230}
1231
1232static void ipoib_neigh_hash_uninit(struct net_device *dev)
1233{
1234 struct ipoib_dev_priv *priv = netdev_priv(dev);
1235 int stopped;
1236
1237 ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
1238 init_completion(&priv->ntbl.deleted);
1239 set_bit(IPOIB_NEIGH_TBL_FLUSH, &priv->flags);
1240
1241 /* Stop GC if called at init fail need to cancel work */
1242 stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1243 if (!stopped)
1244 cancel_delayed_work(&priv->neigh_reap_task);
1245
1246 ipoib_flush_neighs(priv);
1247
1248 wait_for_completion(&priv->ntbl.deleted);
1249}
1250
1251
1252int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) 935int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
1253{ 936{
1254 struct ipoib_dev_priv *priv = netdev_priv(dev); 937 struct ipoib_dev_priv *priv = netdev_priv(dev);
1255 938
1256 if (ipoib_neigh_hash_init(priv) < 0)
1257 goto out;
1258 /* Allocate RX/TX "rings" to hold queued skbs */ 939 /* Allocate RX/TX "rings" to hold queued skbs */
1259 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring, 940 priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
1260 GFP_KERNEL); 941 GFP_KERNEL);
1261 if (!priv->rx_ring) { 942 if (!priv->rx_ring) {
1262 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n", 943 printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
1263 ca->name, ipoib_recvq_size); 944 ca->name, ipoib_recvq_size);
1264 goto out_neigh_hash_cleanup; 945 goto out;
1265 } 946 }
1266 947
1267 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring); 948 priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1284,8 +965,6 @@ out_tx_ring_cleanup:
1284out_rx_ring_cleanup: 965out_rx_ring_cleanup:
1285 kfree(priv->rx_ring); 966 kfree(priv->rx_ring);
1286 967
1287out_neigh_hash_cleanup:
1288 ipoib_neigh_hash_uninit(dev);
1289out: 968out:
1290 return -ENOMEM; 969 return -ENOMEM;
1291} 970}
@@ -1293,20 +972,15 @@ out:
1293void ipoib_dev_cleanup(struct net_device *dev) 972void ipoib_dev_cleanup(struct net_device *dev)
1294{ 973{
1295 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv; 974 struct ipoib_dev_priv *priv = netdev_priv(dev), *cpriv, *tcpriv;
1296 LIST_HEAD(head);
1297
1298 ASSERT_RTNL();
1299 975
1300 ipoib_delete_debug_files(dev); 976 ipoib_delete_debug_files(dev);
1301 977
1302 /* Delete any child interfaces first */ 978 /* Delete any child interfaces first */
1303 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { 979 list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
1304 /* Stop GC on child */ 980 unregister_netdev(cpriv->dev);
1305 set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); 981 ipoib_dev_cleanup(cpriv->dev);
1306 cancel_delayed_work(&cpriv->neigh_reap_task); 982 free_netdev(cpriv->dev);
1307 unregister_netdevice_queue(cpriv->dev, &head);
1308 } 983 }
1309 unregister_netdevice_many(&head);
1310 984
1311 ipoib_ib_dev_cleanup(dev); 985 ipoib_ib_dev_cleanup(dev);
1312 986
@@ -1315,8 +989,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
1315 989
1316 priv->rx_ring = NULL; 990 priv->rx_ring = NULL;
1317 priv->tx_ring = NULL; 991 priv->tx_ring = NULL;
1318
1319 ipoib_neigh_hash_uninit(dev);
1320} 992}
1321 993
1322static const struct header_ops ipoib_header_ops = { 994static const struct header_ops ipoib_header_ops = {
@@ -1324,17 +996,17 @@ static const struct header_ops ipoib_header_ops = {
1324}; 996};
1325 997
1326static const struct net_device_ops ipoib_netdev_ops = { 998static const struct net_device_ops ipoib_netdev_ops = {
1327 .ndo_uninit = ipoib_uninit,
1328 .ndo_open = ipoib_open, 999 .ndo_open = ipoib_open,
1329 .ndo_stop = ipoib_stop, 1000 .ndo_stop = ipoib_stop,
1330 .ndo_change_mtu = ipoib_change_mtu, 1001 .ndo_change_mtu = ipoib_change_mtu,
1331 .ndo_fix_features = ipoib_fix_features, 1002 .ndo_fix_features = ipoib_fix_features,
1332 .ndo_start_xmit = ipoib_start_xmit, 1003 .ndo_start_xmit = ipoib_start_xmit,
1333 .ndo_tx_timeout = ipoib_timeout, 1004 .ndo_tx_timeout = ipoib_timeout,
1334 .ndo_set_rx_mode = ipoib_set_mcast_list, 1005 .ndo_set_multicast_list = ipoib_set_mcast_list,
1006 .ndo_neigh_setup = ipoib_neigh_setup_dev,
1335}; 1007};
1336 1008
1337void ipoib_setup(struct net_device *dev) 1009static void ipoib_setup(struct net_device *dev)
1338{ 1010{
1339 struct ipoib_dev_priv *priv = netdev_priv(dev); 1011 struct ipoib_dev_priv *priv = netdev_priv(dev);
1340 1012
@@ -1349,7 +1021,11 @@ void ipoib_setup(struct net_device *dev)
1349 1021
1350 dev->flags |= IFF_BROADCAST | IFF_MULTICAST; 1022 dev->flags |= IFF_BROADCAST | IFF_MULTICAST;
1351 1023
1352 dev->hard_header_len = IPOIB_ENCAP_LEN; 1024 /*
1025 * We add in INFINIBAND_ALEN to allow for the destination
1026 * address "pseudoheader" for skbs without neighbour struct.
1027 */
1028 dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN;
1353 dev->addr_len = INFINIBAND_ALEN; 1029 dev->addr_len = INFINIBAND_ALEN;
1354 dev->type = ARPHRD_INFINIBAND; 1030 dev->type = ARPHRD_INFINIBAND;
1355 dev->tx_queue_len = ipoib_sendq_size * 2; 1031 dev->tx_queue_len = ipoib_sendq_size * 2;
@@ -1380,7 +1056,6 @@ void ipoib_setup(struct net_device *dev)
1380 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy); 1056 INIT_WORK(&priv->flush_heavy, ipoib_ib_dev_flush_heavy);
1381 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task); 1057 INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
1382 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah); 1058 INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
1383 INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
1384} 1059}
1385 1060
1386struct ipoib_dev_priv *ipoib_intf_alloc(const char *name) 1061struct ipoib_dev_priv *ipoib_intf_alloc(const char *name)
@@ -1412,9 +1087,12 @@ static ssize_t show_umcast(struct device *dev,
1412 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags)); 1087 return sprintf(buf, "%d\n", test_bit(IPOIB_FLAG_UMCAST, &priv->flags));
1413} 1088}
1414 1089
1415void ipoib_set_umcast(struct net_device *ndev, int umcast_val) 1090static ssize_t set_umcast(struct device *dev,
1091 struct device_attribute *attr,
1092 const char *buf, size_t count)
1416{ 1093{
1417 struct ipoib_dev_priv *priv = netdev_priv(ndev); 1094 struct ipoib_dev_priv *priv = netdev_priv(to_net_dev(dev));
1095 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1418 1096
1419 if (umcast_val > 0) { 1097 if (umcast_val > 0) {
1420 set_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1098 set_bit(IPOIB_FLAG_UMCAST, &priv->flags);
@@ -1422,15 +1100,6 @@ void ipoib_set_umcast(struct net_device *ndev, int umcast_val)
1422 "by userspace\n"); 1100 "by userspace\n");
1423 } else 1101 } else
1424 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags); 1102 clear_bit(IPOIB_FLAG_UMCAST, &priv->flags);
1425}
1426
1427static ssize_t set_umcast(struct device *dev,
1428 struct device_attribute *attr,
1429 const char *buf, size_t count)
1430{
1431 unsigned long umcast_val = simple_strtoul(buf, NULL, 0);
1432
1433 ipoib_set_umcast(to_net_dev(dev), umcast_val);
1434 1103
1435 return count; 1104 return count;
1436} 1105}
@@ -1553,8 +1222,6 @@ static struct net_device *ipoib_add_port(const char *format,
1553 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 1222 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
1554 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; 1223 priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
1555 1224
1556 priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
1557
1558 result = ib_query_pkey(hca, port, 0, &priv->pkey); 1225 result = ib_query_pkey(hca, port, 0, &priv->pkey);
1559 if (result) { 1226 if (result) {
1560 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", 1227 printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
@@ -1627,9 +1294,6 @@ sysfs_failed:
1627 1294
1628register_failed: 1295register_failed:
1629 ib_unregister_event_handler(&priv->event_handler); 1296 ib_unregister_event_handler(&priv->event_handler);
1630 /* Stop GC if started before flush */
1631 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1632 cancel_delayed_work(&priv->neigh_reap_task);
1633 flush_workqueue(ipoib_workqueue); 1297 flush_workqueue(ipoib_workqueue);
1634 1298
1635event_failed: 1299event_failed:
@@ -1696,12 +1360,10 @@ static void ipoib_remove_one(struct ib_device *device)
1696 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); 1360 dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
1697 rtnl_unlock(); 1361 rtnl_unlock();
1698 1362
1699 /* Stop GC */
1700 set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
1701 cancel_delayed_work(&priv->neigh_reap_task);
1702 flush_workqueue(ipoib_workqueue); 1363 flush_workqueue(ipoib_workqueue);
1703 1364
1704 unregister_netdev(priv->dev); 1365 unregister_netdev(priv->dev);
1366 ipoib_dev_cleanup(priv->dev);
1705 free_netdev(priv->dev); 1367 free_netdev(priv->dev);
1706 } 1368 }
1707 1369
@@ -1753,15 +1415,8 @@ static int __init ipoib_init_module(void)
1753 if (ret) 1415 if (ret)
1754 goto err_sa; 1416 goto err_sa;
1755 1417
1756 ret = ipoib_netlink_init();
1757 if (ret)
1758 goto err_client;
1759
1760 return 0; 1418 return 0;
1761 1419
1762err_client:
1763 ib_unregister_client(&ipoib_client);
1764
1765err_sa: 1420err_sa:
1766 ib_sa_unregister_client(&ipoib_sa_client); 1421 ib_sa_unregister_client(&ipoib_sa_client);
1767 destroy_workqueue(ipoib_workqueue); 1422 destroy_workqueue(ipoib_workqueue);
@@ -1774,7 +1429,6 @@ err_fs:
1774 1429
1775static void __exit ipoib_cleanup_module(void) 1430static void __exit ipoib_cleanup_module(void)
1776{ 1431{
1777 ipoib_netlink_fini();
1778 ib_unregister_client(&ipoib_client); 1432 ib_unregister_client(&ipoib_client);
1779 ib_sa_unregister_client(&ipoib_sa_client); 1433 ib_sa_unregister_client(&ipoib_sa_client);
1780 ipoib_unregister_debugfs(); 1434 ipoib_unregister_debugfs();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index cecb98a4c66..a8d2a891b84 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -34,7 +34,6 @@
34 34
35#include <linux/skbuff.h> 35#include <linux/skbuff.h>
36#include <linux/rtnetlink.h> 36#include <linux/rtnetlink.h>
37#include <linux/moduleparam.h>
38#include <linux/ip.h> 37#include <linux/ip.h>
39#include <linux/in.h> 38#include <linux/in.h>
40#include <linux/igmp.h> 39#include <linux/igmp.h>
@@ -69,13 +68,28 @@ struct ipoib_mcast_iter {
69static void ipoib_mcast_free(struct ipoib_mcast *mcast) 68static void ipoib_mcast_free(struct ipoib_mcast *mcast)
70{ 69{
71 struct net_device *dev = mcast->dev; 70 struct net_device *dev = mcast->dev;
71 struct ipoib_dev_priv *priv = netdev_priv(dev);
72 struct ipoib_neigh *neigh, *tmp;
72 int tx_dropped = 0; 73 int tx_dropped = 0;
73 74
74 ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n", 75 ipoib_dbg_mcast(netdev_priv(dev), "deleting multicast group %pI6\n",
75 mcast->mcmember.mgid.raw); 76 mcast->mcmember.mgid.raw);
76 77
77 /* remove all neigh connected to this mcast */ 78 spin_lock_irq(&priv->lock);
78 ipoib_del_neighs_by_gid(dev, mcast->mcmember.mgid.raw); 79
80 list_for_each_entry_safe(neigh, tmp, &mcast->neigh_list, list) {
81 /*
82 * It's safe to call ipoib_put_ah() inside priv->lock
83 * here, because we know that mcast->ah will always
84 * hold one more reference, so ipoib_put_ah() will
85 * never do more than decrement the ref count.
86 */
87 if (neigh->ah)
88 ipoib_put_ah(neigh->ah);
89 ipoib_neigh_free(dev, neigh);
90 }
91
92 spin_unlock_irq(&priv->lock);
79 93
80 if (mcast->ah) 94 if (mcast->ah)
81 ipoib_put_ah(mcast->ah); 95 ipoib_put_ah(mcast->ah);
@@ -175,9 +189,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
175 189
176 mcast->mcmember = *mcmember; 190 mcast->mcmember = *mcmember;
177 191
178 /* Set the multicast MTU and cached Q_Key before we attach if it's 192 /* Set the cached Q_Key before we attach if it's the broadcast group */
179 * the broadcast group.
180 */
181 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4, 193 if (!memcmp(mcast->mcmember.mgid.raw, priv->dev->broadcast + 4,
182 sizeof (union ib_gid))) { 194 sizeof (union ib_gid))) {
183 spin_lock_irq(&priv->lock); 195 spin_lock_irq(&priv->lock);
@@ -185,17 +197,10 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
185 spin_unlock_irq(&priv->lock); 197 spin_unlock_irq(&priv->lock);
186 return -EAGAIN; 198 return -EAGAIN;
187 } 199 }
188 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
189 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey); 200 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
190 spin_unlock_irq(&priv->lock); 201 spin_unlock_irq(&priv->lock);
191 priv->tx_wr.wr.ud.remote_qkey = priv->qkey; 202 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
192 set_qkey = 1; 203 set_qkey = 1;
193
194 if (!ipoib_cm_admin_enabled(dev)) {
195 rtnl_lock();
196 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
197 rtnl_unlock();
198 }
199 } 204 }
200 205
201 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { 206 if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -234,11 +239,8 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
234 av.grh.dgid = mcast->mcmember.mgid; 239 av.grh.dgid = mcast->mcmember.mgid;
235 240
236 ah = ipoib_create_ah(dev, priv->pd, &av); 241 ah = ipoib_create_ah(dev, priv->pd, &av);
237 if (IS_ERR(ah)) { 242 if (!ah) {
238 ipoib_warn(priv, "ib_address_create failed %ld\n", 243 ipoib_warn(priv, "ib_address_create failed\n");
239 -PTR_ERR(ah));
240 /* use original error */
241 return PTR_ERR(ah);
242 } else { 244 } else {
243 spin_lock_irq(&priv->lock); 245 spin_lock_irq(&priv->lock);
244 mcast->ah = ah; 246 mcast->ah = ah;
@@ -256,13 +258,21 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
256 netif_tx_lock_bh(dev); 258 netif_tx_lock_bh(dev);
257 while (!skb_queue_empty(&mcast->pkt_queue)) { 259 while (!skb_queue_empty(&mcast->pkt_queue)) {
258 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); 260 struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue);
261 struct dst_entry *dst = skb_dst(skb);
262 struct neighbour *n = NULL;
259 263
260 netif_tx_unlock_bh(dev); 264 netif_tx_unlock_bh(dev);
261 265
262 skb->dev = dev; 266 skb->dev = dev;
267 if (dst)
268 n = dst_get_neighbour_raw(dst);
269 if (!dst || !n) {
270 /* put pseudoheader back on for next time */
271 skb_push(skb, sizeof (struct ipoib_pseudoheader));
272 }
273
263 if (dev_queue_xmit(skb)) 274 if (dev_queue_xmit(skb))
264 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); 275 ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
265
266 netif_tx_lock_bh(dev); 276 netif_tx_lock_bh(dev);
267 } 277 }
268 netif_tx_unlock_bh(dev); 278 netif_tx_unlock_bh(dev);
@@ -583,6 +593,14 @@ void ipoib_mcast_join_task(struct work_struct *work)
583 return; 593 return;
584 } 594 }
585 595
596 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
597
598 if (!ipoib_cm_admin_enabled(dev)) {
599 rtnl_lock();
600 dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
601 rtnl_unlock();
602 }
603
586 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n"); 604 ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
587 605
588 clear_bit(IPOIB_MCAST_RUN, &priv->flags); 606 clear_bit(IPOIB_MCAST_RUN, &priv->flags);
@@ -641,12 +659,11 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
641 return 0; 659 return 0;
642} 660}
643 661
644void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) 662void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb)
645{ 663{
646 struct ipoib_dev_priv *priv = netdev_priv(dev); 664 struct ipoib_dev_priv *priv = netdev_priv(dev);
647 struct ipoib_mcast *mcast; 665 struct ipoib_mcast *mcast;
648 unsigned long flags; 666 unsigned long flags;
649 void *mgid = daddr + 4;
650 667
651 spin_lock_irqsave(&priv->lock, flags); 668 spin_lock_irqsave(&priv->lock, flags);
652 669
@@ -702,23 +719,25 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
702 719
703out: 720out:
704 if (mcast && mcast->ah) { 721 if (mcast && mcast->ah) {
705 struct ipoib_neigh *neigh; 722 struct dst_entry *dst = skb_dst(skb);
723 struct neighbour *n = NULL;
724
725 rcu_read_lock();
726 if (dst)
727 n = dst_get_neighbour(dst);
728 if (n && !*to_ipoib_neigh(n)) {
729 struct ipoib_neigh *neigh = ipoib_neigh_alloc(n,
730 skb->dev);
706 731
707 spin_unlock_irqrestore(&priv->lock, flags);
708 neigh = ipoib_neigh_get(dev, daddr);
709 spin_lock_irqsave(&priv->lock, flags);
710 if (!neigh) {
711 neigh = ipoib_neigh_alloc(daddr, dev);
712 if (neigh) { 732 if (neigh) {
713 kref_get(&mcast->ah->ref); 733 kref_get(&mcast->ah->ref);
714 neigh->ah = mcast->ah; 734 neigh->ah = mcast->ah;
715 list_add_tail(&neigh->list, &mcast->neigh_list); 735 list_add_tail(&neigh->list, &mcast->neigh_list);
716 } 736 }
717 } 737 }
738 rcu_read_unlock();
718 spin_unlock_irqrestore(&priv->lock, flags); 739 spin_unlock_irqrestore(&priv->lock, flags);
719 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); 740 ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN);
720 if (neigh)
721 ipoib_neigh_put(neigh);
722 return; 741 return;
723 } 742 }
724 743
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
deleted file mode 100644
index 74685936c94..00000000000
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ /dev/null
@@ -1,172 +0,0 @@
1/*
2 * Copyright (c) 2012 Mellanox Technologies. - All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#include <linux/netdevice.h>
34#include <linux/module.h>
35#include <net/rtnetlink.h>
36#include "ipoib.h"
37
38static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
39 [IFLA_IPOIB_PKEY] = { .type = NLA_U16 },
40 [IFLA_IPOIB_MODE] = { .type = NLA_U16 },
41 [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 },
42};
43
44static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
45{
46 struct ipoib_dev_priv *priv = netdev_priv(dev);
47 u16 val;
48
49 if (nla_put_u16(skb, IFLA_IPOIB_PKEY, priv->pkey))
50 goto nla_put_failure;
51
52 val = test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
53 if (nla_put_u16(skb, IFLA_IPOIB_MODE, val))
54 goto nla_put_failure;
55
56 val = test_bit(IPOIB_FLAG_UMCAST, &priv->flags);
57 if (nla_put_u16(skb, IFLA_IPOIB_UMCAST, val))
58 goto nla_put_failure;
59
60 return 0;
61
62nla_put_failure:
63 return -EMSGSIZE;
64}
65
66static int ipoib_changelink(struct net_device *dev,
67 struct nlattr *tb[], struct nlattr *data[])
68{
69 u16 mode, umcast;
70 int ret = 0;
71
72 if (data[IFLA_IPOIB_MODE]) {
73 mode = nla_get_u16(data[IFLA_IPOIB_MODE]);
74 if (mode == IPOIB_MODE_DATAGRAM)
75 ret = ipoib_set_mode(dev, "datagram\n");
76 else if (mode == IPOIB_MODE_CONNECTED)
77 ret = ipoib_set_mode(dev, "connected\n");
78 else
79 ret = -EINVAL;
80
81 if (ret < 0)
82 goto out_err;
83 }
84
85 if (data[IFLA_IPOIB_UMCAST]) {
86 umcast = nla_get_u16(data[IFLA_IPOIB_UMCAST]);
87 ipoib_set_umcast(dev, umcast);
88 }
89
90out_err:
91 return ret;
92}
93
94static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
95 struct nlattr *tb[], struct nlattr *data[])
96{
97 struct net_device *pdev;
98 struct ipoib_dev_priv *ppriv;
99 u16 child_pkey;
100 int err;
101
102 if (!tb[IFLA_LINK])
103 return -EINVAL;
104
105 pdev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
106 if (!pdev)
107 return -ENODEV;
108
109 ppriv = netdev_priv(pdev);
110
111 if (test_bit(IPOIB_FLAG_SUBINTERFACE, &ppriv->flags)) {
112 ipoib_warn(ppriv, "child creation disallowed for child devices\n");
113 return -EINVAL;
114 }
115
116 if (!data || !data[IFLA_IPOIB_PKEY]) {
117 ipoib_dbg(ppriv, "no pkey specified, using parent pkey\n");
118 child_pkey = ppriv->pkey;
119 } else
120 child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
121
122 err = __ipoib_vlan_add(ppriv, netdev_priv(dev), child_pkey, IPOIB_RTNL_CHILD);
123
124 if (!err && data)
125 err = ipoib_changelink(dev, tb, data);
126 return err;
127}
128
129static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
130{
131 struct ipoib_dev_priv *priv, *ppriv;
132
133 priv = netdev_priv(dev);
134 ppriv = netdev_priv(priv->parent);
135
136 mutex_lock(&ppriv->vlan_mutex);
137 unregister_netdevice_queue(dev, head);
138 list_del(&priv->list);
139 mutex_unlock(&ppriv->vlan_mutex);
140}
141
142static size_t ipoib_get_size(const struct net_device *dev)
143{
144 return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
145 nla_total_size(2) + /* IFLA_IPOIB_MODE */
146 nla_total_size(2); /* IFLA_IPOIB_UMCAST */
147}
148
149static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
150 .kind = "ipoib",
151 .maxtype = IFLA_IPOIB_MAX,
152 .policy = ipoib_policy,
153 .priv_size = sizeof(struct ipoib_dev_priv),
154 .setup = ipoib_setup,
155 .newlink = ipoib_new_child_link,
156 .changelink = ipoib_changelink,
157 .dellink = ipoib_unregister_child_dev,
158 .get_size = ipoib_get_size,
159 .fill_info = ipoib_fill_info,
160};
161
162int __init ipoib_netlink_init(void)
163{
164 return rtnl_link_register(&ipoib_link_ops);
165}
166
167void __exit ipoib_netlink_fini(void)
168{
169 rtnl_link_unregister(&ipoib_link_ops);
170}
171
172MODULE_ALIAS_RTNL_LINK("ipoib");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 8292554bccb..d7e9740c724 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -49,11 +49,47 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
49} 49}
50static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); 50static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
51 51
52int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, 52int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
53 u16 pkey, int type)
54{ 53{
54 struct ipoib_dev_priv *ppriv, *priv;
55 char intf_name[IFNAMSIZ];
55 int result; 56 int result;
56 57
58 if (!capable(CAP_NET_ADMIN))
59 return -EPERM;
60
61 ppriv = netdev_priv(pdev);
62
63 if (!rtnl_trylock())
64 return restart_syscall();
65 mutex_lock(&ppriv->vlan_mutex);
66
67 /*
68 * First ensure this isn't a duplicate. We check the parent device and
69 * then all of the child interfaces to make sure the Pkey doesn't match.
70 */
71 if (ppriv->pkey == pkey) {
72 result = -ENOTUNIQ;
73 priv = NULL;
74 goto err;
75 }
76
77 list_for_each_entry(priv, &ppriv->child_intfs, list) {
78 if (priv->pkey == pkey) {
79 result = -ENOTUNIQ;
80 priv = NULL;
81 goto err;
82 }
83 }
84
85 snprintf(intf_name, sizeof intf_name, "%s.%04x",
86 ppriv->dev->name, pkey);
87 priv = ipoib_intf_alloc(intf_name);
88 if (!priv) {
89 result = -ENOMEM;
90 goto err;
91 }
92
57 priv->max_ib_mtu = ppriv->max_ib_mtu; 93 priv->max_ib_mtu = ppriv->max_ib_mtu;
58 /* MTU will be reset when mcast join happens */ 94 /* MTU will be reset when mcast join happens */
59 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); 95 priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
@@ -88,27 +124,24 @@ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
88 124
89 ipoib_create_debug_files(priv->dev); 125 ipoib_create_debug_files(priv->dev);
90 126
91 /* RTNL childs don't need proprietary sysfs entries */ 127 if (ipoib_cm_add_mode_attr(priv->dev))
92 if (type == IPOIB_LEGACY_CHILD) { 128 goto sysfs_failed;
93 if (ipoib_cm_add_mode_attr(priv->dev)) 129 if (ipoib_add_pkey_attr(priv->dev))
94 goto sysfs_failed; 130 goto sysfs_failed;
95 if (ipoib_add_pkey_attr(priv->dev)) 131 if (ipoib_add_umcast_attr(priv->dev))
96 goto sysfs_failed; 132 goto sysfs_failed;
97 if (ipoib_add_umcast_attr(priv->dev)) 133
98 goto sysfs_failed; 134 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
99 135 goto sysfs_failed;
100 if (device_create_file(&priv->dev->dev, &dev_attr_parent))
101 goto sysfs_failed;
102 }
103 136
104 priv->child_type = type;
105 priv->dev->iflink = ppriv->dev->ifindex;
106 list_add_tail(&priv->list, &ppriv->child_intfs); 137 list_add_tail(&priv->list, &ppriv->child_intfs);
107 138
139 mutex_unlock(&ppriv->vlan_mutex);
140 rtnl_unlock();
141
108 return 0; 142 return 0;
109 143
110sysfs_failed: 144sysfs_failed:
111 result = -ENOMEM;
112 ipoib_delete_debug_files(priv->dev); 145 ipoib_delete_debug_files(priv->dev);
113 unregister_netdevice(priv->dev); 146 unregister_netdevice(priv->dev);
114 147
@@ -116,59 +149,10 @@ register_failed:
116 ipoib_dev_cleanup(priv->dev); 149 ipoib_dev_cleanup(priv->dev);
117 150
118err: 151err:
119 return result;
120}
121
122int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
123{
124 struct ipoib_dev_priv *ppriv, *priv;
125 char intf_name[IFNAMSIZ];
126 struct ipoib_dev_priv *tpriv;
127 int result;
128
129 if (!capable(CAP_NET_ADMIN))
130 return -EPERM;
131
132 ppriv = netdev_priv(pdev);
133
134 snprintf(intf_name, sizeof intf_name, "%s.%04x",
135 ppriv->dev->name, pkey);
136 priv = ipoib_intf_alloc(intf_name);
137 if (!priv)
138 return -ENOMEM;
139
140 if (!rtnl_trylock())
141 return restart_syscall();
142
143 mutex_lock(&ppriv->vlan_mutex);
144
145 /*
146 * First ensure this isn't a duplicate. We check the parent device and
147 * then all of the legacy child interfaces to make sure the Pkey
148 * doesn't match.
149 */
150 if (ppriv->pkey == pkey) {
151 result = -ENOTUNIQ;
152 goto out;
153 }
154
155 list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
156 if (tpriv->pkey == pkey &&
157 tpriv->child_type == IPOIB_LEGACY_CHILD) {
158 result = -ENOTUNIQ;
159 goto out;
160 }
161 }
162
163 result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
164
165out:
166 mutex_unlock(&ppriv->vlan_mutex); 152 mutex_unlock(&ppriv->vlan_mutex);
167
168 if (result)
169 free_netdev(priv->dev);
170
171 rtnl_unlock(); 153 rtnl_unlock();
154 if (priv)
155 free_netdev(priv->dev);
172 156
173 return result; 157 return result;
174} 158}
@@ -187,9 +171,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
187 return restart_syscall(); 171 return restart_syscall();
188 mutex_lock(&ppriv->vlan_mutex); 172 mutex_lock(&ppriv->vlan_mutex);
189 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { 173 list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
190 if (priv->pkey == pkey && 174 if (priv->pkey == pkey) {
191 priv->child_type == IPOIB_LEGACY_CHILD) {
192 unregister_netdevice(priv->dev); 175 unregister_netdevice(priv->dev);
176 ipoib_dev_cleanup(priv->dev);
193 list_del(&priv->list); 177 list_del(&priv->list);
194 dev = priv->dev; 178 dev = priv->dev;
195 break; 179 break;
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 0ab8c9cc3a7..9c61b9c2c59 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -57,7 +57,6 @@
57#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
58#include <linux/delay.h> 58#include <linux/delay.h>
59#include <linux/slab.h> 59#include <linux/slab.h>
60#include <linux/module.h>
61 60
62#include <net/sock.h> 61#include <net/sock.h>
63 62
@@ -152,6 +151,7 @@ int iser_initialize_task_headers(struct iscsi_task *task,
152 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; 151 tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
153 tx_desc->tx_sg[0].lkey = device->mr->lkey; 152 tx_desc->tx_sg[0].lkey = device->mr->lkey;
154 153
154 iser_task->headers_initialized = 1;
155 iser_task->iser_conn = iser_conn; 155 iser_task->iser_conn = iser_conn;
156 return 0; 156 return 0;
157} 157}
@@ -166,7 +166,8 @@ iscsi_iser_task_init(struct iscsi_task *task)
166{ 166{
167 struct iscsi_iser_task *iser_task = task->dd_data; 167 struct iscsi_iser_task *iser_task = task->dd_data;
168 168
169 if (iser_initialize_task_headers(task, &iser_task->desc)) 169 if (!iser_task->headers_initialized)
170 if (iser_initialize_task_headers(task, &iser_task->desc))
170 return -ENOMEM; 171 return -ENOMEM;
171 172
172 /* mgmt task */ 173 /* mgmt task */
@@ -277,13 +278,6 @@ iscsi_iser_task_xmit(struct iscsi_task *task)
277static void iscsi_iser_cleanup_task(struct iscsi_task *task) 278static void iscsi_iser_cleanup_task(struct iscsi_task *task)
278{ 279{
279 struct iscsi_iser_task *iser_task = task->dd_data; 280 struct iscsi_iser_task *iser_task = task->dd_data;
280 struct iser_tx_desc *tx_desc = &iser_task->desc;
281
282 struct iscsi_iser_conn *iser_conn = task->conn->dd_data;
283 struct iser_device *device = iser_conn->ib_conn->device;
284
285 ib_dma_unmap_single(device->ib_device,
286 tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE);
287 281
288 /* mgmt tasks do not need special cleanup */ 282 /* mgmt tasks do not need special cleanup */
289 if (!task->sc) 283 if (!task->sc)
@@ -364,9 +358,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
364 } 358 }
365 ib_conn = ep->dd_data; 359 ib_conn = ep->dd_data;
366 360
367 if (iser_alloc_rx_descriptors(ib_conn))
368 return -ENOMEM;
369
370 /* binds the iSER connection retrieved from the previously 361 /* binds the iSER connection retrieved from the previously
371 * connected ep_handle to the iSCSI layer connection. exchanges 362 * connected ep_handle to the iSCSI layer connection. exchanges
372 * connection pointers */ 363 * connection pointers */
@@ -401,6 +392,19 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
401 iser_conn->ib_conn = NULL; 392 iser_conn->ib_conn = NULL;
402} 393}
403 394
395static int
396iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn)
397{
398 struct iscsi_conn *conn = cls_conn->dd_data;
399 int err;
400
401 err = iser_conn_set_full_featured_mode(conn);
402 if (err)
403 return err;
404
405 return iscsi_conn_start(cls_conn);
406}
407
404static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) 408static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session)
405{ 409{
406 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); 410 struct Scsi_Host *shost = iscsi_session_to_shost(cls_session);
@@ -573,9 +577,10 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
573 577
574 err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, 578 err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr,
575 non_blocking); 579 non_blocking);
576 if (err) 580 if (err) {
581 iscsi_destroy_endpoint(ep);
577 return ERR_PTR(err); 582 return ERR_PTR(err);
578 583 }
579 return ep; 584 return ep;
580} 585}
581 586
@@ -627,59 +632,6 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
627 iser_conn_terminate(ib_conn); 632 iser_conn_terminate(ib_conn);
628} 633}
629 634
630static umode_t iser_attr_is_visible(int param_type, int param)
631{
632 switch (param_type) {
633 case ISCSI_HOST_PARAM:
634 switch (param) {
635 case ISCSI_HOST_PARAM_NETDEV_NAME:
636 case ISCSI_HOST_PARAM_HWADDRESS:
637 case ISCSI_HOST_PARAM_INITIATOR_NAME:
638 return S_IRUGO;
639 default:
640 return 0;
641 }
642 case ISCSI_PARAM:
643 switch (param) {
644 case ISCSI_PARAM_MAX_RECV_DLENGTH:
645 case ISCSI_PARAM_MAX_XMIT_DLENGTH:
646 case ISCSI_PARAM_HDRDGST_EN:
647 case ISCSI_PARAM_DATADGST_EN:
648 case ISCSI_PARAM_CONN_ADDRESS:
649 case ISCSI_PARAM_CONN_PORT:
650 case ISCSI_PARAM_EXP_STATSN:
651 case ISCSI_PARAM_PERSISTENT_ADDRESS:
652 case ISCSI_PARAM_PERSISTENT_PORT:
653 case ISCSI_PARAM_PING_TMO:
654 case ISCSI_PARAM_RECV_TMO:
655 case ISCSI_PARAM_INITIAL_R2T_EN:
656 case ISCSI_PARAM_MAX_R2T:
657 case ISCSI_PARAM_IMM_DATA_EN:
658 case ISCSI_PARAM_FIRST_BURST:
659 case ISCSI_PARAM_MAX_BURST:
660 case ISCSI_PARAM_PDU_INORDER_EN:
661 case ISCSI_PARAM_DATASEQ_INORDER_EN:
662 case ISCSI_PARAM_TARGET_NAME:
663 case ISCSI_PARAM_TPGT:
664 case ISCSI_PARAM_USERNAME:
665 case ISCSI_PARAM_PASSWORD:
666 case ISCSI_PARAM_USERNAME_IN:
667 case ISCSI_PARAM_PASSWORD_IN:
668 case ISCSI_PARAM_FAST_ABORT:
669 case ISCSI_PARAM_ABORT_TMO:
670 case ISCSI_PARAM_LU_RESET_TMO:
671 case ISCSI_PARAM_TGT_RESET_TMO:
672 case ISCSI_PARAM_IFACE_NAME:
673 case ISCSI_PARAM_INITIATOR_NAME:
674 return S_IRUGO;
675 default:
676 return 0;
677 }
678 }
679
680 return 0;
681}
682
683static struct scsi_host_template iscsi_iser_sht = { 635static struct scsi_host_template iscsi_iser_sht = {
684 .module = THIS_MODULE, 636 .module = THIS_MODULE,
685 .name = "iSCSI Initiator over iSER, v." DRV_VER, 637 .name = "iSCSI Initiator over iSER, v." DRV_VER,
@@ -701,6 +653,32 @@ static struct iscsi_transport iscsi_iser_transport = {
701 .owner = THIS_MODULE, 653 .owner = THIS_MODULE,
702 .name = "iser", 654 .name = "iser",
703 .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T, 655 .caps = CAP_RECOVERY_L0 | CAP_MULTI_R2T,
656 .param_mask = ISCSI_MAX_RECV_DLENGTH |
657 ISCSI_MAX_XMIT_DLENGTH |
658 ISCSI_HDRDGST_EN |
659 ISCSI_DATADGST_EN |
660 ISCSI_INITIAL_R2T_EN |
661 ISCSI_MAX_R2T |
662 ISCSI_IMM_DATA_EN |
663 ISCSI_FIRST_BURST |
664 ISCSI_MAX_BURST |
665 ISCSI_PDU_INORDER_EN |
666 ISCSI_DATASEQ_INORDER_EN |
667 ISCSI_CONN_PORT |
668 ISCSI_CONN_ADDRESS |
669 ISCSI_EXP_STATSN |
670 ISCSI_PERSISTENT_PORT |
671 ISCSI_PERSISTENT_ADDRESS |
672 ISCSI_TARGET_NAME | ISCSI_TPGT |
673 ISCSI_USERNAME | ISCSI_PASSWORD |
674 ISCSI_USERNAME_IN | ISCSI_PASSWORD_IN |
675 ISCSI_FAST_ABORT | ISCSI_ABORT_TMO |
676 ISCSI_LU_RESET_TMO | ISCSI_TGT_RESET_TMO |
677 ISCSI_PING_TMO | ISCSI_RECV_TMO |
678 ISCSI_IFACE_NAME | ISCSI_INITIATOR_NAME,
679 .host_param_mask = ISCSI_HOST_HWADDRESS |
680 ISCSI_HOST_NETDEV_NAME |
681 ISCSI_HOST_INITIATOR_NAME,
704 /* session management */ 682 /* session management */
705 .create_session = iscsi_iser_session_create, 683 .create_session = iscsi_iser_session_create,
706 .destroy_session = iscsi_iser_session_destroy, 684 .destroy_session = iscsi_iser_session_destroy,
@@ -708,12 +686,11 @@ static struct iscsi_transport iscsi_iser_transport = {
708 .create_conn = iscsi_iser_conn_create, 686 .create_conn = iscsi_iser_conn_create,
709 .bind_conn = iscsi_iser_conn_bind, 687 .bind_conn = iscsi_iser_conn_bind,
710 .destroy_conn = iscsi_iser_conn_destroy, 688 .destroy_conn = iscsi_iser_conn_destroy,
711 .attr_is_visible = iser_attr_is_visible,
712 .set_param = iscsi_iser_set_param, 689 .set_param = iscsi_iser_set_param,
713 .get_conn_param = iscsi_conn_get_param, 690 .get_conn_param = iscsi_conn_get_param,
714 .get_ep_param = iscsi_iser_get_ep_param, 691 .get_ep_param = iscsi_iser_get_ep_param,
715 .get_session_param = iscsi_session_get_param, 692 .get_session_param = iscsi_session_get_param,
716 .start_conn = iscsi_conn_start, 693 .start_conn = iscsi_iser_conn_start,
717 .stop_conn = iscsi_iser_conn_stop, 694 .stop_conn = iscsi_iser_conn_stop,
718 /* iscsi host params */ 695 /* iscsi host params */
719 .get_host_param = iscsi_host_get_param, 696 .get_host_param = iscsi_host_get_param,
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index ef7d3be46c3..db6f3ce9f3b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -177,7 +177,6 @@ struct iser_data_buf {
177 177
178/* fwd declarations */ 178/* fwd declarations */
179struct iser_device; 179struct iser_device;
180struct iser_cq_desc;
181struct iscsi_iser_conn; 180struct iscsi_iser_conn;
182struct iscsi_iser_task; 181struct iscsi_iser_task;
183struct iscsi_endpoint; 182struct iscsi_endpoint;
@@ -227,21 +226,16 @@ struct iser_rx_desc {
227 char pad[ISER_RX_PAD_SIZE]; 226 char pad[ISER_RX_PAD_SIZE];
228} __attribute__((packed)); 227} __attribute__((packed));
229 228
230#define ISER_MAX_CQ 4
231
232struct iser_device { 229struct iser_device {
233 struct ib_device *ib_device; 230 struct ib_device *ib_device;
234 struct ib_pd *pd; 231 struct ib_pd *pd;
235 struct ib_cq *rx_cq[ISER_MAX_CQ]; 232 struct ib_cq *rx_cq;
236 struct ib_cq *tx_cq[ISER_MAX_CQ]; 233 struct ib_cq *tx_cq;
237 struct ib_mr *mr; 234 struct ib_mr *mr;
238 struct tasklet_struct cq_tasklet[ISER_MAX_CQ]; 235 struct tasklet_struct cq_tasklet;
239 struct ib_event_handler event_handler; 236 struct ib_event_handler event_handler;
240 struct list_head ig_list; /* entry in ig devices list */ 237 struct list_head ig_list; /* entry in ig devices list */
241 int refcount; 238 int refcount;
242 int cq_active_qps[ISER_MAX_CQ];
243 int cqs_used;
244 struct iser_cq_desc *cq_desc;
245}; 239};
246 240
247struct iser_conn { 241struct iser_conn {
@@ -263,8 +257,7 @@ struct iser_conn {
263 struct list_head conn_list; /* entry in ig conn list */ 257 struct list_head conn_list; /* entry in ig conn list */
264 258
265 char *login_buf; 259 char *login_buf;
266 char *login_req_buf, *login_resp_buf; 260 u64 login_dma;
267 u64 login_req_dma, login_resp_dma;
268 unsigned int rx_desc_head; 261 unsigned int rx_desc_head;
269 struct iser_rx_desc *rx_descs; 262 struct iser_rx_desc *rx_descs;
270 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; 263 struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
@@ -284,6 +277,7 @@ struct iscsi_iser_task {
284 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ 277 struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */
285 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ 278 struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/
286 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ 279 struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */
280 int headers_initialized;
287}; 281};
288 282
289struct iser_page_vec { 283struct iser_page_vec {
@@ -293,11 +287,6 @@ struct iser_page_vec {
293 int data_size; 287 int data_size;
294}; 288};
295 289
296struct iser_cq_desc {
297 struct iser_device *device;
298 int cq_index;
299};
300
301struct iser_global { 290struct iser_global {
302 struct mutex device_list_mutex;/* */ 291 struct mutex device_list_mutex;/* */
303 struct list_head device_list; /* all iSER devices */ 292 struct list_head device_list; /* all iSER devices */
@@ -377,5 +366,4 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
377void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); 366void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
378int iser_initialize_task_headers(struct iscsi_task *task, 367int iser_initialize_task_headers(struct iscsi_task *task,
379 struct iser_tx_desc *tx_desc); 368 struct iser_tx_desc *tx_desc);
380int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
381#endif 369#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index a00ccd1ca33..f299de6b419 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -170,7 +170,7 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
170} 170}
171 171
172 172
173int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) 173static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
174{ 174{
175 int i, j; 175 int i, j;
176 u64 dma_addr; 176 u64 dma_addr;
@@ -220,6 +220,12 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
220 struct iser_rx_desc *rx_desc; 220 struct iser_rx_desc *rx_desc;
221 struct iser_device *device = ib_conn->device; 221 struct iser_device *device = ib_conn->device;
222 222
223 if (ib_conn->login_buf) {
224 ib_dma_unmap_single(device->ib_device, ib_conn->login_dma,
225 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
226 kfree(ib_conn->login_buf);
227 }
228
223 if (!ib_conn->rx_descs) 229 if (!ib_conn->rx_descs)
224 return; 230 return;
225 231
@@ -230,24 +236,23 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
230 kfree(ib_conn->rx_descs); 236 kfree(ib_conn->rx_descs);
231} 237}
232 238
233static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) 239/**
240 * iser_conn_set_full_featured_mode - (iSER API)
241 */
242int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
234{ 243{
235 struct iscsi_iser_conn *iser_conn = conn->dd_data; 244 struct iscsi_iser_conn *iser_conn = conn->dd_data;
236 245
237 iser_dbg("req op %x flags %x\n", req->opcode, req->flags); 246 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
238 /* check if this is the last login - going to full feature phase */
239 if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE)
240 return 0;
241 247
242 /* 248 /* Check that there is no posted recv or send buffers left - */
243 * Check that there is one posted recv buffer (for the last login 249 /* they must be consumed during the login phase */
244 * response) and no posted send buffers left - they must have been 250 BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0);
245 * consumed during previous login phases. 251 BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
246 */ 252
247 WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); 253 if (iser_alloc_rx_descriptors(iser_conn->ib_conn))
248 WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); 254 return -ENOMEM;
249 255
250 iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
251 /* Initial post receive buffers */ 256 /* Initial post receive buffers */
252 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) 257 if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
253 return -ENOMEM; 258 return -ENOMEM;
@@ -389,7 +394,6 @@ int iser_send_control(struct iscsi_conn *conn,
389 unsigned long data_seg_len; 394 unsigned long data_seg_len;
390 int err = 0; 395 int err = 0;
391 struct iser_device *device; 396 struct iser_device *device;
392 struct iser_conn *ib_conn = iser_conn->ib_conn;
393 397
394 /* build the tx desc regd header and add it to the tx desc dto */ 398 /* build the tx desc regd header and add it to the tx desc dto */
395 mdesc->type = ISCSI_TX_CONTROL; 399 mdesc->type = ISCSI_TX_CONTROL;
@@ -405,19 +409,9 @@ int iser_send_control(struct iscsi_conn *conn,
405 iser_err("data present on non login task!!!\n"); 409 iser_err("data present on non login task!!!\n");
406 goto send_control_error; 410 goto send_control_error;
407 } 411 }
408 412 memcpy(iser_conn->ib_conn->login_buf, task->data,
409 ib_dma_sync_single_for_cpu(device->ib_device,
410 ib_conn->login_req_dma, task->data_count,
411 DMA_TO_DEVICE);
412
413 memcpy(iser_conn->ib_conn->login_req_buf, task->data,
414 task->data_count); 413 task->data_count);
415 414 tx_dsg->addr = iser_conn->ib_conn->login_dma;
416 ib_dma_sync_single_for_device(device->ib_device,
417 ib_conn->login_req_dma, task->data_count,
418 DMA_TO_DEVICE);
419
420 tx_dsg->addr = iser_conn->ib_conn->login_req_dma;
421 tx_dsg->length = task->data_count; 415 tx_dsg->length = task->data_count;
422 tx_dsg->lkey = device->mr->lkey; 416 tx_dsg->lkey = device->mr->lkey;
423 mdesc->num_sge = 2; 417 mdesc->num_sge = 2;
@@ -427,9 +421,6 @@ int iser_send_control(struct iscsi_conn *conn,
427 err = iser_post_recvl(iser_conn->ib_conn); 421 err = iser_post_recvl(iser_conn->ib_conn);
428 if (err) 422 if (err)
429 goto send_control_error; 423 goto send_control_error;
430 err = iser_post_rx_bufs(conn, task->hdr);
431 if (err)
432 goto send_control_error;
433 } 424 }
434 425
435 err = iser_post_send(iser_conn->ib_conn, mdesc); 426 err = iser_post_send(iser_conn->ib_conn, mdesc);
@@ -454,8 +445,8 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
454 int rx_buflen, outstanding, count, err; 445 int rx_buflen, outstanding, count, err;
455 446
456 /* differentiate between login to all other PDUs */ 447 /* differentiate between login to all other PDUs */
457 if ((char *)rx_desc == ib_conn->login_resp_buf) { 448 if ((char *)rx_desc == ib_conn->login_buf) {
458 rx_dma = ib_conn->login_resp_dma; 449 rx_dma = ib_conn->login_dma;
459 rx_buflen = ISER_RX_LOGIN_SIZE; 450 rx_buflen = ISER_RX_LOGIN_SIZE;
460 } else { 451 } else {
461 rx_dma = rx_desc->dma_addr; 452 rx_dma = rx_desc->dma_addr;
@@ -482,7 +473,7 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
482 * for the posted rx bufs refcount to become zero handles everything */ 473 * for the posted rx bufs refcount to become zero handles everything */
483 conn->ib_conn->post_recv_buf_count--; 474 conn->ib_conn->post_recv_buf_count--;
484 475
485 if (rx_dma == ib_conn->login_resp_dma) 476 if (rx_dma == ib_conn->login_dma)
486 return; 477 return;
487 478
488 outstanding = ib_conn->post_recv_buf_count; 479 outstanding = ib_conn->post_recv_buf_count;
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 2033a928d34..fb88d6896b6 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -73,11 +73,11 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
73 73
74 p = mem; 74 p = mem;
75 for_each_sg(sgl, sg, data->size, i) { 75 for_each_sg(sgl, sg, data->size, i) {
76 from = kmap_atomic(sg_page(sg)); 76 from = kmap_atomic(sg_page(sg), KM_USER0);
77 memcpy(p, 77 memcpy(p,
78 from + sg->offset, 78 from + sg->offset,
79 sg->length); 79 sg->length);
80 kunmap_atomic(from); 80 kunmap_atomic(from, KM_USER0);
81 p += sg->length; 81 p += sg->length;
82 } 82 }
83 } 83 }
@@ -133,11 +133,11 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
133 133
134 p = mem; 134 p = mem;
135 for_each_sg(sgl, sg, sg_size, i) { 135 for_each_sg(sgl, sg, sg_size, i) {
136 to = kmap_atomic(sg_page(sg)); 136 to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0);
137 memcpy(to + sg->offset, 137 memcpy(to + sg->offset,
138 p, 138 p,
139 sg->length); 139 sg->length);
140 kunmap_atomic(to); 140 kunmap_atomic(to, KM_SOFTIRQ0);
141 p += sg->length; 141 p += sg->length;
142 } 142 }
143 } 143 }
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 95a49affee4..ede1475bee0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -70,50 +70,32 @@ static void iser_event_handler(struct ib_event_handler *handler,
70 */ 70 */
71static int iser_create_device_ib_res(struct iser_device *device) 71static int iser_create_device_ib_res(struct iser_device *device)
72{ 72{
73 int i, j;
74 struct iser_cq_desc *cq_desc;
75
76 device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
77 iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
78 device->ib_device->name, device->ib_device->num_comp_vectors);
79
80 device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
81 GFP_KERNEL);
82 if (device->cq_desc == NULL)
83 goto cq_desc_err;
84 cq_desc = device->cq_desc;
85
86 device->pd = ib_alloc_pd(device->ib_device); 73 device->pd = ib_alloc_pd(device->ib_device);
87 if (IS_ERR(device->pd)) 74 if (IS_ERR(device->pd))
88 goto pd_err; 75 goto pd_err;
89 76
90 for (i = 0; i < device->cqs_used; i++) { 77 device->rx_cq = ib_create_cq(device->ib_device,
91 cq_desc[i].device = device; 78 iser_cq_callback,
92 cq_desc[i].cq_index = i; 79 iser_cq_event_callback,
80 (void *)device,
81 ISER_MAX_RX_CQ_LEN, 0);
82 if (IS_ERR(device->rx_cq))
83 goto rx_cq_err;
93 84
94 device->rx_cq[i] = ib_create_cq(device->ib_device, 85 device->tx_cq = ib_create_cq(device->ib_device,
95 iser_cq_callback, 86 NULL, iser_cq_event_callback,
96 iser_cq_event_callback, 87 (void *)device,
97 (void *)&cq_desc[i], 88 ISER_MAX_TX_CQ_LEN, 0);
98 ISER_MAX_RX_CQ_LEN, i);
99 if (IS_ERR(device->rx_cq[i]))
100 goto cq_err;
101 89
102 device->tx_cq[i] = ib_create_cq(device->ib_device, 90 if (IS_ERR(device->tx_cq))
103 NULL, iser_cq_event_callback, 91 goto tx_cq_err;
104 (void *)&cq_desc[i],
105 ISER_MAX_TX_CQ_LEN, i);
106 92
107 if (IS_ERR(device->tx_cq[i])) 93 if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP))
108 goto cq_err; 94 goto cq_arm_err;
109 95
110 if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP)) 96 tasklet_init(&device->cq_tasklet,
111 goto cq_err; 97 iser_cq_tasklet_fn,
112 98 (unsigned long)device);
113 tasklet_init(&device->cq_tasklet[i],
114 iser_cq_tasklet_fn,
115 (unsigned long)&cq_desc[i]);
116 }
117 99
118 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | 100 device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
119 IB_ACCESS_REMOTE_WRITE | 101 IB_ACCESS_REMOTE_WRITE |
@@ -131,19 +113,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
131handler_err: 113handler_err:
132 ib_dereg_mr(device->mr); 114 ib_dereg_mr(device->mr);
133dma_mr_err: 115dma_mr_err:
134 for (j = 0; j < device->cqs_used; j++) 116 tasklet_kill(&device->cq_tasklet);
135 tasklet_kill(&device->cq_tasklet[j]); 117cq_arm_err:
136cq_err: 118 ib_destroy_cq(device->tx_cq);
137 for (j = 0; j < i; j++) { 119tx_cq_err:
138 if (device->tx_cq[j]) 120 ib_destroy_cq(device->rx_cq);
139 ib_destroy_cq(device->tx_cq[j]); 121rx_cq_err:
140 if (device->rx_cq[j])
141 ib_destroy_cq(device->rx_cq[j]);
142 }
143 ib_dealloc_pd(device->pd); 122 ib_dealloc_pd(device->pd);
144pd_err: 123pd_err:
145 kfree(device->cq_desc);
146cq_desc_err:
147 iser_err("failed to allocate an IB resource\n"); 124 iser_err("failed to allocate an IB resource\n");
148 return -1; 125 return -1;
149} 126}
@@ -154,24 +131,18 @@ cq_desc_err:
154 */ 131 */
155static void iser_free_device_ib_res(struct iser_device *device) 132static void iser_free_device_ib_res(struct iser_device *device)
156{ 133{
157 int i;
158 BUG_ON(device->mr == NULL); 134 BUG_ON(device->mr == NULL);
159 135
160 for (i = 0; i < device->cqs_used; i++) { 136 tasklet_kill(&device->cq_tasklet);
161 tasklet_kill(&device->cq_tasklet[i]);
162 (void)ib_destroy_cq(device->tx_cq[i]);
163 (void)ib_destroy_cq(device->rx_cq[i]);
164 device->tx_cq[i] = NULL;
165 device->rx_cq[i] = NULL;
166 }
167
168 (void)ib_unregister_event_handler(&device->event_handler); 137 (void)ib_unregister_event_handler(&device->event_handler);
169 (void)ib_dereg_mr(device->mr); 138 (void)ib_dereg_mr(device->mr);
139 (void)ib_destroy_cq(device->tx_cq);
140 (void)ib_destroy_cq(device->rx_cq);
170 (void)ib_dealloc_pd(device->pd); 141 (void)ib_dealloc_pd(device->pd);
171 142
172 kfree(device->cq_desc);
173
174 device->mr = NULL; 143 device->mr = NULL;
144 device->tx_cq = NULL;
145 device->rx_cq = NULL;
175 device->pd = NULL; 146 device->pd = NULL;
176} 147}
177 148
@@ -184,40 +155,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
184{ 155{
185 struct iser_device *device; 156 struct iser_device *device;
186 struct ib_qp_init_attr init_attr; 157 struct ib_qp_init_attr init_attr;
187 int req_err, resp_err, ret = -ENOMEM; 158 int ret = -ENOMEM;
188 struct ib_fmr_pool_param params; 159 struct ib_fmr_pool_param params;
189 int index, min_index = 0;
190 160
191 BUG_ON(ib_conn->device == NULL); 161 BUG_ON(ib_conn->device == NULL);
192 162
193 device = ib_conn->device; 163 device = ib_conn->device;
194 164
195 ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + 165 ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL);
196 ISER_RX_LOGIN_SIZE, GFP_KERNEL);
197 if (!ib_conn->login_buf) 166 if (!ib_conn->login_buf)
198 goto out_err; 167 goto out_err;
199 168
200 ib_conn->login_req_buf = ib_conn->login_buf; 169 ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device,
201 ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; 170 (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE,
202 171 DMA_FROM_DEVICE);
203 ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
204 (void *)ib_conn->login_req_buf,
205 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
206
207 ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
208 (void *)ib_conn->login_resp_buf,
209 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
210
211 req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
212 resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
213
214 if (req_err || resp_err) {
215 if (req_err)
216 ib_conn->login_req_dma = 0;
217 if (resp_err)
218 ib_conn->login_resp_dma = 0;
219 goto out_err;
220 }
221 172
222 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + 173 ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
223 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), 174 (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
@@ -250,20 +201,10 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
250 201
251 memset(&init_attr, 0, sizeof init_attr); 202 memset(&init_attr, 0, sizeof init_attr);
252 203
253 mutex_lock(&ig.connlist_mutex);
254 /* select the CQ with the minimal number of usages */
255 for (index = 0; index < device->cqs_used; index++)
256 if (device->cq_active_qps[index] <
257 device->cq_active_qps[min_index])
258 min_index = index;
259 device->cq_active_qps[min_index]++;
260 mutex_unlock(&ig.connlist_mutex);
261 iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
262
263 init_attr.event_handler = iser_qp_event_callback; 204 init_attr.event_handler = iser_qp_event_callback;
264 init_attr.qp_context = (void *)ib_conn; 205 init_attr.qp_context = (void *)ib_conn;
265 init_attr.send_cq = device->tx_cq[min_index]; 206 init_attr.send_cq = device->tx_cq;
266 init_attr.recv_cq = device->rx_cq[min_index]; 207 init_attr.recv_cq = device->rx_cq;
267 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; 208 init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
268 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; 209 init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
269 init_attr.cap.max_send_sge = 2; 210 init_attr.cap.max_send_sge = 2;
@@ -292,7 +233,6 @@ out_err:
292 */ 233 */
293static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) 234static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
294{ 235{
295 int cq_index;
296 BUG_ON(ib_conn == NULL); 236 BUG_ON(ib_conn == NULL);
297 237
298 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", 238 iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
@@ -303,12 +243,9 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
303 if (ib_conn->fmr_pool != NULL) 243 if (ib_conn->fmr_pool != NULL)
304 ib_destroy_fmr_pool(ib_conn->fmr_pool); 244 ib_destroy_fmr_pool(ib_conn->fmr_pool);
305 245
306 if (ib_conn->qp != NULL) { 246 if (ib_conn->qp != NULL)
307 cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
308 ib_conn->device->cq_active_qps[cq_index]--;
309
310 rdma_destroy_qp(ib_conn->cma_id); 247 rdma_destroy_qp(ib_conn->cma_id);
311 } 248
312 /* if cma handler context, the caller acts s.t the cma destroy the id */ 249 /* if cma handler context, the caller acts s.t the cma destroy the id */
313 if (ib_conn->cma_id != NULL && can_destroy_id) 250 if (ib_conn->cma_id != NULL && can_destroy_id)
314 rdma_destroy_id(ib_conn->cma_id); 251 rdma_destroy_id(ib_conn->cma_id);
@@ -318,18 +255,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
318 ib_conn->cma_id = NULL; 255 ib_conn->cma_id = NULL;
319 kfree(ib_conn->page_vec); 256 kfree(ib_conn->page_vec);
320 257
321 if (ib_conn->login_buf) {
322 if (ib_conn->login_req_dma)
323 ib_dma_unmap_single(ib_conn->device->ib_device,
324 ib_conn->login_req_dma,
325 ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
326 if (ib_conn->login_resp_dma)
327 ib_dma_unmap_single(ib_conn->device->ib_device,
328 ib_conn->login_resp_dma,
329 ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
330 kfree(ib_conn->login_buf);
331 }
332
333 return 0; 258 return 0;
334} 259}
335 260
@@ -657,9 +582,8 @@ id_failure:
657 ib_conn->cma_id = NULL; 582 ib_conn->cma_id = NULL;
658addr_failure: 583addr_failure:
659 ib_conn->state = ISER_CONN_DOWN; 584 ib_conn->state = ISER_CONN_DOWN;
660 iser_conn_put(ib_conn, 1); /* deref ib conn's cma id */
661connect_failure: 585connect_failure:
662 iser_conn_put(ib_conn, 1); /* deref ib conn deallocate */ 586 iser_conn_release(ib_conn, 1);
663 return err; 587 return err;
664} 588}
665 589
@@ -734,11 +658,11 @@ int iser_post_recvl(struct iser_conn *ib_conn)
734 struct ib_sge sge; 658 struct ib_sge sge;
735 int ib_ret; 659 int ib_ret;
736 660
737 sge.addr = ib_conn->login_resp_dma; 661 sge.addr = ib_conn->login_dma;
738 sge.length = ISER_RX_LOGIN_SIZE; 662 sge.length = ISER_RX_LOGIN_SIZE;
739 sge.lkey = ib_conn->device->mr->lkey; 663 sge.lkey = ib_conn->device->mr->lkey;
740 664
741 rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; 665 rx_wr.wr_id = (unsigned long)ib_conn->login_buf;
742 rx_wr.sg_list = &sge; 666 rx_wr.sg_list = &sge;
743 rx_wr.num_sge = 1; 667 rx_wr.num_sge = 1;
744 rx_wr.next = NULL; 668 rx_wr.next = NULL;
@@ -835,9 +759,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
835 } 759 }
836} 760}
837 761
838static int iser_drain_tx_cq(struct iser_device *device, int cq_index) 762static int iser_drain_tx_cq(struct iser_device *device)
839{ 763{
840 struct ib_cq *cq = device->tx_cq[cq_index]; 764 struct ib_cq *cq = device->tx_cq;
841 struct ib_wc wc; 765 struct ib_wc wc;
842 struct iser_tx_desc *tx_desc; 766 struct iser_tx_desc *tx_desc;
843 struct iser_conn *ib_conn; 767 struct iser_conn *ib_conn;
@@ -866,10 +790,8 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
866 790
867static void iser_cq_tasklet_fn(unsigned long data) 791static void iser_cq_tasklet_fn(unsigned long data)
868{ 792{
869 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data; 793 struct iser_device *device = (struct iser_device *)data;
870 struct iser_device *device = cq_desc->device; 794 struct ib_cq *cq = device->rx_cq;
871 int cq_index = cq_desc->cq_index;
872 struct ib_cq *cq = device->rx_cq[cq_index];
873 struct ib_wc wc; 795 struct ib_wc wc;
874 struct iser_rx_desc *desc; 796 struct iser_rx_desc *desc;
875 unsigned long xfer_len; 797 unsigned long xfer_len;
@@ -897,21 +819,19 @@ static void iser_cq_tasklet_fn(unsigned long data)
897 } 819 }
898 completed_rx++; 820 completed_rx++;
899 if (!(completed_rx & 63)) 821 if (!(completed_rx & 63))
900 completed_tx += iser_drain_tx_cq(device, cq_index); 822 completed_tx += iser_drain_tx_cq(device);
901 } 823 }
902 /* #warning "it is assumed here that arming CQ only once its empty" * 824 /* #warning "it is assumed here that arming CQ only once its empty" *
903 * " would not cause interrupts to be missed" */ 825 * " would not cause interrupts to be missed" */
904 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 826 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
905 827
906 completed_tx += iser_drain_tx_cq(device, cq_index); 828 completed_tx += iser_drain_tx_cq(device);
907 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); 829 iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
908} 830}
909 831
910static void iser_cq_callback(struct ib_cq *cq, void *cq_context) 832static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
911{ 833{
912 struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context; 834 struct iser_device *device = (struct iser_device *)cq_context;
913 struct iser_device *device = cq_desc->device;
914 int cq_index = cq_desc->cq_index;
915 835
916 tasklet_schedule(&device->cq_tasklet[cq_index]); 836 tasklet_schedule(&device->cq_tasklet);
917} 837}
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d5088ce7829..0bfa545675b 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -30,8 +30,6 @@
30 * SOFTWARE. 30 * SOFTWARE.
31 */ 31 */
32 32
33#define pr_fmt(fmt) PFX fmt
34
35#include <linux/module.h> 33#include <linux/module.h>
36#include <linux/init.h> 34#include <linux/init.h>
37#include <linux/slab.h> 35#include <linux/slab.h>
@@ -167,7 +165,7 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
167 165
168static void srp_qp_event(struct ib_event *event, void *context) 166static void srp_qp_event(struct ib_event *event, void *context)
169{ 167{
170 pr_debug("QP event %d\n", event->event); 168 printk(KERN_ERR PFX "QP event %d\n", event->event);
171} 169}
172 170
173static int srp_init_qp(struct srp_target_port *target, 171static int srp_init_qp(struct srp_target_port *target,
@@ -222,29 +220,27 @@ static int srp_new_cm_id(struct srp_target_port *target)
222static int srp_create_target_ib(struct srp_target_port *target) 220static int srp_create_target_ib(struct srp_target_port *target)
223{ 221{
224 struct ib_qp_init_attr *init_attr; 222 struct ib_qp_init_attr *init_attr;
225 struct ib_cq *recv_cq, *send_cq;
226 struct ib_qp *qp;
227 int ret; 223 int ret;
228 224
229 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); 225 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
230 if (!init_attr) 226 if (!init_attr)
231 return -ENOMEM; 227 return -ENOMEM;
232 228
233 recv_cq = ib_create_cq(target->srp_host->srp_dev->dev, 229 target->recv_cq = ib_create_cq(target->srp_host->srp_dev->dev,
234 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0); 230 srp_recv_completion, NULL, target, SRP_RQ_SIZE, 0);
235 if (IS_ERR(recv_cq)) { 231 if (IS_ERR(target->recv_cq)) {
236 ret = PTR_ERR(recv_cq); 232 ret = PTR_ERR(target->recv_cq);
237 goto err; 233 goto err;
238 } 234 }
239 235
240 send_cq = ib_create_cq(target->srp_host->srp_dev->dev, 236 target->send_cq = ib_create_cq(target->srp_host->srp_dev->dev,
241 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0); 237 srp_send_completion, NULL, target, SRP_SQ_SIZE, 0);
242 if (IS_ERR(send_cq)) { 238 if (IS_ERR(target->send_cq)) {
243 ret = PTR_ERR(send_cq); 239 ret = PTR_ERR(target->send_cq);
244 goto err_recv_cq; 240 goto err_recv_cq;
245 } 241 }
246 242
247 ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); 243 ib_req_notify_cq(target->recv_cq, IB_CQ_NEXT_COMP);
248 244
249 init_attr->event_handler = srp_qp_event; 245 init_attr->event_handler = srp_qp_event;
250 init_attr->cap.max_send_wr = SRP_SQ_SIZE; 246 init_attr->cap.max_send_wr = SRP_SQ_SIZE;
@@ -253,41 +249,30 @@ static int srp_create_target_ib(struct srp_target_port *target)
253 init_attr->cap.max_send_sge = 1; 249 init_attr->cap.max_send_sge = 1;
254 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; 250 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
255 init_attr->qp_type = IB_QPT_RC; 251 init_attr->qp_type = IB_QPT_RC;
256 init_attr->send_cq = send_cq; 252 init_attr->send_cq = target->send_cq;
257 init_attr->recv_cq = recv_cq; 253 init_attr->recv_cq = target->recv_cq;
258 254
259 qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr); 255 target->qp = ib_create_qp(target->srp_host->srp_dev->pd, init_attr);
260 if (IS_ERR(qp)) { 256 if (IS_ERR(target->qp)) {
261 ret = PTR_ERR(qp); 257 ret = PTR_ERR(target->qp);
262 goto err_send_cq; 258 goto err_send_cq;
263 } 259 }
264 260
265 ret = srp_init_qp(target, qp); 261 ret = srp_init_qp(target, target->qp);
266 if (ret) 262 if (ret)
267 goto err_qp; 263 goto err_qp;
268 264
269 if (target->qp)
270 ib_destroy_qp(target->qp);
271 if (target->recv_cq)
272 ib_destroy_cq(target->recv_cq);
273 if (target->send_cq)
274 ib_destroy_cq(target->send_cq);
275
276 target->qp = qp;
277 target->recv_cq = recv_cq;
278 target->send_cq = send_cq;
279
280 kfree(init_attr); 265 kfree(init_attr);
281 return 0; 266 return 0;
282 267
283err_qp: 268err_qp:
284 ib_destroy_qp(qp); 269 ib_destroy_qp(target->qp);
285 270
286err_send_cq: 271err_send_cq:
287 ib_destroy_cq(send_cq); 272 ib_destroy_cq(target->send_cq);
288 273
289err_recv_cq: 274err_recv_cq:
290 ib_destroy_cq(recv_cq); 275 ib_destroy_cq(target->recv_cq);
291 276
292err: 277err:
293 kfree(init_attr); 278 kfree(init_attr);
@@ -302,9 +287,6 @@ static void srp_free_target_ib(struct srp_target_port *target)
302 ib_destroy_cq(target->send_cq); 287 ib_destroy_cq(target->send_cq);
303 ib_destroy_cq(target->recv_cq); 288 ib_destroy_cq(target->recv_cq);
304 289
305 target->qp = NULL;
306 target->send_cq = target->recv_cq = NULL;
307
308 for (i = 0; i < SRP_RQ_SIZE; ++i) 290 for (i = 0; i < SRP_RQ_SIZE; ++i)
309 srp_free_iu(target->srp_host, target->rx_ring[i]); 291 srp_free_iu(target->srp_host, target->rx_ring[i]);
310 for (i = 0; i < SRP_SQ_SIZE; ++i) 292 for (i = 0; i < SRP_SQ_SIZE; ++i)
@@ -444,50 +426,34 @@ static int srp_send_req(struct srp_target_port *target)
444 return status; 426 return status;
445} 427}
446 428
447static bool srp_queue_remove_work(struct srp_target_port *target) 429static void srp_disconnect_target(struct srp_target_port *target)
448{ 430{
449 bool changed = false; 431 /* XXX should send SRP_I_LOGOUT request */
450 432
451 spin_lock_irq(&target->lock); 433 init_completion(&target->done);
452 if (target->state != SRP_TARGET_REMOVED) { 434 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
453 target->state = SRP_TARGET_REMOVED; 435 shost_printk(KERN_DEBUG, target->scsi_host,
454 changed = true; 436 PFX "Sending CM DREQ failed\n");
437 return;
455 } 438 }
456 spin_unlock_irq(&target->lock); 439 wait_for_completion(&target->done);
457
458 if (changed)
459 queue_work(system_long_wq, &target->remove_work);
460
461 return changed;
462} 440}
463 441
464static bool srp_change_conn_state(struct srp_target_port *target, 442static bool srp_change_state(struct srp_target_port *target,
465 bool connected) 443 enum srp_target_state old,
444 enum srp_target_state new)
466{ 445{
467 bool changed = false; 446 bool changed = false;
468 447
469 spin_lock_irq(&target->lock); 448 spin_lock_irq(&target->lock);
470 if (target->connected != connected) { 449 if (target->state == old) {
471 target->connected = connected; 450 target->state = new;
472 changed = true; 451 changed = true;
473 } 452 }
474 spin_unlock_irq(&target->lock); 453 spin_unlock_irq(&target->lock);
475
476 return changed; 454 return changed;
477} 455}
478 456
479static void srp_disconnect_target(struct srp_target_port *target)
480{
481 if (srp_change_conn_state(target, false)) {
482 /* XXX should send SRP_I_LOGOUT request */
483
484 if (ib_send_cm_dreq(target->cm_id, NULL, 0)) {
485 shost_printk(KERN_DEBUG, target->scsi_host,
486 PFX "Sending CM DREQ failed\n");
487 }
488 }
489}
490
491static void srp_free_req_data(struct srp_target_port *target) 457static void srp_free_req_data(struct srp_target_port *target)
492{ 458{
493 struct ib_device *ibdev = target->srp_host->srp_dev->dev; 459 struct ib_device *ibdev = target->srp_host->srp_dev->dev;
@@ -506,65 +472,31 @@ static void srp_free_req_data(struct srp_target_port *target)
506 } 472 }
507} 473}
508 474
509/** 475static void srp_remove_work(struct work_struct *work)
510 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
511 * @shost: SCSI host whose attributes to remove from sysfs.
512 *
513 * Note: Any attributes defined in the host template and that did not exist
514 * before invocation of this function will be ignored.
515 */
516static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
517{ 476{
518 struct device_attribute **attr; 477 struct srp_target_port *target =
478 container_of(work, struct srp_target_port, work);
519 479
520 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) 480 if (!srp_change_state(target, SRP_TARGET_DEAD, SRP_TARGET_REMOVED))
521 device_remove_file(&shost->shost_dev, *attr); 481 return;
522}
523 482
524static void srp_remove_target(struct srp_target_port *target) 483 spin_lock(&target->srp_host->target_lock);
525{ 484 list_del(&target->list);
526 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED); 485 spin_unlock(&target->srp_host->target_lock);
527 486
528 srp_del_scsi_host_attr(target->scsi_host);
529 srp_remove_host(target->scsi_host); 487 srp_remove_host(target->scsi_host);
530 scsi_remove_host(target->scsi_host); 488 scsi_remove_host(target->scsi_host);
531 srp_disconnect_target(target);
532 ib_destroy_cm_id(target->cm_id); 489 ib_destroy_cm_id(target->cm_id);
533 srp_free_target_ib(target); 490 srp_free_target_ib(target);
534 srp_free_req_data(target); 491 srp_free_req_data(target);
535 scsi_host_put(target->scsi_host); 492 scsi_host_put(target->scsi_host);
536} 493}
537 494
538static void srp_remove_work(struct work_struct *work)
539{
540 struct srp_target_port *target =
541 container_of(work, struct srp_target_port, remove_work);
542
543 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
544
545 spin_lock(&target->srp_host->target_lock);
546 list_del(&target->list);
547 spin_unlock(&target->srp_host->target_lock);
548
549 srp_remove_target(target);
550}
551
552static void srp_rport_delete(struct srp_rport *rport)
553{
554 struct srp_target_port *target = rport->lld_data;
555
556 srp_queue_remove_work(target);
557}
558
559static int srp_connect_target(struct srp_target_port *target) 495static int srp_connect_target(struct srp_target_port *target)
560{ 496{
561 int retries = 3; 497 int retries = 3;
562 int ret; 498 int ret;
563 499
564 WARN_ON_ONCE(target->connected);
565
566 target->qp_in_error = false;
567
568 ret = srp_lookup_path(target); 500 ret = srp_lookup_path(target);
569 if (ret) 501 if (ret)
570 return ret; 502 return ret;
@@ -584,7 +516,6 @@ static int srp_connect_target(struct srp_target_port *target)
584 */ 516 */
585 switch (target->status) { 517 switch (target->status) {
586 case 0: 518 case 0:
587 srp_change_conn_state(target, true);
588 return 0; 519 return 0;
589 520
590 case SRP_PORT_REDIRECT: 521 case SRP_PORT_REDIRECT:
@@ -637,74 +568,35 @@ static void srp_unmap_data(struct scsi_cmnd *scmnd,
637 scmnd->sc_data_direction); 568 scmnd->sc_data_direction);
638} 569}
639 570
640/** 571static void srp_remove_req(struct srp_target_port *target,
641 * srp_claim_req - Take ownership of the scmnd associated with a request. 572 struct srp_request *req, s32 req_lim_delta)
642 * @target: SRP target port.
643 * @req: SRP request.
644 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
645 * ownership of @req->scmnd if it equals @scmnd.
646 *
647 * Return value:
648 * Either NULL or a pointer to the SCSI command the caller became owner of.
649 */
650static struct scsi_cmnd *srp_claim_req(struct srp_target_port *target,
651 struct srp_request *req,
652 struct scsi_cmnd *scmnd)
653{ 573{
654 unsigned long flags; 574 unsigned long flags;
655 575
656 spin_lock_irqsave(&target->lock, flags); 576 srp_unmap_data(req->scmnd, target, req);
657 if (!scmnd) {
658 scmnd = req->scmnd;
659 req->scmnd = NULL;
660 } else if (req->scmnd == scmnd) {
661 req->scmnd = NULL;
662 } else {
663 scmnd = NULL;
664 }
665 spin_unlock_irqrestore(&target->lock, flags);
666
667 return scmnd;
668}
669
670/**
671 * srp_free_req() - Unmap data and add request to the free request list.
672 */
673static void srp_free_req(struct srp_target_port *target,
674 struct srp_request *req, struct scsi_cmnd *scmnd,
675 s32 req_lim_delta)
676{
677 unsigned long flags;
678
679 srp_unmap_data(scmnd, target, req);
680
681 spin_lock_irqsave(&target->lock, flags); 577 spin_lock_irqsave(&target->lock, flags);
682 target->req_lim += req_lim_delta; 578 target->req_lim += req_lim_delta;
579 req->scmnd = NULL;
683 list_add_tail(&req->list, &target->free_reqs); 580 list_add_tail(&req->list, &target->free_reqs);
684 spin_unlock_irqrestore(&target->lock, flags); 581 spin_unlock_irqrestore(&target->lock, flags);
685} 582}
686 583
687static void srp_reset_req(struct srp_target_port *target, struct srp_request *req) 584static void srp_reset_req(struct srp_target_port *target, struct srp_request *req)
688{ 585{
689 struct scsi_cmnd *scmnd = srp_claim_req(target, req, NULL); 586 req->scmnd->result = DID_RESET << 16;
690 587 req->scmnd->scsi_done(req->scmnd);
691 if (scmnd) { 588 srp_remove_req(target, req, 0);
692 srp_free_req(target, req, scmnd, 0);
693 scmnd->result = DID_RESET << 16;
694 scmnd->scsi_done(scmnd);
695 }
696} 589}
697 590
698static int srp_reconnect_target(struct srp_target_port *target) 591static int srp_reconnect_target(struct srp_target_port *target)
699{ 592{
700 struct Scsi_Host *shost = target->scsi_host; 593 struct ib_qp_attr qp_attr;
594 struct ib_wc wc;
701 int i, ret; 595 int i, ret;
702 596
703 if (target->state != SRP_TARGET_LIVE) 597 if (!srp_change_state(target, SRP_TARGET_LIVE, SRP_TARGET_CONNECTING))
704 return -EAGAIN; 598 return -EAGAIN;
705 599
706 scsi_target_block(&shost->shost_gendev);
707
708 srp_disconnect_target(target); 600 srp_disconnect_target(target);
709 /* 601 /*
710 * Now get a new local CM ID so that we avoid confusing the 602 * Now get a new local CM ID so that we avoid confusing the
@@ -712,11 +604,21 @@ static int srp_reconnect_target(struct srp_target_port *target)
712 */ 604 */
713 ret = srp_new_cm_id(target); 605 ret = srp_new_cm_id(target);
714 if (ret) 606 if (ret)
715 goto unblock; 607 goto err;
716 608
717 ret = srp_create_target_ib(target); 609 qp_attr.qp_state = IB_QPS_RESET;
610 ret = ib_modify_qp(target->qp, &qp_attr, IB_QP_STATE);
611 if (ret)
612 goto err;
613
614 ret = srp_init_qp(target, target->qp);
718 if (ret) 615 if (ret)
719 goto unblock; 616 goto err;
617
618 while (ib_poll_cq(target->recv_cq, 1, &wc) > 0)
619 ; /* nothing */
620 while (ib_poll_cq(target->send_cq, 1, &wc) > 0)
621 ; /* nothing */
720 622
721 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) { 623 for (i = 0; i < SRP_CMD_SQ_SIZE; ++i) {
722 struct srp_request *req = &target->req_ring[i]; 624 struct srp_request *req = &target->req_ring[i];
@@ -728,16 +630,13 @@ static int srp_reconnect_target(struct srp_target_port *target)
728 for (i = 0; i < SRP_SQ_SIZE; ++i) 630 for (i = 0; i < SRP_SQ_SIZE; ++i)
729 list_add(&target->tx_ring[i]->list, &target->free_tx); 631 list_add(&target->tx_ring[i]->list, &target->free_tx);
730 632
633 target->qp_in_error = 0;
731 ret = srp_connect_target(target); 634 ret = srp_connect_target(target);
732
733unblock:
734 scsi_target_unblock(&shost->shost_gendev, ret == 0 ? SDEV_RUNNING :
735 SDEV_TRANSPORT_OFFLINE);
736
737 if (ret) 635 if (ret)
738 goto err; 636 goto err;
739 637
740 shost_printk(KERN_INFO, target->scsi_host, PFX "reconnect succeeded\n"); 638 if (!srp_change_state(target, SRP_TARGET_CONNECTING, SRP_TARGET_LIVE))
639 ret = -EAGAIN;
741 640
742 return ret; 641 return ret;
743 642
@@ -750,8 +649,17 @@ err:
750 * However, we have to defer the real removal because we 649 * However, we have to defer the real removal because we
751 * are in the context of the SCSI error handler now, which 650 * are in the context of the SCSI error handler now, which
752 * will deadlock if we call scsi_remove_host(). 651 * will deadlock if we call scsi_remove_host().
652 *
653 * Schedule our work inside the lock to avoid a race with
654 * the flush_scheduled_work() in srp_remove_one().
753 */ 655 */
754 srp_queue_remove_work(target); 656 spin_lock_irq(&target->lock);
657 if (target->state == SRP_TARGET_CONNECTING) {
658 target->state = SRP_TARGET_DEAD;
659 INIT_WORK(&target->work, srp_remove_work);
660 queue_work(ib_wq, &target->work);
661 }
662 spin_unlock_irq(&target->lock);
755 663
756 return ret; 664 return ret;
757} 665}
@@ -1147,18 +1055,11 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1147 complete(&target->tsk_mgmt_done); 1055 complete(&target->tsk_mgmt_done);
1148 } else { 1056 } else {
1149 req = &target->req_ring[rsp->tag]; 1057 req = &target->req_ring[rsp->tag];
1150 scmnd = srp_claim_req(target, req, NULL); 1058 scmnd = req->scmnd;
1151 if (!scmnd) { 1059 if (!scmnd)
1152 shost_printk(KERN_ERR, target->scsi_host, 1060 shost_printk(KERN_ERR, target->scsi_host,
1153 "Null scmnd for RSP w/tag %016llx\n", 1061 "Null scmnd for RSP w/tag %016llx\n",
1154 (unsigned long long) rsp->tag); 1062 (unsigned long long) rsp->tag);
1155
1156 spin_lock_irqsave(&target->lock, flags);
1157 target->req_lim += be32_to_cpu(rsp->req_lim_delta);
1158 spin_unlock_irqrestore(&target->lock, flags);
1159
1160 return;
1161 }
1162 scmnd->result = rsp->status; 1063 scmnd->result = rsp->status;
1163 1064
1164 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) { 1065 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
@@ -1173,9 +1074,7 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1173 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) 1074 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
1174 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 1075 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1175 1076
1176 srp_free_req(target, req, scmnd, 1077 srp_remove_req(target, req, be32_to_cpu(rsp->req_lim_delta));
1177 be32_to_cpu(rsp->req_lim_delta));
1178
1179 scmnd->host_scribble = NULL; 1078 scmnd->host_scribble = NULL;
1180 scmnd->scsi_done(scmnd); 1079 scmnd->scsi_done(scmnd);
1181 } 1080 }
@@ -1298,19 +1197,6 @@ static void srp_handle_recv(struct srp_target_port *target, struct ib_wc *wc)
1298 PFX "Recv failed with error code %d\n", res); 1197 PFX "Recv failed with error code %d\n", res);
1299} 1198}
1300 1199
1301static void srp_handle_qp_err(enum ib_wc_status wc_status,
1302 enum ib_wc_opcode wc_opcode,
1303 struct srp_target_port *target)
1304{
1305 if (target->connected && !target->qp_in_error) {
1306 shost_printk(KERN_ERR, target->scsi_host,
1307 PFX "failed %s status %d\n",
1308 wc_opcode & IB_WC_RECV ? "receive" : "send",
1309 wc_status);
1310 }
1311 target->qp_in_error = true;
1312}
1313
1314static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) 1200static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1315{ 1201{
1316 struct srp_target_port *target = target_ptr; 1202 struct srp_target_port *target = target_ptr;
@@ -1318,11 +1204,15 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
1318 1204
1319 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); 1205 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
1320 while (ib_poll_cq(cq, 1, &wc) > 0) { 1206 while (ib_poll_cq(cq, 1, &wc) > 0) {
1321 if (likely(wc.status == IB_WC_SUCCESS)) { 1207 if (wc.status) {
1322 srp_handle_recv(target, &wc); 1208 shost_printk(KERN_ERR, target->scsi_host,
1323 } else { 1209 PFX "failed receive status %d\n",
1324 srp_handle_qp_err(wc.status, wc.opcode, target); 1210 wc.status);
1211 target->qp_in_error = 1;
1212 break;
1325 } 1213 }
1214
1215 srp_handle_recv(target, &wc);
1326 } 1216 }
1327} 1217}
1328 1218
@@ -1333,12 +1223,16 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
1333 struct srp_iu *iu; 1223 struct srp_iu *iu;
1334 1224
1335 while (ib_poll_cq(cq, 1, &wc) > 0) { 1225 while (ib_poll_cq(cq, 1, &wc) > 0) {
1336 if (likely(wc.status == IB_WC_SUCCESS)) { 1226 if (wc.status) {
1337 iu = (struct srp_iu *) (uintptr_t) wc.wr_id; 1227 shost_printk(KERN_ERR, target->scsi_host,
1338 list_add(&iu->list, &target->free_tx); 1228 PFX "failed send status %d\n",
1339 } else { 1229 wc.status);
1340 srp_handle_qp_err(wc.status, wc.opcode, target); 1230 target->qp_in_error = 1;
1231 break;
1341 } 1232 }
1233
1234 iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
1235 list_add(&iu->list, &target->free_tx);
1342 } 1236 }
1343} 1237}
1344 1238
@@ -1352,6 +1246,16 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
1352 unsigned long flags; 1246 unsigned long flags;
1353 int len; 1247 int len;
1354 1248
1249 if (target->state == SRP_TARGET_CONNECTING)
1250 goto err;
1251
1252 if (target->state == SRP_TARGET_DEAD ||
1253 target->state == SRP_TARGET_REMOVED) {
1254 scmnd->result = DID_BAD_TARGET << 16;
1255 scmnd->scsi_done(scmnd);
1256 return 0;
1257 }
1258
1355 spin_lock_irqsave(&target->lock, flags); 1259 spin_lock_irqsave(&target->lock, flags);
1356 iu = __srp_get_tx_iu(target, SRP_IU_CMD); 1260 iu = __srp_get_tx_iu(target, SRP_IU_CMD);
1357 if (!iu) 1261 if (!iu)
@@ -1408,6 +1312,7 @@ err_iu:
1408err_unlock: 1312err_unlock:
1409 spin_unlock_irqrestore(&target->lock, flags); 1313 spin_unlock_irqrestore(&target->lock, flags);
1410 1314
1315err:
1411 return SCSI_MLQUEUE_HOST_BUSY; 1316 return SCSI_MLQUEUE_HOST_BUSY;
1412} 1317}
1413 1318
@@ -1449,33 +1354,6 @@ err:
1449 return -ENOMEM; 1354 return -ENOMEM;
1450} 1355}
1451 1356
1452static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
1453{
1454 uint64_t T_tr_ns, max_compl_time_ms;
1455 uint32_t rq_tmo_jiffies;
1456
1457 /*
1458 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
1459 * table 91), both the QP timeout and the retry count have to be set
1460 * for RC QP's during the RTR to RTS transition.
1461 */
1462 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
1463 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
1464
1465 /*
1466 * Set target->rq_tmo_jiffies to one second more than the largest time
1467 * it can take before an error completion is generated. See also
1468 * C9-140..142 in the IBTA spec for more information about how to
1469 * convert the QP Local ACK Timeout value to nanoseconds.
1470 */
1471 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
1472 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
1473 do_div(max_compl_time_ms, NSEC_PER_MSEC);
1474 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
1475
1476 return rq_tmo_jiffies;
1477}
1478
1479static void srp_cm_rep_handler(struct ib_cm_id *cm_id, 1357static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1480 struct srp_login_rsp *lrsp, 1358 struct srp_login_rsp *lrsp,
1481 struct srp_target_port *target) 1359 struct srp_target_port *target)
@@ -1535,8 +1413,6 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
1535 if (ret) 1413 if (ret)
1536 goto error_free; 1414 goto error_free;
1537 1415
1538 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
1539
1540 ret = ib_modify_qp(target->qp, qp_attr, attr_mask); 1416 ret = ib_modify_qp(target->qp, qp_attr, attr_mask);
1541 if (ret) 1417 if (ret)
1542 goto error_free; 1418 goto error_free;
@@ -1658,7 +1534,6 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1658 case IB_CM_DREQ_RECEIVED: 1534 case IB_CM_DREQ_RECEIVED:
1659 shost_printk(KERN_WARNING, target->scsi_host, 1535 shost_printk(KERN_WARNING, target->scsi_host,
1660 PFX "DREQ received - connection closed\n"); 1536 PFX "DREQ received - connection closed\n");
1661 srp_change_conn_state(target, false);
1662 if (ib_send_cm_drep(cm_id, NULL, 0)) 1537 if (ib_send_cm_drep(cm_id, NULL, 0))
1663 shost_printk(KERN_ERR, target->scsi_host, 1538 shost_printk(KERN_ERR, target->scsi_host,
1664 PFX "Sending CM DREP failed\n"); 1539 PFX "Sending CM DREP failed\n");
@@ -1668,6 +1543,7 @@ static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1668 shost_printk(KERN_ERR, target->scsi_host, 1543 shost_printk(KERN_ERR, target->scsi_host,
1669 PFX "connection closed\n"); 1544 PFX "connection closed\n");
1670 1545
1546 comp = 1;
1671 target->status = 0; 1547 target->status = 0;
1672 break; 1548 break;
1673 1549
@@ -1695,6 +1571,10 @@ static int srp_send_tsk_mgmt(struct srp_target_port *target,
1695 struct srp_iu *iu; 1571 struct srp_iu *iu;
1696 struct srp_tsk_mgmt *tsk_mgmt; 1572 struct srp_tsk_mgmt *tsk_mgmt;
1697 1573
1574 if (target->state == SRP_TARGET_DEAD ||
1575 target->state == SRP_TARGET_REMOVED)
1576 return -1;
1577
1698 init_completion(&target->tsk_mgmt_done); 1578 init_completion(&target->tsk_mgmt_done);
1699 1579
1700 spin_lock_irq(&target->lock); 1580 spin_lock_irq(&target->lock);
@@ -1733,18 +1613,25 @@ static int srp_abort(struct scsi_cmnd *scmnd)
1733{ 1613{
1734 struct srp_target_port *target = host_to_target(scmnd->device->host); 1614 struct srp_target_port *target = host_to_target(scmnd->device->host);
1735 struct srp_request *req = (struct srp_request *) scmnd->host_scribble; 1615 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
1616 int ret = SUCCESS;
1736 1617
1737 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n"); 1618 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
1738 1619
1739 if (!req || target->qp_in_error || !srp_claim_req(target, req, scmnd)) 1620 if (!req || target->qp_in_error)
1621 return FAILED;
1622 if (srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1623 SRP_TSK_ABORT_TASK))
1740 return FAILED; 1624 return FAILED;
1741 srp_send_tsk_mgmt(target, req->index, scmnd->device->lun,
1742 SRP_TSK_ABORT_TASK);
1743 srp_free_req(target, req, scmnd, 0);
1744 scmnd->result = DID_ABORT << 16;
1745 scmnd->scsi_done(scmnd);
1746 1625
1747 return SUCCESS; 1626 if (req->scmnd) {
1627 if (!target->tsk_mgmt_status) {
1628 srp_remove_req(target, req, 0);
1629 scmnd->result = DID_ABORT << 16;
1630 } else
1631 ret = FAILED;
1632 }
1633
1634 return ret;
1748} 1635}
1749 1636
1750static int srp_reset_device(struct scsi_cmnd *scmnd) 1637static int srp_reset_device(struct scsi_cmnd *scmnd)
@@ -1784,26 +1671,15 @@ static int srp_reset_host(struct scsi_cmnd *scmnd)
1784 return ret; 1671 return ret;
1785} 1672}
1786 1673
1787static int srp_slave_configure(struct scsi_device *sdev)
1788{
1789 struct Scsi_Host *shost = sdev->host;
1790 struct srp_target_port *target = host_to_target(shost);
1791 struct request_queue *q = sdev->request_queue;
1792 unsigned long timeout;
1793
1794 if (sdev->type == TYPE_DISK) {
1795 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
1796 blk_queue_rq_timeout(q, timeout);
1797 }
1798
1799 return 0;
1800}
1801
1802static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, 1674static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
1803 char *buf) 1675 char *buf)
1804{ 1676{
1805 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1677 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1806 1678
1679 if (target->state == SRP_TARGET_DEAD ||
1680 target->state == SRP_TARGET_REMOVED)
1681 return -ENODEV;
1682
1807 return sprintf(buf, "0x%016llx\n", 1683 return sprintf(buf, "0x%016llx\n",
1808 (unsigned long long) be64_to_cpu(target->id_ext)); 1684 (unsigned long long) be64_to_cpu(target->id_ext));
1809} 1685}
@@ -1813,6 +1689,10 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
1813{ 1689{
1814 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1690 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1815 1691
1692 if (target->state == SRP_TARGET_DEAD ||
1693 target->state == SRP_TARGET_REMOVED)
1694 return -ENODEV;
1695
1816 return sprintf(buf, "0x%016llx\n", 1696 return sprintf(buf, "0x%016llx\n",
1817 (unsigned long long) be64_to_cpu(target->ioc_guid)); 1697 (unsigned long long) be64_to_cpu(target->ioc_guid));
1818} 1698}
@@ -1822,6 +1702,10 @@ static ssize_t show_service_id(struct device *dev,
1822{ 1702{
1823 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1703 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1824 1704
1705 if (target->state == SRP_TARGET_DEAD ||
1706 target->state == SRP_TARGET_REMOVED)
1707 return -ENODEV;
1708
1825 return sprintf(buf, "0x%016llx\n", 1709 return sprintf(buf, "0x%016llx\n",
1826 (unsigned long long) be64_to_cpu(target->service_id)); 1710 (unsigned long long) be64_to_cpu(target->service_id));
1827} 1711}
@@ -1831,6 +1715,10 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
1831{ 1715{
1832 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1716 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1833 1717
1718 if (target->state == SRP_TARGET_DEAD ||
1719 target->state == SRP_TARGET_REMOVED)
1720 return -ENODEV;
1721
1834 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); 1722 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey));
1835} 1723}
1836 1724
@@ -1839,6 +1727,10 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
1839{ 1727{
1840 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1728 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1841 1729
1730 if (target->state == SRP_TARGET_DEAD ||
1731 target->state == SRP_TARGET_REMOVED)
1732 return -ENODEV;
1733
1842 return sprintf(buf, "%pI6\n", target->path.dgid.raw); 1734 return sprintf(buf, "%pI6\n", target->path.dgid.raw);
1843} 1735}
1844 1736
@@ -1847,6 +1739,10 @@ static ssize_t show_orig_dgid(struct device *dev,
1847{ 1739{
1848 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1740 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1849 1741
1742 if (target->state == SRP_TARGET_DEAD ||
1743 target->state == SRP_TARGET_REMOVED)
1744 return -ENODEV;
1745
1850 return sprintf(buf, "%pI6\n", target->orig_dgid); 1746 return sprintf(buf, "%pI6\n", target->orig_dgid);
1851} 1747}
1852 1748
@@ -1855,6 +1751,10 @@ static ssize_t show_req_lim(struct device *dev,
1855{ 1751{
1856 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1752 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1857 1753
1754 if (target->state == SRP_TARGET_DEAD ||
1755 target->state == SRP_TARGET_REMOVED)
1756 return -ENODEV;
1757
1858 return sprintf(buf, "%d\n", target->req_lim); 1758 return sprintf(buf, "%d\n", target->req_lim);
1859} 1759}
1860 1760
@@ -1863,6 +1763,10 @@ static ssize_t show_zero_req_lim(struct device *dev,
1863{ 1763{
1864 struct srp_target_port *target = host_to_target(class_to_shost(dev)); 1764 struct srp_target_port *target = host_to_target(class_to_shost(dev));
1865 1765
1766 if (target->state == SRP_TARGET_DEAD ||
1767 target->state == SRP_TARGET_REMOVED)
1768 return -ENODEV;
1769
1866 return sprintf(buf, "%d\n", target->zero_req_lim); 1770 return sprintf(buf, "%d\n", target->zero_req_lim);
1867} 1771}
1868 1772
@@ -1931,7 +1835,6 @@ static struct scsi_host_template srp_template = {
1931 .module = THIS_MODULE, 1835 .module = THIS_MODULE,
1932 .name = "InfiniBand SRP initiator", 1836 .name = "InfiniBand SRP initiator",
1933 .proc_name = DRV_NAME, 1837 .proc_name = DRV_NAME,
1934 .slave_configure = srp_slave_configure,
1935 .info = srp_target_info, 1838 .info = srp_target_info,
1936 .queuecommand = srp_queuecommand, 1839 .queuecommand = srp_queuecommand,
1937 .eh_abort_handler = srp_abort, 1840 .eh_abort_handler = srp_abort,
@@ -1965,14 +1868,11 @@ static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
1965 return PTR_ERR(rport); 1868 return PTR_ERR(rport);
1966 } 1869 }
1967 1870
1968 rport->lld_data = target;
1969
1970 spin_lock(&host->target_lock); 1871 spin_lock(&host->target_lock);
1971 list_add_tail(&target->list, &host->target_list); 1872 list_add_tail(&target->list, &host->target_list);
1972 spin_unlock(&host->target_lock); 1873 spin_unlock(&host->target_lock);
1973 1874
1974 target->state = SRP_TARGET_LIVE; 1875 target->state = SRP_TARGET_LIVE;
1975 target->connected = false;
1976 1876
1977 scsi_scan_target(&target->scsi_host->shost_gendev, 1877 scsi_scan_target(&target->scsi_host->shost_gendev,
1978 0, target->scsi_id, SCAN_WILD_CARD, 0); 1878 0, target->scsi_id, SCAN_WILD_CARD, 0);
@@ -2089,7 +1989,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2089 goto out; 1989 goto out;
2090 } 1990 }
2091 if (strlen(p) != 32) { 1991 if (strlen(p) != 32) {
2092 pr_warn("bad dest GID parameter '%s'\n", p); 1992 printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p);
2093 kfree(p); 1993 kfree(p);
2094 goto out; 1994 goto out;
2095 } 1995 }
@@ -2104,7 +2004,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2104 2004
2105 case SRP_OPT_PKEY: 2005 case SRP_OPT_PKEY:
2106 if (match_hex(args, &token)) { 2006 if (match_hex(args, &token)) {
2107 pr_warn("bad P_Key parameter '%s'\n", p); 2007 printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p);
2108 goto out; 2008 goto out;
2109 } 2009 }
2110 target->path.pkey = cpu_to_be16(token); 2010 target->path.pkey = cpu_to_be16(token);
@@ -2123,7 +2023,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2123 2023
2124 case SRP_OPT_MAX_SECT: 2024 case SRP_OPT_MAX_SECT:
2125 if (match_int(args, &token)) { 2025 if (match_int(args, &token)) {
2126 pr_warn("bad max sect parameter '%s'\n", p); 2026 printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p);
2127 goto out; 2027 goto out;
2128 } 2028 }
2129 target->scsi_host->max_sectors = token; 2029 target->scsi_host->max_sectors = token;
@@ -2131,8 +2031,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2131 2031
2132 case SRP_OPT_MAX_CMD_PER_LUN: 2032 case SRP_OPT_MAX_CMD_PER_LUN:
2133 if (match_int(args, &token)) { 2033 if (match_int(args, &token)) {
2134 pr_warn("bad max cmd_per_lun parameter '%s'\n", 2034 printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p);
2135 p);
2136 goto out; 2035 goto out;
2137 } 2036 }
2138 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); 2037 target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE);
@@ -2140,14 +2039,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2140 2039
2141 case SRP_OPT_IO_CLASS: 2040 case SRP_OPT_IO_CLASS:
2142 if (match_hex(args, &token)) { 2041 if (match_hex(args, &token)) {
2143 pr_warn("bad IO class parameter '%s'\n", p); 2042 printk(KERN_WARNING PFX "bad IO class parameter '%s' \n", p);
2144 goto out; 2043 goto out;
2145 } 2044 }
2146 if (token != SRP_REV10_IB_IO_CLASS && 2045 if (token != SRP_REV10_IB_IO_CLASS &&
2147 token != SRP_REV16A_IB_IO_CLASS) { 2046 token != SRP_REV16A_IB_IO_CLASS) {
2148 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", 2047 printk(KERN_WARNING PFX "unknown IO class parameter value"
2149 token, SRP_REV10_IB_IO_CLASS, 2048 " %x specified (use %x or %x).\n",
2150 SRP_REV16A_IB_IO_CLASS); 2049 token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS);
2151 goto out; 2050 goto out;
2152 } 2051 }
2153 target->io_class = token; 2052 target->io_class = token;
@@ -2165,8 +2064,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2165 2064
2166 case SRP_OPT_CMD_SG_ENTRIES: 2065 case SRP_OPT_CMD_SG_ENTRIES:
2167 if (match_int(args, &token) || token < 1 || token > 255) { 2066 if (match_int(args, &token) || token < 1 || token > 255) {
2168 pr_warn("bad max cmd_sg_entries parameter '%s'\n", 2067 printk(KERN_WARNING PFX "bad max cmd_sg_entries parameter '%s'\n", p);
2169 p);
2170 goto out; 2068 goto out;
2171 } 2069 }
2172 target->cmd_sg_cnt = token; 2070 target->cmd_sg_cnt = token;
@@ -2174,7 +2072,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2174 2072
2175 case SRP_OPT_ALLOW_EXT_SG: 2073 case SRP_OPT_ALLOW_EXT_SG:
2176 if (match_int(args, &token)) { 2074 if (match_int(args, &token)) {
2177 pr_warn("bad allow_ext_sg parameter '%s'\n", p); 2075 printk(KERN_WARNING PFX "bad allow_ext_sg parameter '%s'\n", p);
2178 goto out; 2076 goto out;
2179 } 2077 }
2180 target->allow_ext_sg = !!token; 2078 target->allow_ext_sg = !!token;
@@ -2183,16 +2081,15 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2183 case SRP_OPT_SG_TABLESIZE: 2081 case SRP_OPT_SG_TABLESIZE:
2184 if (match_int(args, &token) || token < 1 || 2082 if (match_int(args, &token) || token < 1 ||
2185 token > SCSI_MAX_SG_CHAIN_SEGMENTS) { 2083 token > SCSI_MAX_SG_CHAIN_SEGMENTS) {
2186 pr_warn("bad max sg_tablesize parameter '%s'\n", 2084 printk(KERN_WARNING PFX "bad max sg_tablesize parameter '%s'\n", p);
2187 p);
2188 goto out; 2085 goto out;
2189 } 2086 }
2190 target->sg_tablesize = token; 2087 target->sg_tablesize = token;
2191 break; 2088 break;
2192 2089
2193 default: 2090 default:
2194 pr_warn("unknown parameter or missing value '%s' in target creation request\n", 2091 printk(KERN_WARNING PFX "unknown parameter or missing value "
2195 p); 2092 "'%s' in target creation request\n", p);
2196 goto out; 2093 goto out;
2197 } 2094 }
2198 } 2095 }
@@ -2203,8 +2100,9 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target)
2203 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) 2100 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
2204 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && 2101 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
2205 !(srp_opt_tokens[i].token & opt_mask)) 2102 !(srp_opt_tokens[i].token & opt_mask))
2206 pr_warn("target creation request is missing parameter '%s'\n", 2103 printk(KERN_WARNING PFX "target creation request is "
2207 srp_opt_tokens[i].pattern); 2104 "missing parameter '%s'\n",
2105 srp_opt_tokens[i].pattern);
2208 2106
2209out: 2107out:
2210 kfree(options); 2108 kfree(options);
@@ -2251,7 +2149,7 @@ static ssize_t srp_create_target(struct device *dev,
2251 2149
2252 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && 2150 if (!host->srp_dev->fmr_pool && !target->allow_ext_sg &&
2253 target->cmd_sg_cnt < target->sg_tablesize) { 2151 target->cmd_sg_cnt < target->sg_tablesize) {
2254 pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); 2152 printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
2255 target->sg_tablesize = target->cmd_sg_cnt; 2153 target->sg_tablesize = target->cmd_sg_cnt;
2256 } 2154 }
2257 2155
@@ -2262,7 +2160,6 @@ static ssize_t srp_create_target(struct device *dev,
2262 sizeof (struct srp_indirect_buf) + 2160 sizeof (struct srp_indirect_buf) +
2263 target->cmd_sg_cnt * sizeof (struct srp_direct_buf); 2161 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
2264 2162
2265 INIT_WORK(&target->remove_work, srp_remove_work);
2266 spin_lock_init(&target->lock); 2163 spin_lock_init(&target->lock);
2267 INIT_LIST_HEAD(&target->free_tx); 2164 INIT_LIST_HEAD(&target->free_tx);
2268 INIT_LIST_HEAD(&target->free_reqs); 2165 INIT_LIST_HEAD(&target->free_reqs);
@@ -2307,6 +2204,7 @@ static ssize_t srp_create_target(struct device *dev,
2307 if (ret) 2204 if (ret)
2308 goto err_free_ib; 2205 goto err_free_ib;
2309 2206
2207 target->qp_in_error = 0;
2310 ret = srp_connect_target(target); 2208 ret = srp_connect_target(target);
2311 if (ret) { 2209 if (ret) {
2312 shost_printk(KERN_ERR, target->scsi_host, 2210 shost_printk(KERN_ERR, target->scsi_host,
@@ -2411,7 +2309,8 @@ static void srp_add_one(struct ib_device *device)
2411 return; 2309 return;
2412 2310
2413 if (ib_query_device(device, dev_attr)) { 2311 if (ib_query_device(device, dev_attr)) {
2414 pr_warn("Query device failed for %s\n", device->name); 2312 printk(KERN_WARNING PFX "Query device failed for %s\n",
2313 device->name);
2415 goto free_attr; 2314 goto free_attr;
2416 } 2315 }
2417 2316
@@ -2496,7 +2395,8 @@ static void srp_remove_one(struct ib_device *device)
2496{ 2395{
2497 struct srp_device *srp_dev; 2396 struct srp_device *srp_dev;
2498 struct srp_host *host, *tmp_host; 2397 struct srp_host *host, *tmp_host;
2499 struct srp_target_port *target; 2398 LIST_HEAD(target_list);
2399 struct srp_target_port *target, *tmp_target;
2500 2400
2501 srp_dev = ib_get_client_data(device, &srp_client); 2401 srp_dev = ib_get_client_data(device, &srp_client);
2502 2402
@@ -2509,17 +2409,34 @@ static void srp_remove_one(struct ib_device *device)
2509 wait_for_completion(&host->released); 2409 wait_for_completion(&host->released);
2510 2410
2511 /* 2411 /*
2512 * Remove all target ports. 2412 * Mark all target ports as removed, so we stop queueing
2413 * commands and don't try to reconnect.
2513 */ 2414 */
2514 spin_lock(&host->target_lock); 2415 spin_lock(&host->target_lock);
2515 list_for_each_entry(target, &host->target_list, list) 2416 list_for_each_entry(target, &host->target_list, list) {
2516 srp_queue_remove_work(target); 2417 spin_lock_irq(&target->lock);
2418 target->state = SRP_TARGET_REMOVED;
2419 spin_unlock_irq(&target->lock);
2420 }
2517 spin_unlock(&host->target_lock); 2421 spin_unlock(&host->target_lock);
2518 2422
2519 /* 2423 /*
2520 * Wait for target port removal tasks. 2424 * Wait for any reconnection tasks that may have
2425 * started before we marked our target ports as
2426 * removed, and any target port removal tasks.
2521 */ 2427 */
2522 flush_workqueue(system_long_wq); 2428 flush_workqueue(ib_wq);
2429
2430 list_for_each_entry_safe(target, tmp_target,
2431 &host->target_list, list) {
2432 srp_remove_host(target->scsi_host);
2433 scsi_remove_host(target->scsi_host);
2434 srp_disconnect_target(target);
2435 ib_destroy_cm_id(target->cm_id);
2436 srp_free_target_ib(target);
2437 srp_free_req_data(target);
2438 scsi_host_put(target->scsi_host);
2439 }
2523 2440
2524 kfree(host); 2441 kfree(host);
2525 } 2442 }
@@ -2533,7 +2450,6 @@ static void srp_remove_one(struct ib_device *device)
2533} 2450}
2534 2451
2535static struct srp_function_template ib_srp_transport_functions = { 2452static struct srp_function_template ib_srp_transport_functions = {
2536 .rport_delete = srp_rport_delete,
2537}; 2453};
2538 2454
2539static int __init srp_init_module(void) 2455static int __init srp_init_module(void)
@@ -2543,7 +2459,7 @@ static int __init srp_init_module(void)
2543 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); 2459 BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *));
2544 2460
2545 if (srp_sg_tablesize) { 2461 if (srp_sg_tablesize) {
2546 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); 2462 printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
2547 if (!cmd_sg_entries) 2463 if (!cmd_sg_entries)
2548 cmd_sg_entries = srp_sg_tablesize; 2464 cmd_sg_entries = srp_sg_tablesize;
2549 } 2465 }
@@ -2552,15 +2468,14 @@ static int __init srp_init_module(void)
2552 cmd_sg_entries = SRP_DEF_SG_TABLESIZE; 2468 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
2553 2469
2554 if (cmd_sg_entries > 255) { 2470 if (cmd_sg_entries > 255) {
2555 pr_warn("Clamping cmd_sg_entries to 255\n"); 2471 printk(KERN_WARNING PFX "Clamping cmd_sg_entries to 255\n");
2556 cmd_sg_entries = 255; 2472 cmd_sg_entries = 255;
2557 } 2473 }
2558 2474
2559 if (!indirect_sg_entries) 2475 if (!indirect_sg_entries)
2560 indirect_sg_entries = cmd_sg_entries; 2476 indirect_sg_entries = cmd_sg_entries;
2561 else if (indirect_sg_entries < cmd_sg_entries) { 2477 else if (indirect_sg_entries < cmd_sg_entries) {
2562 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", 2478 printk(KERN_WARNING PFX "Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", cmd_sg_entries);
2563 cmd_sg_entries);
2564 indirect_sg_entries = cmd_sg_entries; 2479 indirect_sg_entries = cmd_sg_entries;
2565 } 2480 }
2566 2481
@@ -2571,7 +2486,7 @@ static int __init srp_init_module(void)
2571 2486
2572 ret = class_register(&srp_class); 2487 ret = class_register(&srp_class);
2573 if (ret) { 2488 if (ret) {
2574 pr_err("couldn't register class infiniband_srp\n"); 2489 printk(KERN_ERR PFX "couldn't register class infiniband_srp\n");
2575 srp_release_transport(ib_srp_transport_template); 2490 srp_release_transport(ib_srp_transport_template);
2576 return ret; 2491 return ret;
2577 } 2492 }
@@ -2580,7 +2495,7 @@ static int __init srp_init_module(void)
2580 2495
2581 ret = ib_register_client(&srp_client); 2496 ret = ib_register_client(&srp_client);
2582 if (ret) { 2497 if (ret) {
2583 pr_err("couldn't register IB client\n"); 2498 printk(KERN_ERR PFX "couldn't register IB client\n");
2584 srp_release_transport(ib_srp_transport_template); 2499 srp_release_transport(ib_srp_transport_template);
2585 ib_sa_unregister_client(&srp_sa_client); 2500 ib_sa_unregister_client(&srp_sa_client);
2586 class_unregister(&srp_class); 2501 class_unregister(&srp_class);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index de2d0b3c0bf..020caf0c378 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -80,7 +80,9 @@ enum {
80 80
81enum srp_target_state { 81enum srp_target_state {
82 SRP_TARGET_LIVE, 82 SRP_TARGET_LIVE,
83 SRP_TARGET_REMOVED, 83 SRP_TARGET_CONNECTING,
84 SRP_TARGET_DEAD,
85 SRP_TARGET_REMOVED
84}; 86};
85 87
86enum srp_iu_type { 88enum srp_iu_type {
@@ -161,9 +163,6 @@ struct srp_target_port {
161 struct ib_sa_query *path_query; 163 struct ib_sa_query *path_query;
162 int path_query_id; 164 int path_query_id;
163 165
164 u32 rq_tmo_jiffies;
165 bool connected;
166
167 struct ib_cm_id *cm_id; 166 struct ib_cm_id *cm_id;
168 167
169 int max_ti_iu_len; 168 int max_ti_iu_len;
@@ -174,12 +173,12 @@ struct srp_target_port {
174 struct srp_iu *rx_ring[SRP_RQ_SIZE]; 173 struct srp_iu *rx_ring[SRP_RQ_SIZE];
175 struct srp_request req_ring[SRP_CMD_SQ_SIZE]; 174 struct srp_request req_ring[SRP_CMD_SQ_SIZE];
176 175
177 struct work_struct remove_work; 176 struct work_struct work;
178 177
179 struct list_head list; 178 struct list_head list;
180 struct completion done; 179 struct completion done;
181 int status; 180 int status;
182 bool qp_in_error; 181 int qp_in_error;
183 182
184 struct completion tsk_mgmt_done; 183 struct completion tsk_mgmt_done;
185 u8 tsk_mgmt_status; 184 u8 tsk_mgmt_status;
diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig
deleted file mode 100644
index 31ee83d528d..00000000000
--- a/drivers/infiniband/ulp/srpt/Kconfig
+++ /dev/null
@@ -1,12 +0,0 @@
1config INFINIBAND_SRPT
2 tristate "InfiniBand SCSI RDMA Protocol target support"
3 depends on INFINIBAND && TARGET_CORE
4 ---help---
5
6 Support for the SCSI RDMA Protocol (SRP) Target driver. The
7 SRP protocol is a protocol that allows an initiator to access
8 a block storage device on another host (target) over a network
9 that supports the RDMA protocol. Currently the RDMA protocol is
10 supported by InfiniBand and by iWarp network hardware. More
11 information about the SRP protocol can be found on the website
12 of the INCITS T10 technical committee (http://www.t10.org/).
diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile
deleted file mode 100644
index e3ee4bdfffa..00000000000
--- a/drivers/infiniband/ulp/srpt/Makefile
+++ /dev/null
@@ -1,2 +0,0 @@
1ccflags-y := -Idrivers/target
2obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o
diff --git a/drivers/infiniband/ulp/srpt/ib_dm_mad.h b/drivers/infiniband/ulp/srpt/ib_dm_mad.h
deleted file mode 100644
index fb1de1f6f29..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_dm_mad.h
+++ /dev/null
@@ -1,139 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
34#ifndef IB_DM_MAD_H
35#define IB_DM_MAD_H
36
37#include <linux/types.h>
38
39#include <rdma/ib_mad.h>
40
/* MAD status codes and device-management attribute IDs used by the SRP target. */
enum {
	/*
	 * See also section 13.4.7 Status Field, table 115 MAD Common Status
	 * Field Bit Values and also section 16.3.1.1 Status Field in the
	 * InfiniBand Architecture Specification.
	 */
	DM_MAD_STATUS_UNSUP_METHOD = 0x0008,
	DM_MAD_STATUS_UNSUP_METHOD_ATTR = 0x000c,
	DM_MAD_STATUS_INVALID_FIELD = 0x001c,
	DM_MAD_STATUS_NO_IOC = 0x0100,

	/*
	 * See also the Device Management chapter, section 16.3.3 Attributes,
	 * table 279 Device Management Attributes in the InfiniBand
	 * Architecture Specification.
	 */
	DM_ATTR_CLASS_PORT_INFO = 0x01,
	DM_ATTR_IOU_INFO = 0x10,
	DM_ATTR_IOC_PROFILE = 0x11,
	DM_ATTR_SVC_ENTRIES = 0x12
};
62
/* Device-management datagram header; all 28 bytes are reserved. */
struct ib_dm_hdr {
	u8 reserved[28];
};
66
/*
 * Structure of management datagram sent by the SRP target implementation.
 * Contains a management datagram header, reliable multi-packet transaction
 * protocol (RMPP) header and ib_dm_hdr. Notes:
 * - The SRP target implementation does not use RMPP or ib_dm_hdr when sending
 *   management datagrams.
 * - The header size must be exactly 64 bytes (IB_MGMT_DEVICE_HDR), since this
 *   is the header size that is passed to ib_create_send_mad() in ib_srpt.c.
 * - The maximum supported size for a management datagram when not using RMPP
 *   is 256 bytes -- 64 bytes header and 192 (IB_MGMT_DEVICE_DATA) bytes data.
 */
struct ib_dm_mad {
	struct ib_mad_hdr mad_hdr;
	struct ib_rmpp_hdr rmpp_hdr;
	struct ib_dm_hdr dm_hdr;
	u8 data[IB_MGMT_DEVICE_DATA];	/* attribute payload */
};
84
/*
 * IOUnitInfo as defined in section 16.3.3.3 IOUnitInfo of the InfiniBand
 * Architecture Specification.
 */
struct ib_dm_iou_info {
	__be16 change_id;		/* big-endian on the wire */
	u8 max_controllers;
	u8 op_rom;
	u8 controller_list[128];	/* 4-bit presence entries, see srpt_set_ioc() */
};
95
/*
 * IOControllerprofile as defined in section 16.3.3.4 IOControllerProfile of
 * the InfiniBand Architecture Specification. All multi-byte fields are
 * big-endian (__be*) because this structure is copied into MADs verbatim.
 */
struct ib_dm_ioc_profile {
	__be64 guid;
	__be32 vendor_id;
	__be32 device_id;
	__be16 device_version;
	__be16 reserved1;
	__be32 subsys_vendor_id;
	__be32 subsys_device_id;
	__be16 io_class;
	__be16 io_subclass;
	__be16 protocol;
	__be16 protocol_version;
	__be16 service_conn;
	__be16 initiators_supported;
	__be16 send_queue_depth;
	u8 reserved2;
	u8 rdma_read_depth;
	__be32 send_size;
	__be32 rdma_size;
	u8 op_cap_mask;
	u8 svc_cap_mask;
	u8 num_svc_entries;
	u8 reserved3[9];
	u8 id_string[64];	/* NUL-padded identification string */
};
125
/* One service entry: human-readable name plus 64-bit service identifier. */
struct ib_dm_svc_entry {
	u8 name[40];
	__be64 id;
};
130
/*
 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
struct ib_dm_svc_entries {
	struct ib_dm_svc_entry service_entries[4];
};
138
139#endif
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
deleted file mode 100644
index c09d41b1a2f..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ /dev/null
@@ -1,4018 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/ctype.h>
40#include <linux/kthread.h>
41#include <linux/string.h>
42#include <linux/delay.h>
43#include <linux/atomic.h>
44#include <scsi/scsi_tcq.h>
45#include <target/configfs_macros.h>
46#include <target/target_core_base.h>
47#include <target/target_core_fabric_configfs.h>
48#include <target/target_core_fabric.h>
49#include <target/target_core_configfs.h>
50#include "ib_srpt.h"
51
/* Name of this kernel module. */
#define DRV_NAME		"ib_srpt"
#define DRV_VERSION		"2.0.0"
#define DRV_RELDATE		"2011-02-14"

/* Identification string reported in the IOControllerProfile id_string. */
#define SRPT_ID_STRING		"Linux SRP target"

/* Prefix every pr_*() message with the driver name. */
#undef pr_fmt
#define pr_fmt(fmt) DRV_NAME " " fmt

MODULE_AUTHOR("Vu Pham and Bart Van Assche");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
		   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");
66
/*
 * Global Variables
 */

/* Service GUID advertised to initiators; 0 until set or derived from an HCA. */
static u64 srpt_service_guid;
static DEFINE_SPINLOCK(srpt_dev_lock);	/* Protects srpt_dev_list. */
static LIST_HEAD(srpt_dev_list);	/* List of srpt_device structures. */

/* Read-only (0444) module parameters, fixed at load time. */
static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE;
module_param(srp_max_req_size, int, 0444);
MODULE_PARM_DESC(srp_max_req_size,
		 "Maximum size of SRP request messages in bytes.");

static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE;
module_param(srpt_srq_size, int, 0444);
MODULE_PARM_DESC(srpt_srq_size,
		 "Shared receive queue (SRQ) size.");
84
/* Format a u64 module parameter as a 0x-prefixed hex string for sysfs. */
static int srpt_get_u64_x(char *buffer, struct kernel_param *kp)
{
	return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg);
}
/* Read-only parameter: no set handler, custom hex getter. */
module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid,
		  0444);
MODULE_PARM_DESC(srpt_service_guid,
		 "Using this value for ioc_guid, id_ext, and cm_listen_id"
		 " instead of using the node_guid of the first HCA.");
94
/* Forward declarations for symbols defined later in this file. */
static struct ib_client srpt_client;
static struct target_fabric_configfs *srpt_target;
static void srpt_release_channel(struct srpt_rdma_ch *ch);
static int srpt_queue_status(struct se_cmd *cmd);
99
100/**
101 * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE.
102 */
103static inline
104enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir)
105{
106 switch (dir) {
107 case DMA_TO_DEVICE: return DMA_FROM_DEVICE;
108 case DMA_FROM_DEVICE: return DMA_TO_DEVICE;
109 default: return dir;
110 }
111}
112
113/**
114 * srpt_sdev_name() - Return the name associated with the HCA.
115 *
116 * Examples are ib0, ib1, ...
117 */
118static inline const char *srpt_sdev_name(struct srpt_device *sdev)
119{
120 return sdev->device->name;
121}
122
123static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch)
124{
125 unsigned long flags;
126 enum rdma_ch_state state;
127
128 spin_lock_irqsave(&ch->spinlock, flags);
129 state = ch->state;
130 spin_unlock_irqrestore(&ch->spinlock, flags);
131 return state;
132}
133
134static enum rdma_ch_state
135srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state)
136{
137 unsigned long flags;
138 enum rdma_ch_state prev;
139
140 spin_lock_irqsave(&ch->spinlock, flags);
141 prev = ch->state;
142 ch->state = new_state;
143 spin_unlock_irqrestore(&ch->spinlock, flags);
144 return prev;
145}
146
147/**
148 * srpt_test_and_set_ch_state() - Test and set the channel state.
149 *
150 * Returns true if and only if the channel state has been set to the new state.
151 */
152static bool
153srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old,
154 enum rdma_ch_state new)
155{
156 unsigned long flags;
157 enum rdma_ch_state prev;
158
159 spin_lock_irqsave(&ch->spinlock, flags);
160 prev = ch->state;
161 if (prev == old)
162 ch->state = new;
163 spin_unlock_irqrestore(&ch->spinlock, flags);
164 return prev == old;
165}
166
/**
 * srpt_event_handler() - Asynchronous IB event callback function.
 *
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	struct srpt_device *sdev;
	struct srpt_port *sport;

	sdev = ib_get_client_data(event->device, &srpt_client);
	/* Ignore events for devices not registered with this client. */
	if (!sdev || sdev->device != event->device)
		return;

	pr_debug("ASYNC event= %d on device= %s\n", event->event,
		 srpt_sdev_name(sdev));

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
		/* Invalidate the cached LIDs; refreshed when the port recovers. */
		if (event->element.port_num <= sdev->device->phys_port_cnt) {
			sport = &sdev->port[event->element.port_num - 1];
			sport->lid = 0;
			sport->sm_lid = 0;
		}
		break;
	case IB_EVENT_PORT_ACTIVE:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		/* Refresh port data asynchronously. */
		if (event->element.port_num <= sdev->device->phys_port_cnt) {
			sport = &sdev->port[event->element.port_num - 1];
			/* Only schedule when both cached LIDs are stale (zero). */
			if (!sport->lid && !sport->sm_lid)
				schedule_work(&sport->work);
		}
		break;
	default:
		printk(KERN_ERR "received unrecognized IB event %d\n",
		       event->event);
		break;
	}
}
214
/**
 * srpt_srq_event() - SRQ event callback function.
 *
 * Only logs the event; no recovery action is taken here.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
	printk(KERN_INFO "SRQ event %d\n", event->event);
}
222
/**
 * srpt_qp_event() - QP event callback function.
 *
 * COMM_EST is forwarded to the IB CM; LAST_WQE_REACHED triggers channel
 * release once the channel has transitioned from draining to releasing.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
	pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n",
		 event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch));

	switch (event->event) {
	case IB_EVENT_COMM_EST:
		ib_cm_notify(ch->cm_id, event->event);
		break;
	case IB_EVENT_QP_LAST_WQE_REACHED:
		/* Release only once: the CH_DRAINING -> CH_RELEASING transition
		 * succeeds for exactly one caller. */
		if (srpt_test_and_set_ch_state(ch, CH_DRAINING,
					       CH_RELEASING))
			srpt_release_channel(ch);
		else
			pr_debug("%s: state %d - ignored LAST_WQE.\n",
				 ch->sess_name, srpt_get_ch_state(ch));
		break;
	default:
		printk(KERN_ERR "received unrecognized IB QP event %d\n",
		       event->event);
		break;
	}
}
249
250/**
251 * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure.
252 *
253 * @slot: one-based slot number.
254 * @value: four-bit value.
255 *
256 * Copies the lowest four bits of value in element slot of the array of four
257 * bit elements called c_list (controller list). The index slot is one-based.
258 */
259static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
260{
261 u16 id;
262 u8 tmp;
263
264 id = (slot - 1) / 2;
265 if (slot & 0x1) {
266 tmp = c_list[id] & 0xf;
267 c_list[id] = (value << 4) | tmp;
268 } else {
269 tmp = c_list[id] & 0xf0;
270 c_list[id] = (value & 0xf) | tmp;
271 }
272}
273
274/**
275 * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram.
276 *
277 * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture
278 * Specification.
279 */
280static void srpt_get_class_port_info(struct ib_dm_mad *mad)
281{
282 struct ib_class_port_info *cif;
283
284 cif = (struct ib_class_port_info *)mad->data;
285 memset(cif, 0, sizeof *cif);
286 cif->base_version = 1;
287 cif->class_version = 1;
288 cif->resp_time_value = 20;
289
290 mad->mad_hdr.status = 0;
291}
292
293/**
294 * srpt_get_iou() - Write IOUnitInfo to a management datagram.
295 *
296 * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture
297 * Specification. See also section B.7, table B.6 in the SRP r16a document.
298 */
299static void srpt_get_iou(struct ib_dm_mad *mad)
300{
301 struct ib_dm_iou_info *ioui;
302 u8 slot;
303 int i;
304
305 ioui = (struct ib_dm_iou_info *)mad->data;
306 ioui->change_id = __constant_cpu_to_be16(1);
307 ioui->max_controllers = 16;
308
309 /* set present for slot 1 and empty for the rest */
310 srpt_set_ioc(ioui->controller_list, 1, 1);
311 for (i = 1, slot = 2; i < 16; i++, slot++)
312 srpt_set_ioc(ioui->controller_list, slot, 0);
313
314 mad->mad_hdr.status = 0;
315}
316
/**
 * srpt_get_ioc() - Write IOControllerprofile to a management datagram.
 *
 * See also section 16.3.3.4 IOControllerProfile in the InfiniBand
 * Architecture Specification. See also section B.7, table B.7 in the SRP
 * r16a document.
 */
static void srpt_get_ioc(struct srpt_port *sport, u32 slot,
			 struct ib_dm_mad *mad)
{
	struct srpt_device *sdev = sport->sdev;
	struct ib_dm_ioc_profile *iocp;

	iocp = (struct ib_dm_ioc_profile *)mad->data;

	/* Valid slot numbers are 1..16. */
	if (!slot || slot > 16) {
		mad->mad_hdr.status
			= __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
		return;
	}

	/* Only slots 1 and 2 are backed by an I/O controller. */
	if (slot > 2) {
		mad->mad_hdr.status
			= __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
		return;
	}

	memset(iocp, 0, sizeof *iocp);
	strcpy(iocp->id_string, SRPT_ID_STRING);
	iocp->guid = cpu_to_be64(srpt_service_guid);
	/* Vendor/device identity is copied from the HCA's device attributes. */
	iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
	iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
	iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
	iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
	iocp->subsys_device_id = 0x0;
	iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
	iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS);
	iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL);
	iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION);
	iocp->send_queue_depth = cpu_to_be16(sdev->srq_size);
	iocp->rdma_read_depth = 4;
	iocp->send_size = cpu_to_be32(srp_max_req_size);
	/* Advertised RDMA size is capped at 16 MB (1 << 24). */
	iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size,
					  1U << 24));
	iocp->num_svc_entries = 1;
	iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
		SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

	mad->mad_hdr.status = 0;
}
367
/**
 * srpt_get_svc_entries() - Write ServiceEntries to a management datagram.
 *
 * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
				 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
	struct ib_dm_svc_entries *svc_entries;

	WARN_ON(!ioc_guid);

	/* Valid slot numbers are 1..16. */
	if (!slot || slot > 16) {
		mad->mad_hdr.status
			= __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
		return;
	}

	/* Only slots 1 and 2 exist, and only entry index 0 is supported. */
	if (slot > 2 || lo > hi || hi > 1) {
		mad->mad_hdr.status
			= __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC);
		return;
	}

	svc_entries = (struct ib_dm_svc_entries *)mad->data;
	memset(svc_entries, 0, sizeof *svc_entries);
	svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
	/* Service name is the SRP prefix followed by the GUID in hex. */
	snprintf(svc_entries->service_entries[0].name,
		 sizeof(svc_entries->service_entries[0].name),
		 "%s%016llx",
		 SRP_SERVICE_NAME_PREFIX,
		 ioc_guid);

	mad->mad_hdr.status = 0;
}
404
405/**
406 * srpt_mgmt_method_get() - Process a received management datagram.
407 * @sp: source port through which the MAD has been received.
408 * @rq_mad: received MAD.
409 * @rsp_mad: response MAD.
410 */
411static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
412 struct ib_dm_mad *rsp_mad)
413{
414 u16 attr_id;
415 u32 slot;
416 u8 hi, lo;
417
418 attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
419 switch (attr_id) {
420 case DM_ATTR_CLASS_PORT_INFO:
421 srpt_get_class_port_info(rsp_mad);
422 break;
423 case DM_ATTR_IOU_INFO:
424 srpt_get_iou(rsp_mad);
425 break;
426 case DM_ATTR_IOC_PROFILE:
427 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
428 srpt_get_ioc(sp, slot, rsp_mad);
429 break;
430 case DM_ATTR_SVC_ENTRIES:
431 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
432 hi = (u8) ((slot >> 8) & 0xff);
433 lo = (u8) (slot & 0xff);
434 slot = (u16) ((slot >> 16) & 0xffff);
435 srpt_get_svc_entries(srpt_service_guid,
436 slot, hi, lo, rsp_mad);
437 break;
438 default:
439 rsp_mad->mad_hdr.status =
440 __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
441 break;
442 }
443}
444
/**
 * srpt_mad_send_handler() - Post MAD-send callback function.
 *
 * Releases the address handle and send buffer that were allocated in
 * srpt_mad_recv_handler() once the send has completed.
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
				  struct ib_mad_send_wc *mad_wc)
{
	ib_destroy_ah(mad_wc->send_buf->ah);
	ib_free_send_mad(mad_wc->send_buf);
}
454
/**
 * srpt_mad_recv_handler() - MAD reception callback function.
 *
 * Builds and posts a response MAD for every received device-management MAD.
 * On successful ib_post_send_mad() the AH and send buffer are released from
 * srpt_mad_send_handler(); on any failure the goto-cleanup chain below
 * releases whatever was allocated so far.
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
				  struct ib_mad_recv_wc *mad_wc)
{
	struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
	struct ib_ah *ah;
	struct ib_mad_send_buf *rsp;
	struct ib_dm_mad *dm_mad;

	if (!mad_wc || !mad_wc->recv_buf.mad)
		return;

	/* Address handle targeting the sender of the received MAD. */
	ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
				  mad_wc->recv_buf.grh, mad_agent->port_num);
	if (IS_ERR(ah))
		goto err;

	/* srpt_get_*() write at offset 'data'; it must equal the DM header size. */
	BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

	rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
				 mad_wc->wc->pkey_index, 0,
				 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
				 GFP_KERNEL);
	if (IS_ERR(rsp))
		goto err_rsp;

	rsp->ah = ah;

	/* Start from a copy of the request header, then patch method/status. */
	dm_mad = rsp->mad;
	memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
	dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
	dm_mad->mad_hdr.status = 0;

	switch (mad_wc->recv_buf.mad->mad_hdr.method) {
	case IB_MGMT_METHOD_GET:
		srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
		break;
	case IB_MGMT_METHOD_SET:
		dm_mad->mad_hdr.status =
		    __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
		break;
	default:
		dm_mad->mad_hdr.status =
		    __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
		break;
	}

	if (!ib_post_send_mad(rsp, NULL)) {
		ib_free_recv_mad(mad_wc);
		/* will destroy_ah & free_send_mad in send completion */
		return;
	}

	ib_free_send_mad(rsp);

err_rsp:
	ib_destroy_ah(ah);
err:
	ib_free_recv_mad(mad_wc);
}
517
/**
 * srpt_refresh_port() - Configure a HCA port.
 *
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port.
 *
 * Note: It is safe to call this function more than once for the same port.
 *
 * Returns zero on success or a negative errno; on failure the
 * DEVICE_MGMT_SUP capability bit set at the start is cleared again.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
	struct ib_mad_reg_req reg_req;
	struct ib_port_modify port_modify;
	struct ib_port_attr port_attr;
	int ret;

	/* Advertise device-management support on this port. */
	memset(&port_modify, 0, sizeof port_modify);
	port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
	port_modify.clr_port_cap_mask = 0;

	ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
	if (ret)
		goto err_mod_port;

	/* Refresh the cached LID values. */
	ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
	if (ret)
		goto err_query_port;

	sport->sm_lid = port_attr.sm_lid;
	sport->lid = port_attr.lid;

	ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
	if (ret)
		goto err_query_port;

	/* Register the MAD agent only on the first (successful) invocation. */
	if (!sport->mad_agent) {
		memset(&reg_req, 0, sizeof reg_req);
		reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
		reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
		set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
		set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

		sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
							 sport->port,
							 IB_QPT_GSI,
							 &reg_req, 0,
							 srpt_mad_send_handler,
							 srpt_mad_recv_handler,
							 sport);
		if (IS_ERR(sport->mad_agent)) {
			ret = PTR_ERR(sport->mad_agent);
			sport->mad_agent = NULL;
			goto err_query_port;
		}
	}

	return 0;

err_query_port:

	/* Undo the capability-mask change made above. */
	port_modify.set_port_cap_mask = 0;
	port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
	ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

	return ret;
}
586
587/**
588 * srpt_unregister_mad_agent() - Unregister MAD callback functions.
589 *
590 * Note: It is safe to call this function more than once for the same device.
591 */
592static void srpt_unregister_mad_agent(struct srpt_device *sdev)
593{
594 struct ib_port_modify port_modify = {
595 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
596 };
597 struct srpt_port *sport;
598 int i;
599
600 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
601 sport = &sdev->port[i - 1];
602 WARN_ON(sport->port != i);
603 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
604 printk(KERN_ERR "disabling MAD processing failed.\n");
605 if (sport->mad_agent) {
606 ib_unregister_mad_agent(sport->mad_agent);
607 sport->mad_agent = NULL;
608 }
609 }
610}
611
612/**
613 * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure.
614 */
615static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev,
616 int ioctx_size, int dma_size,
617 enum dma_data_direction dir)
618{
619 struct srpt_ioctx *ioctx;
620
621 ioctx = kmalloc(ioctx_size, GFP_KERNEL);
622 if (!ioctx)
623 goto err;
624
625 ioctx->buf = kmalloc(dma_size, GFP_KERNEL);
626 if (!ioctx->buf)
627 goto err_free_ioctx;
628
629 ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir);
630 if (ib_dma_mapping_error(sdev->device, ioctx->dma))
631 goto err_free_buf;
632
633 return ioctx;
634
635err_free_buf:
636 kfree(ioctx->buf);
637err_free_ioctx:
638 kfree(ioctx);
639err:
640 return NULL;
641}
642
/**
 * srpt_free_ioctx() - Free an SRPT I/O context structure.
 *
 * Unmaps the DMA buffer and frees both the buffer and the context.
 * A NULL @ioctx is a no-op.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx,
			    int dma_size, enum dma_data_direction dir)
{
	if (!ioctx)
		return;

	ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir);
	kfree(ioctx->buf);
	kfree(ioctx);
}
656
657/**
658 * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures.
659 * @sdev: Device to allocate the I/O context ring for.
660 * @ring_size: Number of elements in the I/O context ring.
661 * @ioctx_size: I/O context size.
662 * @dma_size: DMA buffer size.
663 * @dir: DMA data direction.
664 */
665static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
666 int ring_size, int ioctx_size,
667 int dma_size, enum dma_data_direction dir)
668{
669 struct srpt_ioctx **ring;
670 int i;
671
672 WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
673 && ioctx_size != sizeof(struct srpt_send_ioctx));
674
675 ring = kmalloc(ring_size * sizeof(ring[0]), GFP_KERNEL);
676 if (!ring)
677 goto out;
678 for (i = 0; i < ring_size; ++i) {
679 ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir);
680 if (!ring[i])
681 goto err;
682 ring[i]->index = i;
683 }
684 goto out;
685
686err:
687 while (--i >= 0)
688 srpt_free_ioctx(sdev, ring[i], dma_size, dir);
689 kfree(ring);
690 ring = NULL;
691out:
692 return ring;
693}
694
695/**
696 * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures.
697 */
698static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
699 struct srpt_device *sdev, int ring_size,
700 int dma_size, enum dma_data_direction dir)
701{
702 int i;
703
704 for (i = 0; i < ring_size; ++i)
705 srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
706 kfree(ioctx_ring);
707}
708
709/**
710 * srpt_get_cmd_state() - Get the state of a SCSI command.
711 */
712static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx)
713{
714 enum srpt_command_state state;
715 unsigned long flags;
716
717 BUG_ON(!ioctx);
718
719 spin_lock_irqsave(&ioctx->spinlock, flags);
720 state = ioctx->state;
721 spin_unlock_irqrestore(&ioctx->spinlock, flags);
722 return state;
723}
724
725/**
726 * srpt_set_cmd_state() - Set the state of a SCSI command.
727 *
728 * Does not modify the state of aborted commands. Returns the previous command
729 * state.
730 */
731static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx,
732 enum srpt_command_state new)
733{
734 enum srpt_command_state previous;
735 unsigned long flags;
736
737 BUG_ON(!ioctx);
738
739 spin_lock_irqsave(&ioctx->spinlock, flags);
740 previous = ioctx->state;
741 if (previous != SRPT_STATE_DONE)
742 ioctx->state = new;
743 spin_unlock_irqrestore(&ioctx->spinlock, flags);
744
745 return previous;
746}
747
748/**
749 * srpt_test_and_set_cmd_state() - Test and set the state of a command.
750 *
751 * Returns true if and only if the previous command state was equal to 'old'.
752 */
753static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx,
754 enum srpt_command_state old,
755 enum srpt_command_state new)
756{
757 enum srpt_command_state previous;
758 unsigned long flags;
759
760 WARN_ON(!ioctx);
761 WARN_ON(old == SRPT_STATE_DONE);
762 WARN_ON(new == SRPT_STATE_NEW);
763
764 spin_lock_irqsave(&ioctx->spinlock, flags);
765 previous = ioctx->state;
766 if (previous == old)
767 ioctx->state = new;
768 spin_unlock_irqrestore(&ioctx->spinlock, flags);
769 return previous == old;
770}
771
/**
 * srpt_post_recv() - Post an IB receive request.
 *
 * Posts a single-SGE receive work request for @ioctx's DMA buffer on the
 * device's shared receive queue. Returns the ib_post_srq_recv() result.
 */
static int srpt_post_recv(struct srpt_device *sdev,
			  struct srpt_recv_ioctx *ioctx)
{
	struct ib_sge list;
	struct ib_recv_wr wr, *bad_wr;

	BUG_ON(!sdev);
	/* Encode type + index so the completion handler can find the ioctx. */
	wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index);

	list.addr = ioctx->ioctx.dma;
	list.length = srp_max_req_size;
	list.lkey = sdev->mr->lkey;

	wr.next = NULL;
	wr.sg_list = &list;
	wr.num_sge = 1;

	return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}
794
/**
 * srpt_post_send() - Post an IB send request.
 *
 * Returns zero upon success and a non-zero value upon failure. On failure
 * the req_lim and sq_wr_avail counter changes are rolled back.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch,
			  struct srpt_send_ioctx *ioctx, int len)
{
	struct ib_sge list;
	struct ib_send_wr wr, *bad_wr;
	struct srpt_device *sdev = ch->sport->sdev;
	int ret;

	atomic_inc(&ch->req_lim);

	ret = -ENOMEM;
	/* Reserve a send-queue slot; back out below if the post fails. */
	if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) {
		printk(KERN_WARNING "IB send queue full (needed 1)\n");
		goto out;
	}

	/* Make the CPU-written response visible to the device before sending. */
	ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len,
				      DMA_TO_DEVICE);

	list.addr = ioctx->ioctx.dma;
	list.length = len;
	list.lkey = sdev->mr->lkey;

	wr.next = NULL;
	wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index);
	wr.sg_list = &list;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
	if (ret < 0) {
		/* Undo the counter updates performed above. */
		atomic_inc(&ch->sq_wr_avail);
		atomic_dec(&ch->req_lim);
	}
	return ret;
}
839
840/**
841 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
842 * @ioctx: Pointer to the I/O context associated with the request.
843 * @srp_cmd: Pointer to the SRP_CMD request data.
844 * @dir: Pointer to the variable to which the transfer direction will be
845 * written.
846 * @data_len: Pointer to the variable to which the total data length of all
847 * descriptors in the SRP_CMD request will be written.
848 *
849 * This function initializes ioctx->nrbuf and ioctx->r_bufs.
850 *
851 * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
852 * -ENOMEM when memory allocation fails and zero upon success.
853 */
854static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx,
855 struct srp_cmd *srp_cmd,
856 enum dma_data_direction *dir, u64 *data_len)
857{
858 struct srp_indirect_buf *idb;
859 struct srp_direct_buf *db;
860 unsigned add_cdb_offset;
861 int ret;
862
863 /*
864 * The pointer computations below will only be compiled correctly
865 * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
866 * whether srp_cmd::add_data has been declared as a byte pointer.
867 */
868 BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
869 && !__same_type(srp_cmd->add_data[0], (u8)0));
870
871 BUG_ON(!dir);
872 BUG_ON(!data_len);
873
874 ret = 0;
875 *data_len = 0;
876
877 /*
878 * The lower four bits of the buffer format field contain the DATA-IN
879 * buffer descriptor format, and the highest four bits contain the
880 * DATA-OUT buffer descriptor format.
881 */
882 *dir = DMA_NONE;
883 if (srp_cmd->buf_fmt & 0xf)
884 /* DATA-IN: transfer data from target to initiator (read). */
885 *dir = DMA_FROM_DEVICE;
886 else if (srp_cmd->buf_fmt >> 4)
887 /* DATA-OUT: transfer data from initiator to target (write). */
888 *dir = DMA_TO_DEVICE;
889
890 /*
891 * According to the SRP spec, the lower two bits of the 'ADDITIONAL
892 * CDB LENGTH' field are reserved and the size in bytes of this field
893 * is four times the value specified in bits 3..7. Hence the "& ~3".
894 */
895 add_cdb_offset = srp_cmd->add_cdb_len & ~3;
896 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
897 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
898 ioctx->n_rbuf = 1;
899 ioctx->rbufs = &ioctx->single_rbuf;
900
901 db = (struct srp_direct_buf *)(srp_cmd->add_data
902 + add_cdb_offset);
903 memcpy(ioctx->rbufs, db, sizeof *db);
904 *data_len = be32_to_cpu(db->len);
905 } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
906 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
907 idb = (struct srp_indirect_buf *)(srp_cmd->add_data
908 + add_cdb_offset);
909
910 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
911
912 if (ioctx->n_rbuf >
913 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
914 printk(KERN_ERR "received unsupported SRP_CMD request"
915 " type (%u out + %u in != %u / %zu)\n",
916 srp_cmd->data_out_desc_cnt,
917 srp_cmd->data_in_desc_cnt,
918 be32_to_cpu(idb->table_desc.len),
919 sizeof(*db));
920 ioctx->n_rbuf = 0;
921 ret = -EINVAL;
922 goto out;
923 }
924
925 if (ioctx->n_rbuf == 1)
926 ioctx->rbufs = &ioctx->single_rbuf;
927 else {
928 ioctx->rbufs =
929 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
930 if (!ioctx->rbufs) {
931 ioctx->n_rbuf = 0;
932 ret = -ENOMEM;
933 goto out;
934 }
935 }
936
937 db = idb->desc_list;
938 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
939 *data_len = be32_to_cpu(idb->len);
940 }
941out:
942 return ret;
943}
944
945/**
946 * srpt_init_ch_qp() - Initialize queue pair attributes.
947 *
948 * Initialized the attributes of queue pair 'qp' by allowing local write,
949 * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT.
950 */
951static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
952{
953 struct ib_qp_attr *attr;
954 int ret;
955
956 attr = kzalloc(sizeof *attr, GFP_KERNEL);
957 if (!attr)
958 return -ENOMEM;
959
960 attr->qp_state = IB_QPS_INIT;
961 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
962 IB_ACCESS_REMOTE_WRITE;
963 attr->port_num = ch->sport->port;
964 attr->pkey_index = 0;
965
966 ret = ib_modify_qp(qp, attr,
967 IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
968 IB_QP_PKEY_INDEX);
969
970 kfree(attr);
971 return ret;
972}
973
974/**
975 * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR).
976 * @ch: channel of the queue pair.
977 * @qp: queue pair to change the state of.
978 *
979 * Returns zero upon success and a negative value upon failure.
980 *
981 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
982 * If this structure ever becomes larger, it might be necessary to allocate
983 * it dynamically instead of on the stack.
984 */
985static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
986{
987 struct ib_qp_attr qp_attr;
988 int attr_mask;
989 int ret;
990
991 qp_attr.qp_state = IB_QPS_RTR;
992 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
993 if (ret)
994 goto out;
995
996 qp_attr.max_dest_rd_atomic = 4;
997
998 ret = ib_modify_qp(qp, &qp_attr, attr_mask);
999
1000out:
1001 return ret;
1002}
1003
1004/**
1005 * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS).
1006 * @ch: channel of the queue pair.
1007 * @qp: queue pair to change the state of.
1008 *
1009 * Returns zero upon success and a negative value upon failure.
1010 *
1011 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
1012 * If this structure ever becomes larger, it might be necessary to allocate
1013 * it dynamically instead of on the stack.
1014 */
1015static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
1016{
1017 struct ib_qp_attr qp_attr;
1018 int attr_mask;
1019 int ret;
1020
1021 qp_attr.qp_state = IB_QPS_RTS;
1022 ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
1023 if (ret)
1024 goto out;
1025
1026 qp_attr.max_rd_atomic = 4;
1027
1028 ret = ib_modify_qp(qp, &qp_attr, attr_mask);
1029
1030out:
1031 return ret;
1032}
1033
1034/**
1035 * srpt_ch_qp_err() - Set the channel queue pair state to 'error'.
1036 */
1037static int srpt_ch_qp_err(struct srpt_rdma_ch *ch)
1038{
1039 struct ib_qp_attr qp_attr;
1040
1041 qp_attr.qp_state = IB_QPS_ERR;
1042 return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE);
1043}
1044
1045/**
1046 * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list.
1047 */
1048static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1049 struct srpt_send_ioctx *ioctx)
1050{
1051 struct scatterlist *sg;
1052 enum dma_data_direction dir;
1053
1054 BUG_ON(!ch);
1055 BUG_ON(!ioctx);
1056 BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
1057
1058 while (ioctx->n_rdma)
1059 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1060
1061 kfree(ioctx->rdma_ius);
1062 ioctx->rdma_ius = NULL;
1063
1064 if (ioctx->mapped_sg_count) {
1065 sg = ioctx->sg;
1066 WARN_ON(!sg);
1067 dir = ioctx->cmd.data_direction;
1068 BUG_ON(dir == DMA_NONE);
1069 ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt,
1070 opposite_dma_dir(dir));
1071 ioctx->mapped_sg_count = 0;
1072 }
1073}
1074
1075/**
1076 * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list.
1077 */
1078static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1079 struct srpt_send_ioctx *ioctx)
1080{
1081 struct se_cmd *cmd;
1082 struct scatterlist *sg, *sg_orig;
1083 int sg_cnt;
1084 enum dma_data_direction dir;
1085 struct rdma_iu *riu;
1086 struct srp_direct_buf *db;
1087 dma_addr_t dma_addr;
1088 struct ib_sge *sge;
1089 u64 raddr;
1090 u32 rsize;
1091 u32 tsize;
1092 u32 dma_len;
1093 int count, nrdma;
1094 int i, j, k;
1095
1096 BUG_ON(!ch);
1097 BUG_ON(!ioctx);
1098 cmd = &ioctx->cmd;
1099 dir = cmd->data_direction;
1100 BUG_ON(dir == DMA_NONE);
1101
1102 ioctx->sg = sg = sg_orig = cmd->t_data_sg;
1103 ioctx->sg_cnt = sg_cnt = cmd->t_data_nents;
1104
1105 count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt,
1106 opposite_dma_dir(dir));
1107 if (unlikely(!count))
1108 return -EAGAIN;
1109
1110 ioctx->mapped_sg_count = count;
1111
1112 if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1113 nrdma = ioctx->n_rdma_ius;
1114 else {
1115 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
1116 + ioctx->n_rbuf;
1117
1118 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL);
1119 if (!ioctx->rdma_ius)
1120 goto free_mem;
1121
1122 ioctx->n_rdma_ius = nrdma;
1123 }
1124
1125 db = ioctx->rbufs;
1126 tsize = cmd->data_length;
1127 dma_len = sg_dma_len(&sg[0]);
1128 riu = ioctx->rdma_ius;
1129
1130 /*
1131 * For each remote desc - calculate the #ib_sge.
1132 * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then
1133 * each remote desc rdma_iu is required a rdma wr;
1134 * else
1135 * we need to allocate extra rdma_iu to carry extra #ib_sge in
1136 * another rdma wr
1137 */
1138 for (i = 0, j = 0;
1139 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1140 rsize = be32_to_cpu(db->len);
1141 raddr = be64_to_cpu(db->va);
1142 riu->raddr = raddr;
1143 riu->rkey = be32_to_cpu(db->key);
1144 riu->sge_cnt = 0;
1145
1146 /* calculate how many sge required for this remote_buf */
1147 while (rsize > 0 && tsize > 0) {
1148
1149 if (rsize >= dma_len) {
1150 tsize -= dma_len;
1151 rsize -= dma_len;
1152 raddr += dma_len;
1153
1154 if (tsize > 0) {
1155 ++j;
1156 if (j < count) {
1157 sg = sg_next(sg);
1158 dma_len = sg_dma_len(sg);
1159 }
1160 }
1161 } else {
1162 tsize -= rsize;
1163 dma_len -= rsize;
1164 rsize = 0;
1165 }
1166
1167 ++riu->sge_cnt;
1168
1169 if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1170 ++ioctx->n_rdma;
1171 riu->sge =
1172 kmalloc(riu->sge_cnt * sizeof *riu->sge,
1173 GFP_KERNEL);
1174 if (!riu->sge)
1175 goto free_mem;
1176
1177 ++riu;
1178 riu->sge_cnt = 0;
1179 riu->raddr = raddr;
1180 riu->rkey = be32_to_cpu(db->key);
1181 }
1182 }
1183
1184 ++ioctx->n_rdma;
1185 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1186 GFP_KERNEL);
1187 if (!riu->sge)
1188 goto free_mem;
1189 }
1190
1191 db = ioctx->rbufs;
1192 tsize = cmd->data_length;
1193 riu = ioctx->rdma_ius;
1194 sg = sg_orig;
1195 dma_len = sg_dma_len(&sg[0]);
1196 dma_addr = sg_dma_address(&sg[0]);
1197
1198 /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */
1199 for (i = 0, j = 0;
1200 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1201 rsize = be32_to_cpu(db->len);
1202 sge = riu->sge;
1203 k = 0;
1204
1205 while (rsize > 0 && tsize > 0) {
1206 sge->addr = dma_addr;
1207 sge->lkey = ch->sport->sdev->mr->lkey;
1208
1209 if (rsize >= dma_len) {
1210 sge->length =
1211 (tsize < dma_len) ? tsize : dma_len;
1212 tsize -= dma_len;
1213 rsize -= dma_len;
1214
1215 if (tsize > 0) {
1216 ++j;
1217 if (j < count) {
1218 sg = sg_next(sg);
1219 dma_len = sg_dma_len(sg);
1220 dma_addr = sg_dma_address(sg);
1221 }
1222 }
1223 } else {
1224 sge->length = (tsize < rsize) ? tsize : rsize;
1225 tsize -= rsize;
1226 dma_len -= rsize;
1227 dma_addr += rsize;
1228 rsize = 0;
1229 }
1230
1231 ++k;
1232 if (k == riu->sge_cnt && rsize > 0 && tsize > 0) {
1233 ++riu;
1234 sge = riu->sge;
1235 k = 0;
1236 } else if (rsize > 0 && tsize > 0)
1237 ++sge;
1238 }
1239 }
1240
1241 return 0;
1242
1243free_mem:
1244 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1245
1246 return -ENOMEM;
1247}
1248
1249/**
1250 * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator.
1251 */
1252static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch)
1253{
1254 struct srpt_send_ioctx *ioctx;
1255 unsigned long flags;
1256
1257 BUG_ON(!ch);
1258
1259 ioctx = NULL;
1260 spin_lock_irqsave(&ch->spinlock, flags);
1261 if (!list_empty(&ch->free_list)) {
1262 ioctx = list_first_entry(&ch->free_list,
1263 struct srpt_send_ioctx, free_list);
1264 list_del(&ioctx->free_list);
1265 }
1266 spin_unlock_irqrestore(&ch->spinlock, flags);
1267
1268 if (!ioctx)
1269 return ioctx;
1270
1271 BUG_ON(ioctx->ch != ch);
1272 spin_lock_init(&ioctx->spinlock);
1273 ioctx->state = SRPT_STATE_NEW;
1274 ioctx->n_rbuf = 0;
1275 ioctx->rbufs = NULL;
1276 ioctx->n_rdma = 0;
1277 ioctx->n_rdma_ius = 0;
1278 ioctx->rdma_ius = NULL;
1279 ioctx->mapped_sg_count = 0;
1280 init_completion(&ioctx->tx_done);
1281 ioctx->queue_status_only = false;
1282 /*
1283 * transport_init_se_cmd() does not initialize all fields, so do it
1284 * here.
1285 */
1286 memset(&ioctx->cmd, 0, sizeof(ioctx->cmd));
1287 memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data));
1288
1289 return ioctx;
1290}
1291
1292/**
1293 * srpt_abort_cmd() - Abort a SCSI command.
1294 * @ioctx: I/O context associated with the SCSI command.
1295 * @context: Preferred execution context.
1296 */
1297static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx)
1298{
1299 enum srpt_command_state state;
1300 unsigned long flags;
1301
1302 BUG_ON(!ioctx);
1303
1304 /*
1305 * If the command is in a state where the target core is waiting for
1306 * the ib_srpt driver, change the state to the next state. Changing
1307 * the state of the command from SRPT_STATE_NEED_DATA to
1308 * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this
1309 * function a second time.
1310 */
1311
1312 spin_lock_irqsave(&ioctx->spinlock, flags);
1313 state = ioctx->state;
1314 switch (state) {
1315 case SRPT_STATE_NEED_DATA:
1316 ioctx->state = SRPT_STATE_DATA_IN;
1317 break;
1318 case SRPT_STATE_DATA_IN:
1319 case SRPT_STATE_CMD_RSP_SENT:
1320 case SRPT_STATE_MGMT_RSP_SENT:
1321 ioctx->state = SRPT_STATE_DONE;
1322 break;
1323 default:
1324 break;
1325 }
1326 spin_unlock_irqrestore(&ioctx->spinlock, flags);
1327
1328 if (state == SRPT_STATE_DONE) {
1329 struct srpt_rdma_ch *ch = ioctx->ch;
1330
1331 BUG_ON(ch->sess == NULL);
1332
1333 target_put_sess_cmd(ch->sess, &ioctx->cmd);
1334 goto out;
1335 }
1336
1337 pr_debug("Aborting cmd with state %d and tag %lld\n", state,
1338 ioctx->tag);
1339
1340 switch (state) {
1341 case SRPT_STATE_NEW:
1342 case SRPT_STATE_DATA_IN:
1343 case SRPT_STATE_MGMT:
1344 /*
1345 * Do nothing - defer abort processing until
1346 * srpt_queue_response() is invoked.
1347 */
1348 WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false));
1349 break;
1350 case SRPT_STATE_NEED_DATA:
1351 /* DMA_TO_DEVICE (write) - RDMA read error. */
1352
1353 /* XXX(hch): this is a horrible layering violation.. */
1354 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1355 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1356 ioctx->cmd.transport_state &= ~CMD_T_ACTIVE;
1357 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1358
1359 complete(&ioctx->cmd.transport_lun_stop_comp);
1360 break;
1361 case SRPT_STATE_CMD_RSP_SENT:
1362 /*
1363 * SRP_RSP sending failed or the SRP_RSP send completion has
1364 * not been received in time.
1365 */
1366 srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
1367 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1368 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1369 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1370 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1371 break;
1372 case SRPT_STATE_MGMT_RSP_SENT:
1373 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1374 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1375 break;
1376 default:
1377 WARN_ON("ERROR: unexpected command state");
1378 break;
1379 }
1380
1381out:
1382 return state;
1383}
1384
1385/**
1386 * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion.
1387 */
1388static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id)
1389{
1390 struct srpt_send_ioctx *ioctx;
1391 enum srpt_command_state state;
1392 struct se_cmd *cmd;
1393 u32 index;
1394
1395 atomic_inc(&ch->sq_wr_avail);
1396
1397 index = idx_from_wr_id(wr_id);
1398 ioctx = ch->ioctx_ring[index];
1399 state = srpt_get_cmd_state(ioctx);
1400 cmd = &ioctx->cmd;
1401
1402 WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1403 && state != SRPT_STATE_MGMT_RSP_SENT
1404 && state != SRPT_STATE_NEED_DATA
1405 && state != SRPT_STATE_DONE);
1406
1407 /* If SRP_RSP sending failed, undo the ch->req_lim change. */
1408 if (state == SRPT_STATE_CMD_RSP_SENT
1409 || state == SRPT_STATE_MGMT_RSP_SENT)
1410 atomic_dec(&ch->req_lim);
1411
1412 srpt_abort_cmd(ioctx);
1413}
1414
1415/**
1416 * srpt_handle_send_comp() - Process an IB send completion notification.
1417 */
1418static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1419 struct srpt_send_ioctx *ioctx)
1420{
1421 enum srpt_command_state state;
1422
1423 atomic_inc(&ch->sq_wr_avail);
1424
1425 state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
1426
1427 if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT
1428 && state != SRPT_STATE_MGMT_RSP_SENT
1429 && state != SRPT_STATE_DONE))
1430 pr_debug("state = %d\n", state);
1431
1432 if (state != SRPT_STATE_DONE) {
1433 srpt_unmap_sg_to_ib_sge(ch, ioctx);
1434 transport_generic_free_cmd(&ioctx->cmd, 0);
1435 } else {
1436 printk(KERN_ERR "IB completion has been received too late for"
1437 " wr_id = %u.\n", ioctx->ioctx.index);
1438 }
1439}
1440
1441/**
1442 * srpt_handle_rdma_comp() - Process an IB RDMA completion notification.
1443 *
1444 * XXX: what is now target_execute_cmd used to be asynchronous, and unmapping
1445 * the data that has been transferred via IB RDMA had to be postponed until the
1446 * check_stop_free() callback. None of this is necessary anymore and needs to
1447 * be cleaned up.
1448 */
1449static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
1450 struct srpt_send_ioctx *ioctx,
1451 enum srpt_opcode opcode)
1452{
1453 WARN_ON(ioctx->n_rdma <= 0);
1454 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1455
1456 if (opcode == SRPT_RDMA_READ_LAST) {
1457 if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
1458 SRPT_STATE_DATA_IN))
1459 target_execute_cmd(&ioctx->cmd);
1460 else
1461 printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__,
1462 __LINE__, srpt_get_cmd_state(ioctx));
1463 } else if (opcode == SRPT_RDMA_ABORT) {
1464 ioctx->rdma_aborted = true;
1465 } else {
1466 WARN(true, "unexpected opcode %d\n", opcode);
1467 }
1468}
1469
1470/**
1471 * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion.
1472 */
1473static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch,
1474 struct srpt_send_ioctx *ioctx,
1475 enum srpt_opcode opcode)
1476{
1477 struct se_cmd *cmd;
1478 enum srpt_command_state state;
1479 unsigned long flags;
1480
1481 cmd = &ioctx->cmd;
1482 state = srpt_get_cmd_state(ioctx);
1483 switch (opcode) {
1484 case SRPT_RDMA_READ_LAST:
1485 if (ioctx->n_rdma <= 0) {
1486 printk(KERN_ERR "Received invalid RDMA read"
1487 " error completion with idx %d\n",
1488 ioctx->ioctx.index);
1489 break;
1490 }
1491 atomic_add(ioctx->n_rdma, &ch->sq_wr_avail);
1492 if (state == SRPT_STATE_NEED_DATA)
1493 srpt_abort_cmd(ioctx);
1494 else
1495 printk(KERN_ERR "%s[%d]: wrong state = %d\n",
1496 __func__, __LINE__, state);
1497 break;
1498 case SRPT_RDMA_WRITE_LAST:
1499 spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
1500 ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
1501 spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
1502 break;
1503 default:
1504 printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
1505 __LINE__, opcode);
1506 break;
1507 }
1508}
1509
1510/**
1511 * srpt_build_cmd_rsp() - Build an SRP_RSP response.
1512 * @ch: RDMA channel through which the request has been received.
1513 * @ioctx: I/O context associated with the SRP_CMD request. The response will
1514 * be built in the buffer ioctx->buf points at and hence this function will
1515 * overwrite the request data.
1516 * @tag: tag of the request for which this response is being generated.
1517 * @status: value for the STATUS field of the SRP_RSP information unit.
1518 *
1519 * Returns the size in bytes of the SRP_RSP response.
1520 *
1521 * An SRP_RSP response contains a SCSI status or service response. See also
1522 * section 6.9 in the SRP r16a document for the format of an SRP_RSP
1523 * response. See also SPC-2 for more information about sense data.
1524 */
1525static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1526 struct srpt_send_ioctx *ioctx, u64 tag,
1527 int status)
1528{
1529 struct srp_rsp *srp_rsp;
1530 const u8 *sense_data;
1531 int sense_data_len, max_sense_len;
1532
1533 /*
1534 * The lowest bit of all SAM-3 status codes is zero (see also
1535 * paragraph 5.3 in SAM-3).
1536 */
1537 WARN_ON(status & 1);
1538
1539 srp_rsp = ioctx->ioctx.buf;
1540 BUG_ON(!srp_rsp);
1541
1542 sense_data = ioctx->sense_data;
1543 sense_data_len = ioctx->cmd.scsi_sense_length;
1544 WARN_ON(sense_data_len > sizeof(ioctx->sense_data));
1545
1546 memset(srp_rsp, 0, sizeof *srp_rsp);
1547 srp_rsp->opcode = SRP_RSP;
1548 srp_rsp->req_lim_delta =
1549 __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0));
1550 srp_rsp->tag = tag;
1551 srp_rsp->status = status;
1552
1553 if (sense_data_len) {
1554 BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp));
1555 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
1556 if (sense_data_len > max_sense_len) {
1557 printk(KERN_WARNING "truncated sense data from %d to %d"
1558 " bytes\n", sense_data_len, max_sense_len);
1559 sense_data_len = max_sense_len;
1560 }
1561
1562 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
1563 srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
1564 memcpy(srp_rsp + 1, sense_data, sense_data_len);
1565 }
1566
1567 return sizeof(*srp_rsp) + sense_data_len;
1568}
1569
1570/**
1571 * srpt_build_tskmgmt_rsp() - Build a task management response.
1572 * @ch: RDMA channel through which the request has been received.
1573 * @ioctx: I/O context in which the SRP_RSP response will be built.
1574 * @rsp_code: RSP_CODE that will be stored in the response.
1575 * @tag: Tag of the request for which this response is being generated.
1576 *
1577 * Returns the size in bytes of the SRP_RSP response.
1578 *
1579 * An SRP_RSP response contains a SCSI status or service response. See also
1580 * section 6.9 in the SRP r16a document for the format of an SRP_RSP
1581 * response.
1582 */
1583static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1584 struct srpt_send_ioctx *ioctx,
1585 u8 rsp_code, u64 tag)
1586{
1587 struct srp_rsp *srp_rsp;
1588 int resp_data_len;
1589 int resp_len;
1590
1591 resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
1592 resp_len = sizeof(*srp_rsp) + resp_data_len;
1593
1594 srp_rsp = ioctx->ioctx.buf;
1595 BUG_ON(!srp_rsp);
1596 memset(srp_rsp, 0, sizeof *srp_rsp);
1597
1598 srp_rsp->opcode = SRP_RSP;
1599 srp_rsp->req_lim_delta = __constant_cpu_to_be32(1
1600 + atomic_xchg(&ch->req_lim_delta, 0));
1601 srp_rsp->tag = tag;
1602
1603 if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
1604 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
1605 srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
1606 srp_rsp->data[3] = rsp_code;
1607 }
1608
1609 return resp_len;
1610}
1611
/* Sentinel returned by srpt_unpack_lun() for an invalid or unsupported LUN. */
#define NO_SUCH_LUN ((uint64_t)-1LL)

/*
 * SCSI LUN addressing method. See also SAM-2 and the section about
 * eight byte LUNs.
 */
enum scsi_lun_addr_method {
	SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0,
	SCSI_LUN_ADDR_METHOD_FLAT = 1,
	SCSI_LUN_ADDR_METHOD_LUN = 2,
	SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3,
};
1624
1625/*
1626 * srpt_unpack_lun() - Convert from network LUN to linear LUN.
1627 *
1628 * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte
1629 * order (big endian) to a linear LUN. Supports three LUN addressing methods:
1630 * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40).
1631 */
1632static uint64_t srpt_unpack_lun(const uint8_t *lun, int len)
1633{
1634 uint64_t res = NO_SUCH_LUN;
1635 int addressing_method;
1636
1637 if (unlikely(len < 2)) {
1638 printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or "
1639 "more", len);
1640 goto out;
1641 }
1642
1643 switch (len) {
1644 case 8:
1645 if ((*((__be64 *)lun) &
1646 __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0)
1647 goto out_err;
1648 break;
1649 case 4:
1650 if (*((__be16 *)&lun[2]) != 0)
1651 goto out_err;
1652 break;
1653 case 6:
1654 if (*((__be32 *)&lun[2]) != 0)
1655 goto out_err;
1656 break;
1657 case 2:
1658 break;
1659 default:
1660 goto out_err;
1661 }
1662
1663 addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */
1664 switch (addressing_method) {
1665 case SCSI_LUN_ADDR_METHOD_PERIPHERAL:
1666 case SCSI_LUN_ADDR_METHOD_FLAT:
1667 case SCSI_LUN_ADDR_METHOD_LUN:
1668 res = *(lun + 1) | (((*lun) & 0x3f) << 8);
1669 break;
1670
1671 case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN:
1672 default:
1673 printk(KERN_ERR "Unimplemented LUN addressing method %u",
1674 addressing_method);
1675 break;
1676 }
1677
1678out:
1679 return res;
1680
1681out_err:
1682 printk(KERN_ERR "Support for multi-level LUNs has not yet been"
1683 " implemented");
1684 goto out;
1685}
1686
1687static int srpt_check_stop_free(struct se_cmd *cmd)
1688{
1689 struct srpt_send_ioctx *ioctx = container_of(cmd,
1690 struct srpt_send_ioctx, cmd);
1691
1692 return target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
1693}
1694
1695/**
1696 * srpt_handle_cmd() - Process SRP_CMD.
1697 */
1698static int srpt_handle_cmd(struct srpt_rdma_ch *ch,
1699 struct srpt_recv_ioctx *recv_ioctx,
1700 struct srpt_send_ioctx *send_ioctx)
1701{
1702 struct se_cmd *cmd;
1703 struct srp_cmd *srp_cmd;
1704 uint64_t unpacked_lun;
1705 u64 data_len;
1706 enum dma_data_direction dir;
1707 sense_reason_t ret;
1708 int rc;
1709
1710 BUG_ON(!send_ioctx);
1711
1712 srp_cmd = recv_ioctx->ioctx.buf;
1713 cmd = &send_ioctx->cmd;
1714 send_ioctx->tag = srp_cmd->tag;
1715
1716 switch (srp_cmd->task_attr) {
1717 case SRP_CMD_SIMPLE_Q:
1718 cmd->sam_task_attr = MSG_SIMPLE_TAG;
1719 break;
1720 case SRP_CMD_ORDERED_Q:
1721 default:
1722 cmd->sam_task_attr = MSG_ORDERED_TAG;
1723 break;
1724 case SRP_CMD_HEAD_OF_Q:
1725 cmd->sam_task_attr = MSG_HEAD_TAG;
1726 break;
1727 case SRP_CMD_ACA:
1728 cmd->sam_task_attr = MSG_ACA_TAG;
1729 break;
1730 }
1731
1732 if (srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len)) {
1733 printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n",
1734 srp_cmd->tag);
1735 ret = TCM_INVALID_CDB_FIELD;
1736 goto send_sense;
1737 }
1738
1739 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
1740 sizeof(srp_cmd->lun));
1741 rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
1742 &send_ioctx->sense_data[0], unpacked_lun, data_len,
1743 MSG_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
1744 if (rc != 0) {
1745 ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1746 goto send_sense;
1747 }
1748 return 0;
1749
1750send_sense:
1751 transport_send_check_condition_and_sense(cmd, ret, 0);
1752 return -1;
1753}
1754
1755/**
1756 * srpt_rx_mgmt_fn_tag() - Process a task management function by tag.
1757 * @ch: RDMA channel of the task management request.
1758 * @fn: Task management function to perform.
1759 * @req_tag: Tag of the SRP task management request.
1760 * @mgmt_ioctx: I/O context of the task management request.
1761 *
1762 * Returns zero if the target core will process the task management
1763 * request asynchronously.
1764 *
1765 * Note: It is assumed that the initiator serializes tag-based task management
1766 * requests.
1767 */
1768static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag)
1769{
1770 struct srpt_device *sdev;
1771 struct srpt_rdma_ch *ch;
1772 struct srpt_send_ioctx *target;
1773 int ret, i;
1774
1775 ret = -EINVAL;
1776 ch = ioctx->ch;
1777 BUG_ON(!ch);
1778 BUG_ON(!ch->sport);
1779 sdev = ch->sport->sdev;
1780 BUG_ON(!sdev);
1781 spin_lock_irq(&sdev->spinlock);
1782 for (i = 0; i < ch->rq_size; ++i) {
1783 target = ch->ioctx_ring[i];
1784 if (target->cmd.se_lun == ioctx->cmd.se_lun &&
1785 target->tag == tag &&
1786 srpt_get_cmd_state(target) != SRPT_STATE_DONE) {
1787 ret = 0;
1788 /* now let the target core abort &target->cmd; */
1789 break;
1790 }
1791 }
1792 spin_unlock_irq(&sdev->spinlock);
1793 return ret;
1794}
1795
1796static int srp_tmr_to_tcm(int fn)
1797{
1798 switch (fn) {
1799 case SRP_TSK_ABORT_TASK:
1800 return TMR_ABORT_TASK;
1801 case SRP_TSK_ABORT_TASK_SET:
1802 return TMR_ABORT_TASK_SET;
1803 case SRP_TSK_CLEAR_TASK_SET:
1804 return TMR_CLEAR_TASK_SET;
1805 case SRP_TSK_LUN_RESET:
1806 return TMR_LUN_RESET;
1807 case SRP_TSK_CLEAR_ACA:
1808 return TMR_CLEAR_ACA;
1809 default:
1810 return -1;
1811 }
1812}
1813
1814/**
1815 * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
1816 *
1817 * Returns 0 if and only if the request will be processed by the target core.
1818 *
1819 * For more information about SRP_TSK_MGMT information units, see also section
1820 * 6.7 in the SRP r16a document.
1821 */
1822static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1823 struct srpt_recv_ioctx *recv_ioctx,
1824 struct srpt_send_ioctx *send_ioctx)
1825{
1826 struct srp_tsk_mgmt *srp_tsk;
1827 struct se_cmd *cmd;
1828 struct se_session *sess = ch->sess;
1829 uint64_t unpacked_lun;
1830 uint32_t tag = 0;
1831 int tcm_tmr;
1832 int rc;
1833
1834 BUG_ON(!send_ioctx);
1835
1836 srp_tsk = recv_ioctx->ioctx.buf;
1837 cmd = &send_ioctx->cmd;
1838
1839 pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld"
1840 " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func,
1841 srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess);
1842
1843 srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT);
1844 send_ioctx->tag = srp_tsk->tag;
1845 tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func);
1846 if (tcm_tmr < 0) {
1847 send_ioctx->cmd.se_tmr_req->response =
1848 TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
1849 goto fail;
1850 }
1851 unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun,
1852 sizeof(srp_tsk->lun));
1853
1854 if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) {
1855 rc = srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag);
1856 if (rc < 0) {
1857 send_ioctx->cmd.se_tmr_req->response =
1858 TMR_TASK_DOES_NOT_EXIST;
1859 goto fail;
1860 }
1861 tag = srp_tsk->task_tag;
1862 }
1863 rc = target_submit_tmr(&send_ioctx->cmd, sess, NULL, unpacked_lun,
1864 srp_tsk, tcm_tmr, GFP_KERNEL, tag,
1865 TARGET_SCF_ACK_KREF);
1866 if (rc != 0) {
1867 send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
1868 goto fail;
1869 }
1870 return;
1871fail:
1872 transport_send_check_condition_and_sense(cmd, 0, 0); // XXX:
1873}
1874
1875/**
1876 * srpt_handle_new_iu() - Process a newly received information unit.
1877 * @ch: RDMA channel through which the information unit has been received.
1878 * @ioctx: SRPT I/O context associated with the information unit.
1879 */
1880static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1881 struct srpt_recv_ioctx *recv_ioctx,
1882 struct srpt_send_ioctx *send_ioctx)
1883{
1884 struct srp_cmd *srp_cmd;
1885 enum rdma_ch_state ch_state;
1886
1887 BUG_ON(!ch);
1888 BUG_ON(!recv_ioctx);
1889
1890 ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
1891 recv_ioctx->ioctx.dma, srp_max_req_size,
1892 DMA_FROM_DEVICE);
1893
1894 ch_state = srpt_get_ch_state(ch);
1895 if (unlikely(ch_state == CH_CONNECTING)) {
1896 list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list);
1897 goto out;
1898 }
1899
1900 if (unlikely(ch_state != CH_LIVE))
1901 goto out;
1902
1903 srp_cmd = recv_ioctx->ioctx.buf;
1904 if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) {
1905 if (!send_ioctx)
1906 send_ioctx = srpt_get_send_ioctx(ch);
1907 if (unlikely(!send_ioctx)) {
1908 list_add_tail(&recv_ioctx->wait_list,
1909 &ch->cmd_wait_list);
1910 goto out;
1911 }
1912 }
1913
1914 switch (srp_cmd->opcode) {
1915 case SRP_CMD:
1916 srpt_handle_cmd(ch, recv_ioctx, send_ioctx);
1917 break;
1918 case SRP_TSK_MGMT:
1919 srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx);
1920 break;
1921 case SRP_I_LOGOUT:
1922 printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n");
1923 break;
1924 case SRP_CRED_RSP:
1925 pr_debug("received SRP_CRED_RSP\n");
1926 break;
1927 case SRP_AER_RSP:
1928 pr_debug("received SRP_AER_RSP\n");
1929 break;
1930 case SRP_RSP:
1931 printk(KERN_ERR "Received SRP_RSP\n");
1932 break;
1933 default:
1934 printk(KERN_ERR "received IU with unknown opcode 0x%x\n",
1935 srp_cmd->opcode);
1936 break;
1937 }
1938
1939 srpt_post_recv(ch->sport->sdev, recv_ioctx);
1940out:
1941 return;
1942}
1943
1944static void srpt_process_rcv_completion(struct ib_cq *cq,
1945 struct srpt_rdma_ch *ch,
1946 struct ib_wc *wc)
1947{
1948 struct srpt_device *sdev = ch->sport->sdev;
1949 struct srpt_recv_ioctx *ioctx;
1950 u32 index;
1951
1952 index = idx_from_wr_id(wc->wr_id);
1953 if (wc->status == IB_WC_SUCCESS) {
1954 int req_lim;
1955
1956 req_lim = atomic_dec_return(&ch->req_lim);
1957 if (unlikely(req_lim < 0))
1958 printk(KERN_ERR "req_lim = %d < 0\n", req_lim);
1959 ioctx = sdev->ioctx_ring[index];
1960 srpt_handle_new_iu(ch, ioctx, NULL);
1961 } else {
1962 printk(KERN_INFO "receiving failed for idx %u with status %d\n",
1963 index, wc->status);
1964 }
1965}
1966
1967/**
1968 * srpt_process_send_completion() - Process an IB send completion.
1969 *
1970 * Note: Although this has not yet been observed during tests, at least in
1971 * theory it is possible that the srpt_get_send_ioctx() call invoked by
1972 * srpt_handle_new_iu() fails. This is possible because the req_lim_delta
1973 * value in each response is set to one, and it is possible that this response
1974 * makes the initiator send a new request before the send completion for that
1975 * response has been processed. This could e.g. happen if the call to
1976 * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or
1977 * if IB retransmission causes generation of the send completion to be
1978 * delayed. Incoming information units for which srpt_get_send_ioctx() fails
1979 * are queued on cmd_wait_list. The code below processes these delayed
1980 * requests one at a time.
1981 */
1982static void srpt_process_send_completion(struct ib_cq *cq,
1983 struct srpt_rdma_ch *ch,
1984 struct ib_wc *wc)
1985{
1986 struct srpt_send_ioctx *send_ioctx;
1987 uint32_t index;
1988 enum srpt_opcode opcode;
1989
1990 index = idx_from_wr_id(wc->wr_id);
1991 opcode = opcode_from_wr_id(wc->wr_id);
1992 send_ioctx = ch->ioctx_ring[index];
1993 if (wc->status == IB_WC_SUCCESS) {
1994 if (opcode == SRPT_SEND)
1995 srpt_handle_send_comp(ch, send_ioctx);
1996 else {
1997 WARN_ON(opcode != SRPT_RDMA_ABORT &&
1998 wc->opcode != IB_WC_RDMA_READ);
1999 srpt_handle_rdma_comp(ch, send_ioctx, opcode);
2000 }
2001 } else {
2002 if (opcode == SRPT_SEND) {
2003 printk(KERN_INFO "sending response for idx %u failed"
2004 " with status %d\n", index, wc->status);
2005 srpt_handle_send_err_comp(ch, wc->wr_id);
2006 } else if (opcode != SRPT_RDMA_MID) {
2007 printk(KERN_INFO "RDMA t %d for idx %u failed with"
2008 " status %d", opcode, index, wc->status);
2009 srpt_handle_rdma_err_comp(ch, send_ioctx, opcode);
2010 }
2011 }
2012
2013 while (unlikely(opcode == SRPT_SEND
2014 && !list_empty(&ch->cmd_wait_list)
2015 && srpt_get_ch_state(ch) == CH_LIVE
2016 && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) {
2017 struct srpt_recv_ioctx *recv_ioctx;
2018
2019 recv_ioctx = list_first_entry(&ch->cmd_wait_list,
2020 struct srpt_recv_ioctx,
2021 wait_list);
2022 list_del(&recv_ioctx->wait_list);
2023 srpt_handle_new_iu(ch, recv_ioctx, send_ioctx);
2024 }
2025}
2026
2027static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch)
2028{
2029 struct ib_wc *const wc = ch->wc;
2030 int i, n;
2031
2032 WARN_ON(cq != ch->cq);
2033
2034 ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
2035 while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) {
2036 for (i = 0; i < n; i++) {
2037 if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV)
2038 srpt_process_rcv_completion(cq, ch, &wc[i]);
2039 else
2040 srpt_process_send_completion(cq, ch, &wc[i]);
2041 }
2042 }
2043}
2044
2045/**
2046 * srpt_completion() - IB completion queue callback function.
2047 *
2048 * Notes:
2049 * - It is guaranteed that a completion handler will never be invoked
2050 * concurrently on two different CPUs for the same completion queue. See also
2051 * Documentation/infiniband/core_locking.txt and the implementation of
2052 * handle_edge_irq() in kernel/irq/chip.c.
2053 * - When threaded IRQs are enabled, completion handlers are invoked in thread
2054 * context instead of interrupt context.
2055 */
2056static void srpt_completion(struct ib_cq *cq, void *ctx)
2057{
2058 struct srpt_rdma_ch *ch = ctx;
2059
2060 wake_up_interruptible(&ch->wait_queue);
2061}
2062
/**
 * srpt_compl_thread() - Per-channel kernel thread that processes completions.
 * @arg: Channel pointer (struct srpt_rdma_ch *), as passed to kthread_run().
 *
 * Sleeps on ch->wait_queue until woken by srpt_completion() and drains the
 * channel's completion queue on every wakeup, until kthread_stop() is called.
 */
static int srpt_compl_thread(void *arg)
{
	struct srpt_rdma_ch *ch;

	/* Hibernation / freezing of the SRPT kernel thread is not supported. */
	current->flags |= PF_NOFREEZE;

	ch = arg;
	BUG_ON(!ch);
	printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n",
	       ch->sess_name, ch->thread->comm, current->pid);
	while (!kthread_should_stop()) {
		/*
		 * The comma expression makes wait_event_interruptible() drain
		 * the CQ on every wakeup while only terminating the wait once
		 * the thread has been asked to stop.
		 */
		wait_event_interruptible(ch->wait_queue,
			(srpt_process_completion(ch->cq, ch),
			 kthread_should_stop()));
	}
	printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n",
	       ch->sess_name, ch->thread->comm, current->pid);
	return 0;
}
2083
2084/**
2085 * srpt_create_ch_ib() - Create receive and send completion queues.
2086 */
2087static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
2088{
2089 struct ib_qp_init_attr *qp_init;
2090 struct srpt_port *sport = ch->sport;
2091 struct srpt_device *sdev = sport->sdev;
2092 u32 srp_sq_size = sport->port_attrib.srp_sq_size;
2093 int ret;
2094
2095 WARN_ON(ch->rq_size < 1);
2096
2097 ret = -ENOMEM;
2098 qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
2099 if (!qp_init)
2100 goto out;
2101
2102 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch,
2103 ch->rq_size + srp_sq_size, 0);
2104 if (IS_ERR(ch->cq)) {
2105 ret = PTR_ERR(ch->cq);
2106 printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n",
2107 ch->rq_size + srp_sq_size, ret);
2108 goto out;
2109 }
2110
2111 qp_init->qp_context = (void *)ch;
2112 qp_init->event_handler
2113 = (void(*)(struct ib_event *, void*))srpt_qp_event;
2114 qp_init->send_cq = ch->cq;
2115 qp_init->recv_cq = ch->cq;
2116 qp_init->srq = sdev->srq;
2117 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
2118 qp_init->qp_type = IB_QPT_RC;
2119 qp_init->cap.max_send_wr = srp_sq_size;
2120 qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
2121
2122 ch->qp = ib_create_qp(sdev->pd, qp_init);
2123 if (IS_ERR(ch->qp)) {
2124 ret = PTR_ERR(ch->qp);
2125 printk(KERN_ERR "failed to create_qp ret= %d\n", ret);
2126 goto err_destroy_cq;
2127 }
2128
2129 atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr);
2130
2131 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
2132 __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
2133 qp_init->cap.max_send_wr, ch->cm_id);
2134
2135 ret = srpt_init_ch_qp(ch, ch->qp);
2136 if (ret)
2137 goto err_destroy_qp;
2138
2139 init_waitqueue_head(&ch->wait_queue);
2140
2141 pr_debug("creating thread for session %s\n", ch->sess_name);
2142
2143 ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl");
2144 if (IS_ERR(ch->thread)) {
2145 printk(KERN_ERR "failed to create kernel thread %ld\n",
2146 PTR_ERR(ch->thread));
2147 ch->thread = NULL;
2148 goto err_destroy_qp;
2149 }
2150
2151out:
2152 kfree(qp_init);
2153 return ret;
2154
2155err_destroy_qp:
2156 ib_destroy_qp(ch->qp);
2157err_destroy_cq:
2158 ib_destroy_cq(ch->cq);
2159 goto out;
2160}
2161
/* Counterpart of srpt_create_ch_ib(): stop the completion thread (if it was
 * started) and destroy the channel's QP and CQ.
 */
static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch)
{
	if (ch->thread)
		kthread_stop(ch->thread);

	ib_destroy_qp(ch->qp);
	ib_destroy_cq(ch->cq);
}
2170
2171/**
2172 * __srpt_close_ch() - Close an RDMA channel by setting the QP error state.
2173 *
2174 * Reset the QP and make sure all resources associated with the channel will
2175 * be deallocated at an appropriate time.
2176 *
2177 * Note: The caller must hold ch->sport->sdev->spinlock.
2178 */
2179static void __srpt_close_ch(struct srpt_rdma_ch *ch)
2180{
2181 struct srpt_device *sdev;
2182 enum rdma_ch_state prev_state;
2183 unsigned long flags;
2184
2185 sdev = ch->sport->sdev;
2186
2187 spin_lock_irqsave(&ch->spinlock, flags);
2188 prev_state = ch->state;
2189 switch (prev_state) {
2190 case CH_CONNECTING:
2191 case CH_LIVE:
2192 ch->state = CH_DISCONNECTING;
2193 break;
2194 default:
2195 break;
2196 }
2197 spin_unlock_irqrestore(&ch->spinlock, flags);
2198
2199 switch (prev_state) {
2200 case CH_CONNECTING:
2201 ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0,
2202 NULL, 0);
2203 /* fall through */
2204 case CH_LIVE:
2205 if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0)
2206 printk(KERN_ERR "sending CM DREQ failed.\n");
2207 break;
2208 case CH_DISCONNECTING:
2209 break;
2210 case CH_DRAINING:
2211 case CH_RELEASING:
2212 break;
2213 }
2214}
2215
2216/**
2217 * srpt_close_ch() - Close an RDMA channel.
2218 */
2219static void srpt_close_ch(struct srpt_rdma_ch *ch)
2220{
2221 struct srpt_device *sdev;
2222
2223 sdev = ch->sport->sdev;
2224 spin_lock_irq(&sdev->spinlock);
2225 __srpt_close_ch(ch);
2226 spin_unlock_irq(&sdev->spinlock);
2227}
2228
2229/**
2230 * srpt_drain_channel() - Drain a channel by resetting the IB queue pair.
2231 * @cm_id: Pointer to the CM ID of the channel to be drained.
2232 *
2233 * Note: Must be called from inside srpt_cm_handler to avoid a race between
2234 * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
2235 * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one()
2236 * waits until all target sessions for the associated IB device have been
2237 * unregistered and target session registration involves a call to
2238 * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until
2239 * this function has finished).
2240 */
2241static void srpt_drain_channel(struct ib_cm_id *cm_id)
2242{
2243 struct srpt_device *sdev;
2244 struct srpt_rdma_ch *ch;
2245 int ret;
2246 bool do_reset = false;
2247
2248 WARN_ON_ONCE(irqs_disabled());
2249
2250 sdev = cm_id->context;
2251 BUG_ON(!sdev);
2252 spin_lock_irq(&sdev->spinlock);
2253 list_for_each_entry(ch, &sdev->rch_list, list) {
2254 if (ch->cm_id == cm_id) {
2255 do_reset = srpt_test_and_set_ch_state(ch,
2256 CH_CONNECTING, CH_DRAINING) ||
2257 srpt_test_and_set_ch_state(ch,
2258 CH_LIVE, CH_DRAINING) ||
2259 srpt_test_and_set_ch_state(ch,
2260 CH_DISCONNECTING, CH_DRAINING);
2261 break;
2262 }
2263 }
2264 spin_unlock_irq(&sdev->spinlock);
2265
2266 if (do_reset) {
2267 ret = srpt_ch_qp_err(ch);
2268 if (ret < 0)
2269 printk(KERN_ERR "Setting queue pair in error state"
2270 " failed: %d\n", ret);
2271 }
2272}
2273
2274/**
2275 * srpt_find_channel() - Look up an RDMA channel.
2276 * @cm_id: Pointer to the CM ID of the channel to be looked up.
2277 *
2278 * Return NULL if no matching RDMA channel has been found.
2279 */
2280static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
2281 struct ib_cm_id *cm_id)
2282{
2283 struct srpt_rdma_ch *ch;
2284 bool found;
2285
2286 WARN_ON_ONCE(irqs_disabled());
2287 BUG_ON(!sdev);
2288
2289 found = false;
2290 spin_lock_irq(&sdev->spinlock);
2291 list_for_each_entry(ch, &sdev->rch_list, list) {
2292 if (ch->cm_id == cm_id) {
2293 found = true;
2294 break;
2295 }
2296 }
2297 spin_unlock_irq(&sdev->spinlock);
2298
2299 return found ? ch : NULL;
2300}
2301
2302/**
2303 * srpt_release_channel() - Release channel resources.
2304 *
2305 * Schedules the actual release because:
2306 * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would
2307 * trigger a deadlock.
2308 * - It is not safe to call TCM transport_* functions from interrupt context.
2309 */
2310static void srpt_release_channel(struct srpt_rdma_ch *ch)
2311{
2312 schedule_work(&ch->release_work);
2313}
2314
/* Workqueue handler that performs the actual channel release: tear down the
 * TCM session, the IB resources and the ioctx ring, then free the channel.
 * The teardown order below is deliberate; do not reorder the steps.
 */
static void srpt_release_channel_work(struct work_struct *w)
{
	struct srpt_rdma_ch *ch;
	struct srpt_device *sdev;
	struct se_session *se_sess;

	ch = container_of(w, struct srpt_rdma_ch, release_work);
	pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess,
		 ch->release_done);

	sdev = ch->sport->sdev;
	BUG_ON(!sdev);

	se_sess = ch->sess;
	BUG_ON(!se_sess);

	/* Wait until all commands of this session have completed. */
	target_wait_for_sess_cmds(se_sess, 0);

	transport_deregister_session_configfs(se_sess);
	transport_deregister_session(se_sess);
	ch->sess = NULL;

	srpt_destroy_ch_ib(ch);

	srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
			     ch->sport->sdev, ch->rq_size,
			     ch->rsp_size, DMA_TO_DEVICE);

	spin_lock_irq(&sdev->spinlock);
	list_del(&ch->list);
	spin_unlock_irq(&sdev->spinlock);

	ib_destroy_cm_id(ch->cm_id);

	/* Notify a waiter (if any) that the channel is gone ... */
	if (ch->release_done)
		complete(ch->release_done);

	/* ... and srpt_release_sdev(), which waits for an empty rch_list. */
	wake_up(&sdev->ch_releaseQ);

	kfree(ch);
}
2356
2357static struct srpt_node_acl *__srpt_lookup_acl(struct srpt_port *sport,
2358 u8 i_port_id[16])
2359{
2360 struct srpt_node_acl *nacl;
2361
2362 list_for_each_entry(nacl, &sport->port_acl_list, list)
2363 if (memcmp(nacl->i_port_id, i_port_id,
2364 sizeof(nacl->i_port_id)) == 0)
2365 return nacl;
2366
2367 return NULL;
2368}
2369
2370static struct srpt_node_acl *srpt_lookup_acl(struct srpt_port *sport,
2371 u8 i_port_id[16])
2372{
2373 struct srpt_node_acl *nacl;
2374
2375 spin_lock_irq(&sport->port_acl_lock);
2376 nacl = __srpt_lookup_acl(sport, i_port_id);
2377 spin_unlock_irq(&sport->port_acl_lock);
2378
2379 return nacl;
2380}
2381
2382/**
2383 * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED.
2384 *
2385 * Ownership of the cm_id is transferred to the target session if this
2386 * functions returns zero. Otherwise the caller remains the owner of cm_id.
2387 */
2388static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
2389 struct ib_cm_req_event_param *param,
2390 void *private_data)
2391{
2392 struct srpt_device *sdev = cm_id->context;
2393 struct srpt_port *sport = &sdev->port[param->port - 1];
2394 struct srp_login_req *req;
2395 struct srp_login_rsp *rsp;
2396 struct srp_login_rej *rej;
2397 struct ib_cm_rep_param *rep_param;
2398 struct srpt_rdma_ch *ch, *tmp_ch;
2399 struct srpt_node_acl *nacl;
2400 u32 it_iu_len;
2401 int i;
2402 int ret = 0;
2403
2404 WARN_ON_ONCE(irqs_disabled());
2405
2406 if (WARN_ON(!sdev || !private_data))
2407 return -EINVAL;
2408
2409 req = (struct srp_login_req *)private_data;
2410
2411 it_iu_len = be32_to_cpu(req->req_it_iu_len);
2412
2413 printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx,"
2414 " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d"
2415 " (guid=0x%llx:0x%llx)\n",
2416 be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]),
2417 be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]),
2418 be64_to_cpu(*(__be64 *)&req->target_port_id[0]),
2419 be64_to_cpu(*(__be64 *)&req->target_port_id[8]),
2420 it_iu_len,
2421 param->port,
2422 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]),
2423 be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8]));
2424
2425 rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
2426 rej = kzalloc(sizeof *rej, GFP_KERNEL);
2427 rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
2428
2429 if (!rsp || !rej || !rep_param) {
2430 ret = -ENOMEM;
2431 goto out;
2432 }
2433
2434 if (it_iu_len > srp_max_req_size || it_iu_len < 64) {
2435 rej->reason = __constant_cpu_to_be32(
2436 SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
2437 ret = -EINVAL;
2438 printk(KERN_ERR "rejected SRP_LOGIN_REQ because its"
2439 " length (%d bytes) is out of range (%d .. %d)\n",
2440 it_iu_len, 64, srp_max_req_size);
2441 goto reject;
2442 }
2443
2444 if (!sport->enabled) {
2445 rej->reason = __constant_cpu_to_be32(
2446 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2447 ret = -EINVAL;
2448 printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port"
2449 " has not yet been enabled\n");
2450 goto reject;
2451 }
2452
2453 if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
2454 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
2455
2456 spin_lock_irq(&sdev->spinlock);
2457
2458 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2459 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
2460 && !memcmp(ch->t_port_id, req->target_port_id, 16)
2461 && param->port == ch->sport->port
2462 && param->listen_id == ch->sport->sdev->cm_id
2463 && ch->cm_id) {
2464 enum rdma_ch_state ch_state;
2465
2466 ch_state = srpt_get_ch_state(ch);
2467 if (ch_state != CH_CONNECTING
2468 && ch_state != CH_LIVE)
2469 continue;
2470
2471 /* found an existing channel */
2472 pr_debug("Found existing channel %s"
2473 " cm_id= %p state= %d\n",
2474 ch->sess_name, ch->cm_id, ch_state);
2475
2476 __srpt_close_ch(ch);
2477
2478 rsp->rsp_flags =
2479 SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
2480 }
2481 }
2482
2483 spin_unlock_irq(&sdev->spinlock);
2484
2485 } else
2486 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
2487
2488 if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid)
2489 || *(__be64 *)(req->target_port_id + 8) !=
2490 cpu_to_be64(srpt_service_guid)) {
2491 rej->reason = __constant_cpu_to_be32(
2492 SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
2493 ret = -ENOMEM;
2494 printk(KERN_ERR "rejected SRP_LOGIN_REQ because it"
2495 " has an invalid target port identifier.\n");
2496 goto reject;
2497 }
2498
2499 ch = kzalloc(sizeof *ch, GFP_KERNEL);
2500 if (!ch) {
2501 rej->reason = __constant_cpu_to_be32(
2502 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2503 printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n");
2504 ret = -ENOMEM;
2505 goto reject;
2506 }
2507
2508 INIT_WORK(&ch->release_work, srpt_release_channel_work);
2509 memcpy(ch->i_port_id, req->initiator_port_id, 16);
2510 memcpy(ch->t_port_id, req->target_port_id, 16);
2511 ch->sport = &sdev->port[param->port - 1];
2512 ch->cm_id = cm_id;
2513 /*
2514 * Avoid QUEUE_FULL conditions by limiting the number of buffers used
2515 * for the SRP protocol to the command queue size.
2516 */
2517 ch->rq_size = SRPT_RQ_SIZE;
2518 spin_lock_init(&ch->spinlock);
2519 ch->state = CH_CONNECTING;
2520 INIT_LIST_HEAD(&ch->cmd_wait_list);
2521 ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size;
2522
2523 ch->ioctx_ring = (struct srpt_send_ioctx **)
2524 srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size,
2525 sizeof(*ch->ioctx_ring[0]),
2526 ch->rsp_size, DMA_TO_DEVICE);
2527 if (!ch->ioctx_ring)
2528 goto free_ch;
2529
2530 INIT_LIST_HEAD(&ch->free_list);
2531 for (i = 0; i < ch->rq_size; i++) {
2532 ch->ioctx_ring[i]->ch = ch;
2533 list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list);
2534 }
2535
2536 ret = srpt_create_ch_ib(ch);
2537 if (ret) {
2538 rej->reason = __constant_cpu_to_be32(
2539 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2540 printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating"
2541 " a new RDMA channel failed.\n");
2542 goto free_ring;
2543 }
2544
2545 ret = srpt_ch_qp_rtr(ch, ch->qp);
2546 if (ret) {
2547 rej->reason = __constant_cpu_to_be32(
2548 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2549 printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling"
2550 " RTR failed (error code = %d)\n", ret);
2551 goto destroy_ib;
2552 }
2553 /*
2554 * Use the initator port identifier as the session name.
2555 */
2556 snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx",
2557 be64_to_cpu(*(__be64 *)ch->i_port_id),
2558 be64_to_cpu(*(__be64 *)(ch->i_port_id + 8)));
2559
2560 pr_debug("registering session %s\n", ch->sess_name);
2561
2562 nacl = srpt_lookup_acl(sport, ch->i_port_id);
2563 if (!nacl) {
2564 printk(KERN_INFO "Rejected login because no ACL has been"
2565 " configured yet for initiator %s.\n", ch->sess_name);
2566 rej->reason = __constant_cpu_to_be32(
2567 SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
2568 goto destroy_ib;
2569 }
2570
2571 ch->sess = transport_init_session();
2572 if (IS_ERR(ch->sess)) {
2573 rej->reason = __constant_cpu_to_be32(
2574 SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2575 pr_debug("Failed to create session\n");
2576 goto deregister_session;
2577 }
2578 ch->sess->se_node_acl = &nacl->nacl;
2579 transport_register_session(&sport->port_tpg_1, &nacl->nacl, ch->sess, ch);
2580
2581 pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess,
2582 ch->sess_name, ch->cm_id);
2583
2584 /* create srp_login_response */
2585 rsp->opcode = SRP_LOGIN_RSP;
2586 rsp->tag = req->tag;
2587 rsp->max_it_iu_len = req->req_it_iu_len;
2588 rsp->max_ti_iu_len = req->req_it_iu_len;
2589 ch->max_ti_iu_len = it_iu_len;
2590 rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
2591 | SRP_BUF_FORMAT_INDIRECT);
2592 rsp->req_lim_delta = cpu_to_be32(ch->rq_size);
2593 atomic_set(&ch->req_lim, ch->rq_size);
2594 atomic_set(&ch->req_lim_delta, 0);
2595
2596 /* create cm reply */
2597 rep_param->qp_num = ch->qp->qp_num;
2598 rep_param->private_data = (void *)rsp;
2599 rep_param->private_data_len = sizeof *rsp;
2600 rep_param->rnr_retry_count = 7;
2601 rep_param->flow_control = 1;
2602 rep_param->failover_accepted = 0;
2603 rep_param->srq = 1;
2604 rep_param->responder_resources = 4;
2605 rep_param->initiator_depth = 4;
2606
2607 ret = ib_send_cm_rep(cm_id, rep_param);
2608 if (ret) {
2609 printk(KERN_ERR "sending SRP_LOGIN_REQ response failed"
2610 " (error code = %d)\n", ret);
2611 goto release_channel;
2612 }
2613
2614 spin_lock_irq(&sdev->spinlock);
2615 list_add_tail(&ch->list, &sdev->rch_list);
2616 spin_unlock_irq(&sdev->spinlock);
2617
2618 goto out;
2619
2620release_channel:
2621 srpt_set_ch_state(ch, CH_RELEASING);
2622 transport_deregister_session_configfs(ch->sess);
2623
2624deregister_session:
2625 transport_deregister_session(ch->sess);
2626 ch->sess = NULL;
2627
2628destroy_ib:
2629 srpt_destroy_ch_ib(ch);
2630
2631free_ring:
2632 srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
2633 ch->sport->sdev, ch->rq_size,
2634 ch->rsp_size, DMA_TO_DEVICE);
2635free_ch:
2636 kfree(ch);
2637
2638reject:
2639 rej->opcode = SRP_LOGIN_REJ;
2640 rej->tag = req->tag;
2641 rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT
2642 | SRP_BUF_FORMAT_INDIRECT);
2643
2644 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2645 (void *)rej, sizeof *rej);
2646
2647out:
2648 kfree(rep_param);
2649 kfree(rsp);
2650 kfree(rej);
2651
2652 return ret;
2653}
2654
/* Process an IB_CM_REJ_RECEIVED event: the initiator rejected the
 * connection, so drain and reset the associated channel.
 */
static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
{
	printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id);
	srpt_drain_channel(cm_id);
}
2660
2661/**
2662 * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event.
2663 *
2664 * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2665 * and that the recipient may begin transmitting (RTU = ready to use).
2666 */
2667static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2668{
2669 struct srpt_rdma_ch *ch;
2670 int ret;
2671
2672 ch = srpt_find_channel(cm_id->context, cm_id);
2673 BUG_ON(!ch);
2674
2675 if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) {
2676 struct srpt_recv_ioctx *ioctx, *ioctx_tmp;
2677
2678 ret = srpt_ch_qp_rts(ch, ch->qp);
2679
2680 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2681 wait_list) {
2682 list_del(&ioctx->wait_list);
2683 srpt_handle_new_iu(ch, ioctx, NULL);
2684 }
2685 if (ret)
2686 srpt_close_ch(ch);
2687 }
2688}
2689
/* Process an IB_CM_TIMEWAIT_EXIT event by draining the associated channel. */
static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
{
	printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id);
	srpt_drain_channel(cm_id);
}
2695
/* Process an IB_CM_REP_ERROR event by draining the associated channel. */
static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
{
	printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id);
	srpt_drain_channel(cm_id);
}
2701
2702/**
2703 * srpt_cm_dreq_recv() - Process reception of a DREQ message.
2704 */
2705static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2706{
2707 struct srpt_rdma_ch *ch;
2708 unsigned long flags;
2709 bool send_drep = false;
2710
2711 ch = srpt_find_channel(cm_id->context, cm_id);
2712 BUG_ON(!ch);
2713
2714 pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch));
2715
2716 spin_lock_irqsave(&ch->spinlock, flags);
2717 switch (ch->state) {
2718 case CH_CONNECTING:
2719 case CH_LIVE:
2720 send_drep = true;
2721 ch->state = CH_DISCONNECTING;
2722 break;
2723 case CH_DISCONNECTING:
2724 case CH_DRAINING:
2725 case CH_RELEASING:
2726 WARN(true, "unexpected channel state %d\n", ch->state);
2727 break;
2728 }
2729 spin_unlock_irqrestore(&ch->spinlock, flags);
2730
2731 if (send_drep) {
2732 if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0)
2733 printk(KERN_ERR "Sending IB DREP failed.\n");
2734 printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n",
2735 ch->sess_name);
2736 }
2737}
2738
2739/**
2740 * srpt_cm_drep_recv() - Process reception of a DREP message.
2741 */
2742static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2743{
2744 printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n",
2745 cm_id);
2746 srpt_drain_channel(cm_id);
2747}
2748
2749/**
2750 * srpt_cm_handler() - IB connection manager callback function.
2751 *
2752 * A non-zero return value will cause the caller destroy the CM ID.
2753 *
2754 * Note: srpt_cm_handler() must only return a non-zero value when transferring
2755 * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning
2756 * a non-zero value in any other case will trigger a race with the
2757 * ib_destroy_cm_id() call in srpt_release_channel().
2758 */
2759static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2760{
2761 int ret;
2762
2763 ret = 0;
2764 switch (event->event) {
2765 case IB_CM_REQ_RECEIVED:
2766 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2767 event->private_data);
2768 break;
2769 case IB_CM_REJ_RECEIVED:
2770 srpt_cm_rej_recv(cm_id);
2771 break;
2772 case IB_CM_RTU_RECEIVED:
2773 case IB_CM_USER_ESTABLISHED:
2774 srpt_cm_rtu_recv(cm_id);
2775 break;
2776 case IB_CM_DREQ_RECEIVED:
2777 srpt_cm_dreq_recv(cm_id);
2778 break;
2779 case IB_CM_DREP_RECEIVED:
2780 srpt_cm_drep_recv(cm_id);
2781 break;
2782 case IB_CM_TIMEWAIT_EXIT:
2783 srpt_cm_timewait_exit(cm_id);
2784 break;
2785 case IB_CM_REP_ERROR:
2786 srpt_cm_rep_error(cm_id);
2787 break;
2788 case IB_CM_DREQ_ERROR:
2789 printk(KERN_INFO "Received IB DREQ ERROR event.\n");
2790 break;
2791 case IB_CM_MRA_RECEIVED:
2792 printk(KERN_INFO "Received IB MRA event\n");
2793 break;
2794 default:
2795 printk(KERN_ERR "received unrecognized IB CM event %d\n",
2796 event->event);
2797 break;
2798 }
2799
2800 return ret;
2801}
2802
2803/**
2804 * srpt_perform_rdmas() - Perform IB RDMA.
2805 *
2806 * Returns zero upon success or a negative number upon failure.
2807 */
2808static int srpt_perform_rdmas(struct srpt_rdma_ch *ch,
2809 struct srpt_send_ioctx *ioctx)
2810{
2811 struct ib_send_wr wr;
2812 struct ib_send_wr *bad_wr;
2813 struct rdma_iu *riu;
2814 int i;
2815 int ret;
2816 int sq_wr_avail;
2817 enum dma_data_direction dir;
2818 const int n_rdma = ioctx->n_rdma;
2819
2820 dir = ioctx->cmd.data_direction;
2821 if (dir == DMA_TO_DEVICE) {
2822 /* write */
2823 ret = -ENOMEM;
2824 sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail);
2825 if (sq_wr_avail < 0) {
2826 printk(KERN_WARNING "IB send queue full (needed %d)\n",
2827 n_rdma);
2828 goto out;
2829 }
2830 }
2831
2832 ioctx->rdma_aborted = false;
2833 ret = 0;
2834 riu = ioctx->rdma_ius;
2835 memset(&wr, 0, sizeof wr);
2836
2837 for (i = 0; i < n_rdma; ++i, ++riu) {
2838 if (dir == DMA_FROM_DEVICE) {
2839 wr.opcode = IB_WR_RDMA_WRITE;
2840 wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2841 SRPT_RDMA_WRITE_LAST :
2842 SRPT_RDMA_MID,
2843 ioctx->ioctx.index);
2844 } else {
2845 wr.opcode = IB_WR_RDMA_READ;
2846 wr.wr_id = encode_wr_id(i == n_rdma - 1 ?
2847 SRPT_RDMA_READ_LAST :
2848 SRPT_RDMA_MID,
2849 ioctx->ioctx.index);
2850 }
2851 wr.next = NULL;
2852 wr.wr.rdma.remote_addr = riu->raddr;
2853 wr.wr.rdma.rkey = riu->rkey;
2854 wr.num_sge = riu->sge_cnt;
2855 wr.sg_list = riu->sge;
2856
2857 /* only get completion event for the last rdma write */
2858 if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE)
2859 wr.send_flags = IB_SEND_SIGNALED;
2860
2861 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2862 if (ret)
2863 break;
2864 }
2865
2866 if (ret)
2867 printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d",
2868 __func__, __LINE__, ret, i, n_rdma);
2869 if (ret && i > 0) {
2870 wr.num_sge = 0;
2871 wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index);
2872 wr.send_flags = IB_SEND_SIGNALED;
2873 while (ch->state == CH_LIVE &&
2874 ib_post_send(ch->qp, &wr, &bad_wr) != 0) {
2875 printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]",
2876 ioctx->ioctx.index);
2877 msleep(1000);
2878 }
2879 while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) {
2880 printk(KERN_INFO "Waiting until RDMA abort finished [%d]",
2881 ioctx->ioctx.index);
2882 msleep(1000);
2883 }
2884 }
2885out:
2886 if (unlikely(dir == DMA_TO_DEVICE && ret < 0))
2887 atomic_add(n_rdma, &ch->sq_wr_avail);
2888 return ret;
2889}
2890
2891/**
2892 * srpt_xfer_data() - Start data transfer from initiator to target.
2893 */
2894static int srpt_xfer_data(struct srpt_rdma_ch *ch,
2895 struct srpt_send_ioctx *ioctx)
2896{
2897 int ret;
2898
2899 ret = srpt_map_sg_to_ib_sge(ch, ioctx);
2900 if (ret) {
2901 printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2902 goto out;
2903 }
2904
2905 ret = srpt_perform_rdmas(ch, ioctx);
2906 if (ret) {
2907 if (ret == -EAGAIN || ret == -ENOMEM)
2908 printk(KERN_INFO "%s[%d] queue full -- ret=%d\n",
2909 __func__, __LINE__, ret);
2910 else
2911 printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n",
2912 __func__, __LINE__, ret);
2913 goto out_unmap;
2914 }
2915
2916out:
2917 return ret;
2918out_unmap:
2919 srpt_unmap_sg_to_ib_sge(ch, ioctx);
2920 goto out;
2921}
2922
2923static int srpt_write_pending_status(struct se_cmd *se_cmd)
2924{
2925 struct srpt_send_ioctx *ioctx;
2926
2927 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2928 return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA;
2929}
2930
2931/*
2932 * srpt_write_pending() - Start data transfer from initiator to target (write).
2933 */
2934static int srpt_write_pending(struct se_cmd *se_cmd)
2935{
2936 struct srpt_rdma_ch *ch;
2937 struct srpt_send_ioctx *ioctx;
2938 enum srpt_command_state new_state;
2939 enum rdma_ch_state ch_state;
2940 int ret;
2941
2942 ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
2943
2944 new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
2945 WARN_ON(new_state == SRPT_STATE_DONE);
2946
2947 ch = ioctx->ch;
2948 BUG_ON(!ch);
2949
2950 ch_state = srpt_get_ch_state(ch);
2951 switch (ch_state) {
2952 case CH_CONNECTING:
2953 WARN(true, "unexpected channel state %d\n", ch_state);
2954 ret = -EINVAL;
2955 goto out;
2956 case CH_LIVE:
2957 break;
2958 case CH_DISCONNECTING:
2959 case CH_DRAINING:
2960 case CH_RELEASING:
2961 pr_debug("cmd with tag %lld: channel disconnecting\n",
2962 ioctx->tag);
2963 srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN);
2964 ret = -EINVAL;
2965 goto out;
2966 }
2967 ret = srpt_xfer_data(ch, ioctx);
2968
2969out:
2970 return ret;
2971}
2972
2973static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status)
2974{
2975 switch (tcm_mgmt_status) {
2976 case TMR_FUNCTION_COMPLETE:
2977 return SRP_TSK_MGMT_SUCCESS;
2978 case TMR_FUNCTION_REJECTED:
2979 return SRP_TSK_MGMT_FUNC_NOT_SUPP;
2980 }
2981 return SRP_TSK_MGMT_FAILED;
2982}
2983
2984/**
2985 * srpt_queue_response() - Transmits the response to a SCSI command.
2986 *
2987 * Callback function called by the TCM core. Must not block since it can be
2988 * invoked on the context of the IB completion handler.
2989 */
2990static int srpt_queue_response(struct se_cmd *cmd)
2991{
2992 struct srpt_rdma_ch *ch;
2993 struct srpt_send_ioctx *ioctx;
2994 enum srpt_command_state state;
2995 unsigned long flags;
2996 int ret;
2997 enum dma_data_direction dir;
2998 int resp_len;
2999 u8 srp_tm_status;
3000
3001 ret = 0;
3002
3003 ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
3004 ch = ioctx->ch;
3005 BUG_ON(!ch);
3006
3007 spin_lock_irqsave(&ioctx->spinlock, flags);
3008 state = ioctx->state;
3009 switch (state) {
3010 case SRPT_STATE_NEW:
3011 case SRPT_STATE_DATA_IN:
3012 ioctx->state = SRPT_STATE_CMD_RSP_SENT;
3013 break;
3014 case SRPT_STATE_MGMT:
3015 ioctx->state = SRPT_STATE_MGMT_RSP_SENT;
3016 break;
3017 default:
3018 WARN(true, "ch %p; cmd %d: unexpected command state %d\n",
3019 ch, ioctx->ioctx.index, ioctx->state);
3020 break;
3021 }
3022 spin_unlock_irqrestore(&ioctx->spinlock, flags);
3023
3024 if (unlikely(transport_check_aborted_status(&ioctx->cmd, false)
3025 || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) {
3026 atomic_inc(&ch->req_lim_delta);
3027 srpt_abort_cmd(ioctx);
3028 goto out;
3029 }
3030
3031 dir = ioctx->cmd.data_direction;
3032
3033 /* For read commands, transfer the data to the initiator. */
3034 if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length &&
3035 !ioctx->queue_status_only) {
3036 ret = srpt_xfer_data(ch, ioctx);
3037 if (ret) {
3038 printk(KERN_ERR "xfer_data failed for tag %llu\n",
3039 ioctx->tag);
3040 goto out;
3041 }
3042 }
3043
3044 if (state != SRPT_STATE_MGMT)
3045 resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag,
3046 cmd->scsi_status);
3047 else {
3048 srp_tm_status
3049 = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response);
3050 resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status,
3051 ioctx->tag);
3052 }
3053 ret = srpt_post_send(ch, ioctx, resp_len);
3054 if (ret) {
3055 printk(KERN_ERR "sending cmd response failed for tag %llu\n",
3056 ioctx->tag);
3057 srpt_unmap_sg_to_ib_sge(ch, ioctx);
3058 srpt_set_cmd_state(ioctx, SRPT_STATE_DONE);
3059 target_put_sess_cmd(ioctx->ch->sess, &ioctx->cmd);
3060 }
3061
3062out:
3063 return ret;
3064}
3065
3066static int srpt_queue_status(struct se_cmd *cmd)
3067{
3068 struct srpt_send_ioctx *ioctx;
3069
3070 ioctx = container_of(cmd, struct srpt_send_ioctx, cmd);
3071 BUG_ON(ioctx->sense_data != cmd->sense_buffer);
3072 if (cmd->se_cmd_flags &
3073 (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE))
3074 WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION);
3075 ioctx->queue_status_only = true;
3076 return srpt_queue_response(cmd);
3077}
3078
/* Work handler that refreshes port information outside IB event context. */
static void srpt_refresh_port_work(struct work_struct *work)
{
	struct srpt_port *sport = container_of(work, struct srpt_port, work);

	srpt_refresh_port(sport);
}
3085
/*
 * Returns whether sdev->rch_list is empty. The list is sampled under
 * sdev->spinlock so the result is consistent, although it may be stale by
 * the time the caller acts on it.
 */
static int srpt_ch_list_empty(struct srpt_device *sdev)
{
	int res;

	spin_lock_irq(&sdev->spinlock);
	res = list_empty(&sdev->rch_list);
	spin_unlock_irq(&sdev->spinlock);

	return res;
}
3096
3097/**
3098 * srpt_release_sdev() - Free the channel resources associated with a target.
3099 */
3100static int srpt_release_sdev(struct srpt_device *sdev)
3101{
3102 struct srpt_rdma_ch *ch, *tmp_ch;
3103 int res;
3104
3105 WARN_ON_ONCE(irqs_disabled());
3106
3107 BUG_ON(!sdev);
3108
3109 spin_lock_irq(&sdev->spinlock);
3110 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
3111 __srpt_close_ch(ch);
3112 spin_unlock_irq(&sdev->spinlock);
3113
3114 res = wait_event_interruptible(sdev->ch_releaseQ,
3115 srpt_ch_list_empty(sdev));
3116 if (res)
3117 printk(KERN_ERR "%s: interrupted.\n", __func__);
3118
3119 return 0;
3120}
3121
3122static struct srpt_port *__srpt_lookup_port(const char *name)
3123{
3124 struct ib_device *dev;
3125 struct srpt_device *sdev;
3126 struct srpt_port *sport;
3127 int i;
3128
3129 list_for_each_entry(sdev, &srpt_dev_list, list) {
3130 dev = sdev->device;
3131 if (!dev)
3132 continue;
3133
3134 for (i = 0; i < dev->phys_port_cnt; i++) {
3135 sport = &sdev->port[i];
3136
3137 if (!strcmp(sport->port_guid, name))
3138 return sport;
3139 }
3140 }
3141
3142 return NULL;
3143}
3144
/* Locked wrapper around __srpt_lookup_port(). */
static struct srpt_port *srpt_lookup_port(const char *name)
{
	struct srpt_port *sport;

	spin_lock(&srpt_dev_lock);
	sport = __srpt_lookup_port(name);
	spin_unlock(&srpt_dev_lock);

	return sport;
}
3155
3156/**
3157 * srpt_add_one() - Infiniband device addition callback function.
3158 */
3159static void srpt_add_one(struct ib_device *device)
3160{
3161 struct srpt_device *sdev;
3162 struct srpt_port *sport;
3163 struct ib_srq_init_attr srq_attr;
3164 int i;
3165
3166 pr_debug("device = %p, device->dma_ops = %p\n", device,
3167 device->dma_ops);
3168
3169 sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
3170 if (!sdev)
3171 goto err;
3172
3173 sdev->device = device;
3174 INIT_LIST_HEAD(&sdev->rch_list);
3175 init_waitqueue_head(&sdev->ch_releaseQ);
3176 spin_lock_init(&sdev->spinlock);
3177
3178 if (ib_query_device(device, &sdev->dev_attr))
3179 goto free_dev;
3180
3181 sdev->pd = ib_alloc_pd(device);
3182 if (IS_ERR(sdev->pd))
3183 goto free_dev;
3184
3185 sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
3186 if (IS_ERR(sdev->mr))
3187 goto err_pd;
3188
3189 sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr);
3190
3191 srq_attr.event_handler = srpt_srq_event;
3192 srq_attr.srq_context = (void *)sdev;
3193 srq_attr.attr.max_wr = sdev->srq_size;
3194 srq_attr.attr.max_sge = 1;
3195 srq_attr.attr.srq_limit = 0;
3196 srq_attr.srq_type = IB_SRQT_BASIC;
3197
3198 sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
3199 if (IS_ERR(sdev->srq))
3200 goto err_mr;
3201
3202 pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
3203 __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr,
3204 device->name);
3205
3206 if (!srpt_service_guid)
3207 srpt_service_guid = be64_to_cpu(device->node_guid);
3208
3209 sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
3210 if (IS_ERR(sdev->cm_id))
3211 goto err_srq;
3212
3213 /* print out target login information */
3214 pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx,"
3215 "pkey=ffff,service_id=%016llx\n", srpt_service_guid,
3216 srpt_service_guid, srpt_service_guid);
3217
3218 /*
3219 * We do not have a consistent service_id (ie. also id_ext of target_id)
3220 * to identify this target. We currently use the guid of the first HCA
3221 * in the system as service_id; therefore, the target_id will change
3222 * if this HCA is gone bad and replaced by different HCA
3223 */
3224 if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
3225 goto err_cm;
3226
3227 INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
3228 srpt_event_handler);
3229 if (ib_register_event_handler(&sdev->event_handler))
3230 goto err_cm;
3231
3232 sdev->ioctx_ring = (struct srpt_recv_ioctx **)
3233 srpt_alloc_ioctx_ring(sdev, sdev->srq_size,
3234 sizeof(*sdev->ioctx_ring[0]),
3235 srp_max_req_size, DMA_FROM_DEVICE);
3236 if (!sdev->ioctx_ring)
3237 goto err_event;
3238
3239 for (i = 0; i < sdev->srq_size; ++i)
3240 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
3241
3242 WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
3243
3244 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3245 sport = &sdev->port[i - 1];
3246 sport->sdev = sdev;
3247 sport->port = i;
3248 sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
3249 sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE;
3250 sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE;
3251 INIT_WORK(&sport->work, srpt_refresh_port_work);
3252 INIT_LIST_HEAD(&sport->port_acl_list);
3253 spin_lock_init(&sport->port_acl_lock);
3254
3255 if (srpt_refresh_port(sport)) {
3256 printk(KERN_ERR "MAD registration failed for %s-%d.\n",
3257 srpt_sdev_name(sdev), i);
3258 goto err_ring;
3259 }
3260 snprintf(sport->port_guid, sizeof(sport->port_guid),
3261 "0x%016llx%016llx",
3262 be64_to_cpu(sport->gid.global.subnet_prefix),
3263 be64_to_cpu(sport->gid.global.interface_id));
3264 }
3265
3266 spin_lock(&srpt_dev_lock);
3267 list_add_tail(&sdev->list, &srpt_dev_list);
3268 spin_unlock(&srpt_dev_lock);
3269
3270out:
3271 ib_set_client_data(device, &srpt_client, sdev);
3272 pr_debug("added %s.\n", device->name);
3273 return;
3274
3275err_ring:
3276 srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
3277 sdev->srq_size, srp_max_req_size,
3278 DMA_FROM_DEVICE);
3279err_event:
3280 ib_unregister_event_handler(&sdev->event_handler);
3281err_cm:
3282 ib_destroy_cm_id(sdev->cm_id);
3283err_srq:
3284 ib_destroy_srq(sdev->srq);
3285err_mr:
3286 ib_dereg_mr(sdev->mr);
3287err_pd:
3288 ib_dealloc_pd(sdev->pd);
3289free_dev:
3290 kfree(sdev);
3291err:
3292 sdev = NULL;
3293 printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name);
3294 goto out;
3295}
3296
3297/**
3298 * srpt_remove_one() - InfiniBand device removal callback function.
3299 */
3300static void srpt_remove_one(struct ib_device *device)
3301{
3302 struct srpt_device *sdev;
3303 int i;
3304
3305 sdev = ib_get_client_data(device, &srpt_client);
3306 if (!sdev) {
3307 printk(KERN_INFO "%s(%s): nothing to do.\n", __func__,
3308 device->name);
3309 return;
3310 }
3311
3312 srpt_unregister_mad_agent(sdev);
3313
3314 ib_unregister_event_handler(&sdev->event_handler);
3315
3316 /* Cancel any work queued by the just unregistered IB event handler. */
3317 for (i = 0; i < sdev->device->phys_port_cnt; i++)
3318 cancel_work_sync(&sdev->port[i].work);
3319
3320 ib_destroy_cm_id(sdev->cm_id);
3321
3322 /*
3323 * Unregistering a target must happen after destroying sdev->cm_id
3324 * such that no new SRP_LOGIN_REQ information units can arrive while
3325 * destroying the target.
3326 */
3327 spin_lock(&srpt_dev_lock);
3328 list_del(&sdev->list);
3329 spin_unlock(&srpt_dev_lock);
3330 srpt_release_sdev(sdev);
3331
3332 ib_destroy_srq(sdev->srq);
3333 ib_dereg_mr(sdev->mr);
3334 ib_dealloc_pd(sdev->pd);
3335
3336 srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev,
3337 sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE);
3338 sdev->ioctx_ring = NULL;
3339 kfree(sdev);
3340}
3341
/*
 * IB client registration: .add/.remove are invoked for every RDMA device
 * added to or removed from the system.
 */
static struct ib_client srpt_client = {
	.name = DRV_NAME,
	.add = srpt_add_one,
	.remove = srpt_remove_one
};
3347
/* TCM callback stub that unconditionally reports "yes". */
static int srpt_check_true(struct se_portal_group *se_tpg)
{
	return 1;
}
3352
/* TCM callback stub that unconditionally reports "no". */
static int srpt_check_false(struct se_portal_group *se_tpg)
{
	return 0;
}
3357
/* Returns the fabric name shown in configfs and log messages. */
static char *srpt_get_fabric_name(void)
{
	return "srpt";
}
3362
/* Returns the SPC transport protocol identifier for SRP. */
static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg)
{
	return SCSI_TRANSPORTID_PROTOCOLID_SRP;
}
3367
/* Returns the port GUID string of the port that owns @tpg. */
static char *srpt_get_fabric_wwn(struct se_portal_group *tpg)
{
	struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);

	return sport->port_guid;
}
3374
/* There is a single TPG per port, so its tag is always 1. */
static u16 srpt_get_tag(struct se_portal_group *tpg)
{
	return 1;
}
3379
/* Default queue depth reported to the TCM core. */
static u32 srpt_get_default_depth(struct se_portal_group *se_tpg)
{
	return 1;
}
3384
3385static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg,
3386 struct se_node_acl *se_nacl,
3387 struct t10_pr_registration *pr_reg,
3388 int *format_code, unsigned char *buf)
3389{
3390 struct srpt_node_acl *nacl;
3391 struct spc_rdma_transport_id *tr_id;
3392
3393 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3394 tr_id = (void *)buf;
3395 tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP;
3396 memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id));
3397 return sizeof(*tr_id);
3398}
3399
/* Report the fixed length of an SRP TransportID; format code is always 0. */
static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg,
					struct se_node_acl *se_nacl,
					struct t10_pr_registration *pr_reg,
					int *format_code)
{
	*format_code = 0;
	return sizeof(struct spc_rdma_transport_id);
}
3408
3409static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
3410 const char *buf, u32 *out_tid_len,
3411 char **port_nexus_ptr)
3412{
3413 struct spc_rdma_transport_id *tr_id;
3414
3415 *port_nexus_ptr = NULL;
3416 *out_tid_len = sizeof(struct spc_rdma_transport_id);
3417 tr_id = (void *)buf;
3418 return (char *)tr_id->i_port_id;
3419}
3420
3421static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg)
3422{
3423 struct srpt_node_acl *nacl;
3424
3425 nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL);
3426 if (!nacl) {
3427 printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n");
3428 return NULL;
3429 }
3430
3431 return &nacl->nacl;
3432}
3433
3434static void srpt_release_fabric_acl(struct se_portal_group *se_tpg,
3435 struct se_node_acl *se_nacl)
3436{
3437 struct srpt_node_acl *nacl;
3438
3439 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3440 kfree(nacl);
3441}
3442
/* SCSI-MIB instance index; a single fixed value is sufficient here. */
static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg)
{
	return 1;
}
3447
/*
 * TCM .release_cmd callback: return a send ioctx to the channel free list
 * once the command is done. Frees the indirect data-buffer descriptor
 * array if one was allocated.
 */
static void srpt_release_cmd(struct se_cmd *se_cmd)
{
	struct srpt_send_ioctx *ioctx = container_of(se_cmd,
				struct srpt_send_ioctx, cmd);
	struct srpt_rdma_ch *ch = ioctx->ch;
	unsigned long flags;

	WARN_ON(ioctx->state != SRPT_STATE_DONE);
	WARN_ON(ioctx->mapped_sg_count != 0);

	/* rbufs was kmalloc'ed only for commands with more than one buffer. */
	if (ioctx->n_rbuf > 1) {
		kfree(ioctx->rbufs);
		ioctx->rbufs = NULL;
		ioctx->n_rbuf = 0;
	}

	spin_lock_irqsave(&ch->spinlock, flags);
	list_add(&ioctx->free_list, &ch->free_list);
	spin_unlock_irqrestore(&ch->spinlock, flags);
}
3468
3469/**
3470 * srpt_shutdown_session() - Whether or not a session may be shut down.
3471 */
3472static int srpt_shutdown_session(struct se_session *se_sess)
3473{
3474 return true;
3475}
3476
3477/**
3478 * srpt_close_session() - Forcibly close a session.
3479 *
3480 * Callback function invoked by the TCM core to clean up sessions associated
3481 * with a node ACL when the user invokes
3482 * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3483 */
3484static void srpt_close_session(struct se_session *se_sess)
3485{
3486 DECLARE_COMPLETION_ONSTACK(release_done);
3487 struct srpt_rdma_ch *ch;
3488 struct srpt_device *sdev;
3489 int res;
3490
3491 ch = se_sess->fabric_sess_ptr;
3492 WARN_ON(ch->sess != se_sess);
3493
3494 pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch));
3495
3496 sdev = ch->sport->sdev;
3497 spin_lock_irq(&sdev->spinlock);
3498 BUG_ON(ch->release_done);
3499 ch->release_done = &release_done;
3500 __srpt_close_ch(ch);
3501 spin_unlock_irq(&sdev->spinlock);
3502
3503 res = wait_for_completion_timeout(&release_done, 60 * HZ);
3504 WARN_ON(res <= 0);
3505}
3506
3507/**
3508 * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB).
3509 *
3510 * A quote from RFC 4455 (SCSI-MIB) about this MIB object:
3511 * This object represents an arbitrary integer used to uniquely identify a
3512 * particular attached remote initiator port to a particular SCSI target port
3513 * within a particular SCSI target device within a particular SCSI instance.
3514 */
3515static u32 srpt_sess_get_index(struct se_session *se_sess)
3516{
3517 return 0;
3518}
3519
/* Intentionally empty: srpt has no per-node default attributes to set. */
static void srpt_set_default_node_attrs(struct se_node_acl *nacl)
{
}
3523
/* Returns the SRP tag associated with a command. */
static u32 srpt_get_task_tag(struct se_cmd *se_cmd)
{
	struct srpt_send_ioctx *ioctx;

	ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
	return ioctx->tag;
}
3531
/* Note: only used from inside debug printk's by the TCM core. */
static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd)
{
	struct srpt_send_ioctx *ioctx;

	ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd);
	return srpt_get_cmd_state(ioctx);
}
3540
3541/**
3542 * srpt_parse_i_port_id() - Parse an initiator port ID.
3543 * @name: ASCII representation of a 128-bit initiator port ID.
3544 * @i_port_id: Binary 128-bit port ID.
3545 */
3546static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name)
3547{
3548 const char *p;
3549 unsigned len, count, leading_zero_bytes;
3550 int ret, rc;
3551
3552 p = name;
3553 if (strnicmp(p, "0x", 2) == 0)
3554 p += 2;
3555 ret = -EINVAL;
3556 len = strlen(p);
3557 if (len % 2)
3558 goto out;
3559 count = min(len / 2, 16U);
3560 leading_zero_bytes = 16 - count;
3561 memset(i_port_id, 0, leading_zero_bytes);
3562 rc = hex2bin(i_port_id + leading_zero_bytes, p, count);
3563 if (rc < 0)
3564 pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc);
3565 ret = 0;
3566out:
3567 return ret;
3568}
3569
3570/*
3571 * configfs callback function invoked for
3572 * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3573 */
3574static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg,
3575 struct config_group *group,
3576 const char *name)
3577{
3578 struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1);
3579 struct se_node_acl *se_nacl, *se_nacl_new;
3580 struct srpt_node_acl *nacl;
3581 int ret = 0;
3582 u32 nexus_depth = 1;
3583 u8 i_port_id[16];
3584
3585 if (srpt_parse_i_port_id(i_port_id, name) < 0) {
3586 printk(KERN_ERR "invalid initiator port ID %s\n", name);
3587 ret = -EINVAL;
3588 goto err;
3589 }
3590
3591 se_nacl_new = srpt_alloc_fabric_acl(tpg);
3592 if (!se_nacl_new) {
3593 ret = -ENOMEM;
3594 goto err;
3595 }
3596 /*
3597 * nacl_new may be released by core_tpg_add_initiator_node_acl()
3598 * when converting a node ACL from demo mode to explict
3599 */
3600 se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name,
3601 nexus_depth);
3602 if (IS_ERR(se_nacl)) {
3603 ret = PTR_ERR(se_nacl);
3604 goto err;
3605 }
3606 /* Locate our struct srpt_node_acl and set sdev and i_port_id. */
3607 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3608 memcpy(&nacl->i_port_id[0], &i_port_id[0], 16);
3609 nacl->sport = sport;
3610
3611 spin_lock_irq(&sport->port_acl_lock);
3612 list_add_tail(&nacl->list, &sport->port_acl_list);
3613 spin_unlock_irq(&sport->port_acl_lock);
3614
3615 return se_nacl;
3616err:
3617 return ERR_PTR(ret);
3618}
3619
3620/*
3621 * configfs callback function invoked for
3622 * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id
3623 */
3624static void srpt_drop_nodeacl(struct se_node_acl *se_nacl)
3625{
3626 struct srpt_node_acl *nacl;
3627 struct srpt_device *sdev;
3628 struct srpt_port *sport;
3629
3630 nacl = container_of(se_nacl, struct srpt_node_acl, nacl);
3631 sport = nacl->sport;
3632 sdev = sport->sdev;
3633 spin_lock_irq(&sport->port_acl_lock);
3634 list_del(&nacl->list);
3635 spin_unlock_irq(&sport->port_acl_lock);
3636 core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1);
3637 srpt_release_fabric_acl(NULL, se_nacl);
3638}
3639
/* configfs read handler for the srp_max_rdma_size TPG attribute. */
static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size(
	struct se_portal_group *se_tpg,
	char *page)
{
	struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);

	return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size);
}
3648
3649static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size(
3650 struct se_portal_group *se_tpg,
3651 const char *page,
3652 size_t count)
3653{
3654 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3655 unsigned long val;
3656 int ret;
3657
3658 ret = strict_strtoul(page, 0, &val);
3659 if (ret < 0) {
3660 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3661 return -EINVAL;
3662 }
3663 if (val > MAX_SRPT_RDMA_SIZE) {
3664 pr_err("val: %lu exceeds MAX_SRPT_RDMA_SIZE: %d\n", val,
3665 MAX_SRPT_RDMA_SIZE);
3666 return -EINVAL;
3667 }
3668 if (val < DEFAULT_MAX_RDMA_SIZE) {
3669 pr_err("val: %lu smaller than DEFAULT_MAX_RDMA_SIZE: %d\n",
3670 val, DEFAULT_MAX_RDMA_SIZE);
3671 return -EINVAL;
3672 }
3673 sport->port_attrib.srp_max_rdma_size = val;
3674
3675 return count;
3676}
3677
3678TF_TPG_ATTRIB_ATTR(srpt, srp_max_rdma_size, S_IRUGO | S_IWUSR);
3679
/* configfs read handler for the srp_max_rsp_size TPG attribute. */
static ssize_t srpt_tpg_attrib_show_srp_max_rsp_size(
	struct se_portal_group *se_tpg,
	char *page)
{
	struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);

	return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size);
}
3688
3689static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size(
3690 struct se_portal_group *se_tpg,
3691 const char *page,
3692 size_t count)
3693{
3694 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3695 unsigned long val;
3696 int ret;
3697
3698 ret = strict_strtoul(page, 0, &val);
3699 if (ret < 0) {
3700 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3701 return -EINVAL;
3702 }
3703 if (val > MAX_SRPT_RSP_SIZE) {
3704 pr_err("val: %lu exceeds MAX_SRPT_RSP_SIZE: %d\n", val,
3705 MAX_SRPT_RSP_SIZE);
3706 return -EINVAL;
3707 }
3708 if (val < MIN_MAX_RSP_SIZE) {
3709 pr_err("val: %lu smaller than MIN_MAX_RSP_SIZE: %d\n", val,
3710 MIN_MAX_RSP_SIZE);
3711 return -EINVAL;
3712 }
3713 sport->port_attrib.srp_max_rsp_size = val;
3714
3715 return count;
3716}
3717
3718TF_TPG_ATTRIB_ATTR(srpt, srp_max_rsp_size, S_IRUGO | S_IWUSR);
3719
/* configfs read handler for the srp_sq_size TPG attribute. */
static ssize_t srpt_tpg_attrib_show_srp_sq_size(
	struct se_portal_group *se_tpg,
	char *page)
{
	struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);

	return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size);
}
3728
3729static ssize_t srpt_tpg_attrib_store_srp_sq_size(
3730 struct se_portal_group *se_tpg,
3731 const char *page,
3732 size_t count)
3733{
3734 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3735 unsigned long val;
3736 int ret;
3737
3738 ret = strict_strtoul(page, 0, &val);
3739 if (ret < 0) {
3740 pr_err("strict_strtoul() failed with ret: %d\n", ret);
3741 return -EINVAL;
3742 }
3743 if (val > MAX_SRPT_SRQ_SIZE) {
3744 pr_err("val: %lu exceeds MAX_SRPT_SRQ_SIZE: %d\n", val,
3745 MAX_SRPT_SRQ_SIZE);
3746 return -EINVAL;
3747 }
3748 if (val < MIN_SRPT_SRQ_SIZE) {
3749 pr_err("val: %lu smaller than MIN_SRPT_SRQ_SIZE: %d\n", val,
3750 MIN_SRPT_SRQ_SIZE);
3751 return -EINVAL;
3752 }
3753 sport->port_attrib.srp_sq_size = val;
3754
3755 return count;
3756}
3757
3758TF_TPG_ATTRIB_ATTR(srpt, srp_sq_size, S_IRUGO | S_IWUSR);
3759
/* Attribute list for the per-TPG "attrib" configfs directory. */
static struct configfs_attribute *srpt_tpg_attrib_attrs[] = {
	&srpt_tpg_attrib_srp_max_rdma_size.attr,
	&srpt_tpg_attrib_srp_max_rsp_size.attr,
	&srpt_tpg_attrib_srp_sq_size.attr,
	NULL,
};
3766
/* configfs read handler for the TPG "enable" attribute (prints 0 or 1). */
static ssize_t srpt_tpg_show_enable(
	struct se_portal_group *se_tpg,
	char *page)
{
	struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);

	return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0);
}
3775
3776static ssize_t srpt_tpg_store_enable(
3777 struct se_portal_group *se_tpg,
3778 const char *page,
3779 size_t count)
3780{
3781 struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1);
3782 unsigned long tmp;
3783 int ret;
3784
3785 ret = strict_strtoul(page, 0, &tmp);
3786 if (ret < 0) {
3787 printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n");
3788 return -EINVAL;
3789 }
3790
3791 if ((tmp != 0) && (tmp != 1)) {
3792 printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp);
3793 return -EINVAL;
3794 }
3795 if (tmp == 1)
3796 sport->enabled = true;
3797 else
3798 sport->enabled = false;
3799
3800 return count;
3801}
3802
3803TF_TPG_BASE_ATTR(srpt, enable, S_IRUGO | S_IWUSR);
3804
/* Attribute list for the TPG base configfs directory. */
static struct configfs_attribute *srpt_tpg_attrs[] = {
	&srpt_tpg_enable.attr,
	NULL,
};
3809
3810/**
3811 * configfs callback invoked for
3812 * mkdir /sys/kernel/config/target/$driver/$port/$tpg
3813 */
3814static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn,
3815 struct config_group *group,
3816 const char *name)
3817{
3818 struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
3819 int res;
3820
3821 /* Initialize sport->port_wwn and sport->port_tpg_1 */
3822 res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn,
3823 &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL);
3824 if (res)
3825 return ERR_PTR(res);
3826
3827 return &sport->port_tpg_1;
3828}
3829
3830/**
3831 * configfs callback invoked for
3832 * rmdir /sys/kernel/config/target/$driver/$port/$tpg
3833 */
3834static void srpt_drop_tpg(struct se_portal_group *tpg)
3835{
3836 struct srpt_port *sport = container_of(tpg,
3837 struct srpt_port, port_tpg_1);
3838
3839 sport->enabled = false;
3840 core_tpg_deregister(&sport->port_tpg_1);
3841}
3842
3843/**
3844 * configfs callback invoked for
3845 * mkdir /sys/kernel/config/target/$driver/$port
3846 */
3847static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf,
3848 struct config_group *group,
3849 const char *name)
3850{
3851 struct srpt_port *sport;
3852 int ret;
3853
3854 sport = srpt_lookup_port(name);
3855 pr_debug("make_tport(%s)\n", name);
3856 ret = -EINVAL;
3857 if (!sport)
3858 goto err;
3859
3860 return &sport->port_wwn;
3861
3862err:
3863 return ERR_PTR(ret);
3864}
3865
3866/**
3867 * configfs callback invoked for
3868 * rmdir /sys/kernel/config/target/$driver/$port
3869 */
3870static void srpt_drop_tport(struct se_wwn *wwn)
3871{
3872 struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn);
3873
3874 pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item));
3875}
3876
/* configfs read handler for the read-only fabric "version" attribute. */
static ssize_t srpt_wwn_show_attr_version(struct target_fabric_configfs *tf,
					  char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION);
}

TF_WWN_ATTR_RO(srpt, version);
3884
/* Attribute list for the fabric-level configfs directory. */
static struct configfs_attribute *srpt_wwn_attrs[] = {
	&srpt_wwn_version.attr,
	NULL,
};
3889
/*
 * TCM fabric callback table for the srpt fabric module. Note that
 * srpt_queue_response() serves as both .queue_data_in and .queue_tm_rsp;
 * it distinguishes the two cases by the ioctx state.
 */
static struct target_core_fabric_ops srpt_template = {
	.get_fabric_name		= srpt_get_fabric_name,
	.get_fabric_proto_ident		= srpt_get_fabric_proto_ident,
	.tpg_get_wwn			= srpt_get_fabric_wwn,
	.tpg_get_tag			= srpt_get_tag,
	.tpg_get_default_depth		= srpt_get_default_depth,
	.tpg_get_pr_transport_id	= srpt_get_pr_transport_id,
	.tpg_get_pr_transport_id_len	= srpt_get_pr_transport_id_len,
	.tpg_parse_pr_out_transport_id	= srpt_parse_pr_out_transport_id,
	.tpg_check_demo_mode		= srpt_check_false,
	.tpg_check_demo_mode_cache	= srpt_check_true,
	.tpg_check_demo_mode_write_protect = srpt_check_true,
	.tpg_check_prod_mode_write_protect = srpt_check_false,
	.tpg_alloc_fabric_acl		= srpt_alloc_fabric_acl,
	.tpg_release_fabric_acl		= srpt_release_fabric_acl,
	.tpg_get_inst_index		= srpt_tpg_get_inst_index,
	.release_cmd			= srpt_release_cmd,
	.check_stop_free		= srpt_check_stop_free,
	.shutdown_session		= srpt_shutdown_session,
	.close_session			= srpt_close_session,
	.sess_get_index			= srpt_sess_get_index,
	.sess_get_initiator_sid		= NULL,
	.write_pending			= srpt_write_pending,
	.write_pending_status		= srpt_write_pending_status,
	.set_default_node_attributes	= srpt_set_default_node_attrs,
	.get_task_tag			= srpt_get_task_tag,
	.get_cmd_state			= srpt_get_tcm_cmd_state,
	.queue_data_in			= srpt_queue_response,
	.queue_status			= srpt_queue_status,
	.queue_tm_rsp			= srpt_queue_response,
	/*
	 * Setup function pointers for generic logic in
	 * target_core_fabric_configfs.c
	 */
	.fabric_make_wwn		= srpt_make_tport,
	.fabric_drop_wwn		= srpt_drop_tport,
	.fabric_make_tpg		= srpt_make_tpg,
	.fabric_drop_tpg		= srpt_drop_tpg,
	.fabric_post_link		= NULL,
	.fabric_pre_unlink		= NULL,
	.fabric_make_np			= NULL,
	.fabric_drop_np			= NULL,
	.fabric_make_nodeacl		= srpt_make_nodeacl,
	.fabric_drop_nodeacl		= srpt_drop_nodeacl,
};
3935
3936/**
3937 * srpt_init_module() - Kernel module initialization.
3938 *
3939 * Note: Since ib_register_client() registers callback functions, and since at
3940 * least one of these callback functions (srpt_add_one()) calls target core
3941 * functions, this driver must be registered with the target core before
3942 * ib_register_client() is called.
3943 */
3944static int __init srpt_init_module(void)
3945{
3946 int ret;
3947
3948 ret = -EINVAL;
3949 if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
3950 printk(KERN_ERR "invalid value %d for kernel module parameter"
3951 " srp_max_req_size -- must be at least %d.\n",
3952 srp_max_req_size, MIN_MAX_REQ_SIZE);
3953 goto out;
3954 }
3955
3956 if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
3957 || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
3958 printk(KERN_ERR "invalid value %d for kernel module parameter"
3959 " srpt_srq_size -- must be in the range [%d..%d].\n",
3960 srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
3961 goto out;
3962 }
3963
3964 srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
3965 if (IS_ERR(srpt_target)) {
3966 printk(KERN_ERR "couldn't register\n");
3967 ret = PTR_ERR(srpt_target);
3968 goto out;
3969 }
3970
3971 srpt_target->tf_ops = srpt_template;
3972
3973 /*
3974 * Set up default attribute lists.
3975 */
3976 srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
3977 srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
3978 srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
3979 srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
3980 srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
3981 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
3982 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
3983 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
3984 srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
3985
3986 ret = target_fabric_configfs_register(srpt_target);
3987 if (ret < 0) {
3988 printk(KERN_ERR "couldn't register\n");
3989 goto out_free_target;
3990 }
3991
3992 ret = ib_register_client(&srpt_client);
3993 if (ret) {
3994 printk(KERN_ERR "couldn't register IB client\n");
3995 goto out_unregister_target;
3996 }
3997
3998 return 0;
3999
4000out_unregister_target:
4001 target_fabric_configfs_deregister(srpt_target);
4002 srpt_target = NULL;
4003out_free_target:
4004 if (srpt_target)
4005 target_fabric_configfs_free(srpt_target);
4006out:
4007 return ret;
4008}
4009
/*
 * Module exit: unregister the IB client first so srpt_remove_one() runs for
 * every device before the fabric module is deregistered.
 */
static void __exit srpt_cleanup_module(void)
{
	ib_unregister_client(&srpt_client);
	target_fabric_configfs_deregister(srpt_target);
	srpt_target = NULL;
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
deleted file mode 100644
index 4caf55cda7b..00000000000
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ /dev/null
@@ -1,442 +0,0 @@
1/*
2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 * Copyright (C) 2009 - 2010 Bart Van Assche <bvanassche@acm.org>.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 *
33 */
34
35#ifndef IB_SRPT_H
36#define IB_SRPT_H
37
38#include <linux/types.h>
39#include <linux/list.h>
40#include <linux/wait.h>
41
42#include <rdma/ib_verbs.h>
43#include <rdma/ib_sa.h>
44#include <rdma/ib_cm.h>
45
46#include <scsi/srp.h>
47
48#include "ib_dm_mad.h"
49
/*
 * Prefix that the ServiceName field must start with in the device management
 * ServiceEntries attribute pair. See also the SRP specification.
 */
#define SRP_SERVICE_NAME_PREFIX		"SRP.T10:"
55
56enum {
57 /*
58 * SRP IOControllerProfile attributes for SRP target ports that have
59 * not been defined in <scsi/srp.h>. Source: section B.7, table B.7
60 * in the SRP specification.
61 */
62 SRP_PROTOCOL = 0x0108,
63 SRP_PROTOCOL_VERSION = 0x0001,
64 SRP_IO_SUBCLASS = 0x609e,
65 SRP_SEND_TO_IOC = 0x01,
66 SRP_SEND_FROM_IOC = 0x02,
67 SRP_RDMA_READ_FROM_IOC = 0x08,
68 SRP_RDMA_WRITE_FROM_IOC = 0x20,
69
70 /*
71 * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP
72 * specification.
73 */
74 SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */
75 SRP_LOSOLNT = 0x10, /* logout solicited notification */
76 SRP_CRSOLNT = 0x20, /* credit request solicited notification */
77 SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */
78
79 /*
80 * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables
81 * 18 and 20 in the SRP specification.
82 */
83 SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */
84 SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */
85
86 /*
87 * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables
88 * 16 and 22 in the SRP specification.
89 */
90 SRP_SOLNT = 0x01, /* SOLNT = solicited notification */
91
92 /* See also table 24 in the SRP specification. */
93 SRP_TSK_MGMT_SUCCESS = 0x00,
94 SRP_TSK_MGMT_FUNC_NOT_SUPP = 0x04,
95 SRP_TSK_MGMT_FAILED = 0x05,
96
97 /* See also table 21 in the SRP specification. */
98 SRP_CMD_SIMPLE_Q = 0x0,
99 SRP_CMD_HEAD_OF_Q = 0x1,
100 SRP_CMD_ORDERED_Q = 0x2,
101 SRP_CMD_ACA = 0x4,
102
103 SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
104 SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
105 SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
106
107 SRPT_DEF_SG_TABLESIZE = 128,
108 SRPT_DEF_SG_PER_WQE = 16,
109
110 MIN_SRPT_SQ_SIZE = 16,
111 DEF_SRPT_SQ_SIZE = 4096,
112 SRPT_RQ_SIZE = 128,
113 MIN_SRPT_SRQ_SIZE = 4,
114 DEFAULT_SRPT_SRQ_SIZE = 4095,
115 MAX_SRPT_SRQ_SIZE = 65535,
116 MAX_SRPT_RDMA_SIZE = 1U << 24,
117 MAX_SRPT_RSP_SIZE = 1024,
118
119 MIN_MAX_REQ_SIZE = 996,
120 DEFAULT_MAX_REQ_SIZE
121 = sizeof(struct srp_cmd)/*48*/
122 + sizeof(struct srp_indirect_buf)/*20*/
123 + 128 * sizeof(struct srp_direct_buf)/*16*/,
124
125 MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4,
126 DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */
127
128 DEFAULT_MAX_RDMA_SIZE = 65536,
129};
130
/* Operation type encoded in the upper 32 bits of each IB work request ID. */
enum srpt_opcode {
	SRPT_RECV,
	SRPT_SEND,
	SRPT_RDMA_MID,
	SRPT_RDMA_ABORT,
	SRPT_RDMA_READ_LAST,
	SRPT_RDMA_WRITE_LAST,
};
139
140static inline u64 encode_wr_id(u8 opcode, u32 idx)
141{
142 return ((u64)opcode << 32) | idx;
143}
144static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
145{
146 return wr_id >> 32;
147}
148static inline u32 idx_from_wr_id(u64 wr_id)
149{
150 return (u32)wr_id;
151}
152
153struct rdma_iu {
154 u64 raddr;
155 u32 rkey;
156 struct ib_sge *sge;
157 u32 sge_cnt;
158 int mem_id;
159};
160
/**
 * enum srpt_command_state - SCSI command state managed by SRPT.
 * @SRPT_STATE_NEW:           New command arrived and is being processed.
 * @SRPT_STATE_NEED_DATA:     Processing a write or bidir command; waiting
 *                            for data arrival.
 * @SRPT_STATE_DATA_IN:       Data for the write or bidir command arrived and
 *                            is being processed.
 * @SRPT_STATE_CMD_RSP_SENT:  SRP_RSP for SRP_CMD has been sent.
 * @SRPT_STATE_MGMT:          Processing a SCSI task management command.
 * @SRPT_STATE_MGMT_RSP_SENT: SRP_RSP for SRP_TSK_MGMT has been sent.
 * @SRPT_STATE_DONE:          Command processing finished successfully,
 *                            has been aborted, or has failed.
 */
enum srpt_command_state {
	SRPT_STATE_NEW		= 0,
	SRPT_STATE_NEED_DATA	= 1,
	SRPT_STATE_DATA_IN	= 2,
	SRPT_STATE_CMD_RSP_SENT	= 3,
	SRPT_STATE_MGMT		= 4,
	SRPT_STATE_MGMT_RSP_SENT = 5,
	SRPT_STATE_DONE		= 6,
};
184
185/**
186 * struct srpt_ioctx - Shared SRPT I/O context information.
187 * @buf: Pointer to the buffer.
188 * @dma: DMA address of the buffer.
189 * @index: Index of the I/O context in its ioctx_ring array.
190 */
191struct srpt_ioctx {
192 void *buf;
193 dma_addr_t dma;
194 uint32_t index;
195};
196
197/**
198 * struct srpt_recv_ioctx - SRPT receive I/O context.
199 * @ioctx: See above.
200 * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
201 */
202struct srpt_recv_ioctx {
203 struct srpt_ioctx ioctx;
204 struct list_head wait_list;
205};
206
207/**
208 * struct srpt_send_ioctx - SRPT send I/O context.
209 * @ioctx: See above.
210 * @ch: Channel pointer.
211 * @free_list: Node in srpt_rdma_ch.free_list.
212 * @n_rbuf: Number of data buffers in the received SRP command.
213 * @rbufs: Pointer to SRP data buffer array.
214 * @single_rbuf: SRP data buffer if the command has only a single buffer.
215 * @sg: Pointer to sg-list associated with this I/O context.
216 * @sg_cnt: SG-list size.
217 * @mapped_sg_count: ib_dma_map_sg() return value.
218 * @n_rdma_ius: Number of elements in the rdma_ius array.
219 * @rdma_ius: Array with information about the RDMA mapping.
220 * @tag: Tag of the received SRP information unit.
221 * @spinlock: Protects 'state'.
222 * @state: I/O context state.
223 * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
224 * the already initiated transfers have finished.
225 * @cmd: Target core command data structure.
226 * @sense_data: SCSI sense data.
227 */
228struct srpt_send_ioctx {
229 struct srpt_ioctx ioctx;
230 struct srpt_rdma_ch *ch;
231 struct rdma_iu *rdma_ius;
232 struct srp_direct_buf *rbufs;
233 struct srp_direct_buf single_rbuf;
234 struct scatterlist *sg;
235 struct list_head free_list;
236 spinlock_t spinlock;
237 enum srpt_command_state state;
238 bool rdma_aborted;
239 struct se_cmd cmd;
240 struct completion tx_done;
241 u64 tag;
242 int sg_cnt;
243 int mapped_sg_count;
244 u16 n_rdma_ius;
245 u8 n_rdma;
246 u8 n_rbuf;
247 bool queue_status_only;
248 u8 sense_data[SCSI_SENSE_BUFFERSIZE];
249};
250
/**
 * enum rdma_ch_state - SRP channel state.
 * @CH_CONNECTING:    QP is in RTR state; waiting for RTU.
 * @CH_LIVE:          QP is in RTS state.
 * @CH_DISCONNECTING: Either a DREQ has been received and a DREP has to be
 *                    sent, or a DREQ has been sent and a DREP is awaited.
 * @CH_DRAINING:      QP is in ERR state; waiting for the last WQE event.
 * @CH_RELEASING:     Last WQE event has been received; releasing resources.
 */
enum rdma_ch_state {
	CH_CONNECTING,
	CH_LIVE,
	CH_DISCONNECTING,
	CH_DRAINING,
	CH_RELEASING
};
268
269/**
270 * struct srpt_rdma_ch - RDMA channel.
271 * @wait_queue: Allows the kernel thread to wait for more work.
272 * @thread: Kernel thread that processes the IB queues associated with
273 * the channel.
274 * @cm_id: IB CM ID associated with the channel.
275 * @qp: IB queue pair used for communicating over this channel.
276 * @cq: IB completion queue for this channel.
277 * @rq_size: IB receive queue size.
278 * @rsp_size IB response message size in bytes.
279 * @sq_wr_avail: number of work requests available in the send queue.
280 * @sport: pointer to the information of the HCA port used by this
281 * channel.
282 * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ.
283 * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ.
284 * @max_ti_iu_len: maximum target-to-initiator information unit length.
285 * @req_lim: request limit: maximum number of requests that may be sent
286 * by the initiator without having received a response.
287 * @req_lim_delta: Number of credits not yet sent back to the initiator.
288 * @spinlock: Protects free_list and state.
289 * @free_list: Head of list with free send I/O contexts.
290 * @state: channel state. See also enum rdma_ch_state.
291 * @ioctx_ring: Send ring.
292 * @wc: IB work completion array for srpt_process_completion().
293 * @list: Node for insertion in the srpt_device.rch_list list.
294 * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This
295 * list contains struct srpt_ioctx elements and is protected
296 * against concurrent modification by the cm_id spinlock.
297 * @sess: Session information associated with this SRP channel.
298 * @sess_name: Session name.
299 * @release_work: Allows scheduling of srpt_release_channel().
300 * @release_done: Enables waiting for srpt_release_channel() completion.
301 */
302struct srpt_rdma_ch {
303 wait_queue_head_t wait_queue;
304 struct task_struct *thread;
305 struct ib_cm_id *cm_id;
306 struct ib_qp *qp;
307 struct ib_cq *cq;
308 int rq_size;
309 u32 rsp_size;
310 atomic_t sq_wr_avail;
311 struct srpt_port *sport;
312 u8 i_port_id[16];
313 u8 t_port_id[16];
314 int max_ti_iu_len;
315 atomic_t req_lim;
316 atomic_t req_lim_delta;
317 spinlock_t spinlock;
318 struct list_head free_list;
319 enum rdma_ch_state state;
320 struct srpt_send_ioctx **ioctx_ring;
321 struct ib_wc wc[16];
322 struct list_head list;
323 struct list_head cmd_wait_list;
324 struct se_session *sess;
325 u8 sess_name[36];
326 struct work_struct release_work;
327 struct completion *release_done;
328};
329
330/**
331 * struct srpt_port_attib - Attributes for SRPT port
332 * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections.
333 * @srp_max_rsp_size: Maximum size of SRP response messages in bytes.
334 * @srp_sq_size: Shared receive queue (SRQ) size.
335 */
336struct srpt_port_attrib {
337 u32 srp_max_rdma_size;
338 u32 srp_max_rsp_size;
339 u32 srp_sq_size;
340};
341
342/**
343 * struct srpt_port - Information associated by SRPT with a single IB port.
344 * @sdev: backpointer to the HCA information.
345 * @mad_agent: per-port management datagram processing information.
346 * @enabled: Whether or not this target port is enabled.
347 * @port_guid: ASCII representation of Port GUID
348 * @port: one-based port number.
349 * @sm_lid: cached value of the port's sm_lid.
350 * @lid: cached value of the port's lid.
351 * @gid: cached value of the port's gid.
352 * @port_acl_lock spinlock for port_acl_list:
353 * @work: work structure for refreshing the aforementioned cached values.
354 * @port_tpg_1 Target portal group = 1 data.
355 * @port_wwn: Target core WWN data.
356 * @port_acl_list: Head of the list with all node ACLs for this port.
357 */
358struct srpt_port {
359 struct srpt_device *sdev;
360 struct ib_mad_agent *mad_agent;
361 bool enabled;
362 u8 port_guid[64];
363 u8 port;
364 u16 sm_lid;
365 u16 lid;
366 union ib_gid gid;
367 spinlock_t port_acl_lock;
368 struct work_struct work;
369 struct se_portal_group port_tpg_1;
370 struct se_wwn port_wwn;
371 struct list_head port_acl_list;
372 struct srpt_port_attrib port_attrib;
373};
374
375/**
376 * struct srpt_device - Information associated by SRPT with a single HCA.
377 * @device: Backpointer to the struct ib_device managed by the IB core.
378 * @pd: IB protection domain.
379 * @mr: L_Key (local key) with write access to all local memory.
380 * @srq: Per-HCA SRQ (shared receive queue).
381 * @cm_id: Connection identifier.
382 * @dev_attr: Attributes of the InfiniBand device as obtained during the
383 * ib_client.add() callback.
384 * @srq_size: SRQ size.
385 * @ioctx_ring: Per-HCA SRQ.
386 * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list.
387 * @ch_releaseQ: Enables waiting for removal from rch_list.
388 * @spinlock: Protects rch_list and tpg.
389 * @port: Information about the ports owned by this HCA.
390 * @event_handler: Per-HCA asynchronous IB event handler.
391 * @list: Node in srpt_dev_list.
392 */
393struct srpt_device {
394 struct ib_device *device;
395 struct ib_pd *pd;
396 struct ib_mr *mr;
397 struct ib_srq *srq;
398 struct ib_cm_id *cm_id;
399 struct ib_device_attr dev_attr;
400 int srq_size;
401 struct srpt_recv_ioctx **ioctx_ring;
402 struct list_head rch_list;
403 wait_queue_head_t ch_releaseQ;
404 spinlock_t spinlock;
405 struct srpt_port port[2];
406 struct ib_event_handler event_handler;
407 struct list_head list;
408};
409
410/**
411 * struct srpt_node_acl - Per-initiator ACL data (managed via configfs).
412 * @i_port_id: 128-bit SRP initiator port ID.
413 * @sport: port information.
414 * @nacl: Target core node ACL information.
415 * @list: Element of the per-HCA ACL list.
416 */
417struct srpt_node_acl {
418 u8 i_port_id[16];
419 struct srpt_port *sport;
420 struct se_node_acl nacl;
421 struct list_head list;
422};
423
424/*
425 * SRP-related SCSI persistent reservation definitions.
426 *
427 * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction).
428 * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using
429 * SCSI over an RDMA interface).
430 */
431
enum {
	/* PROTOCOL IDENTIFIER value for SRP; see SPC4r28, section 7.6.4.5. */
	SCSI_TRANSPORTID_PROTOCOLID_SRP	= 4,
};
435
/* TransportID layout for SRP initiator ports; see SPC4r28, section 7.6.4.5. */
struct spc_rdma_transport_id {
	uint8_t	protocol_identifier;
	uint8_t	reserved[7];
	uint8_t	i_port_id[16];
};
441
442#endif /* IB_SRPT_H */